Commit 142f903

Merge pull request #459 from Blosc/fancyIndex
Fancy index
2 parents 0ba130d + 47bcb89 commit 142f903

File tree

4 files changed: +223 −75 lines changed

bench/ndarray/fancy_index.py

Lines changed: 9 additions & 8 deletions
@@ -26,8 +26,8 @@

 NUMPY = True
 BLOSC = True
-ZARR = False
-HDF5 = False
+ZARR = True
+HDF5 = True
 SPARSE = False

 NDIMS = 2  # must be at least 2
@@ -37,9 +37,9 @@ def genarray(r, ndims=2, verbose=True):
     shape = (d,) * ndims
     chunks = (d // 4,) * ndims
     blocks = (max(d // 10, 1),) * ndims
+    urlpath = f'linspace{r}{ndims}D.b2nd'
     t = time.time()
-    arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64,
-                          urlpath=f'linspace{r}{ndims}D.b2nd', mode='w')
+    arr = blosc2.linspace(0, 1000, num=np.prod(shape), shape=shape, dtype=np.float64, urlpath=urlpath, mode='w')
     t = time.time() - t
     arrsize = np.prod(arr.shape) * arr.dtype.itemsize / 2 ** 30
     if verbose:
@@ -134,18 +134,19 @@ def timer(arr):
 err = (mean - times.min(axis=1), times.max(axis=1)-mean)
 plt.bar(x + w, mean , width, color=c, label=label, yerr=err, capsize=5, ecolor='k',
         error_kw=dict(lw=2, capthick=2, ecolor='k'))
-labs+=label
+labs += label

-filename = f"results{labs}{NDIMS}D" + "sparse" if SPARSE else f"results{labs}{NDIMS}D"
+filename = f"{labs}{NDIMS}D" + "sparse" if SPARSE else f"{labs}{NDIMS}D"
+filename += blosc2.__version__.replace('.','_')

 with open(f"{filename}.pkl", 'wb') as f:
-    pickle.dump(result_tuple, f)
+    pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f)

 plt.xlabel('Array size (GB)')
 plt.legend()
 plt.xticks(x-width, np.round(genuine_sizes, 2))
 plt.ylabel("Time (s)")
-plt.title(f"Fancy indexing performance comparison, {NDIMS}D" +f"{" sparse" if SPARSE else ""}")
+plt.title(f"Fancy indexing {blosc2.__version__}, {NDIMS}D" +f"{" sparse" if SPARSE else ""}")
 plt.gca().set_yscale('log')
 plt.savefig(f'plots/fancyIdx{filename}.png', format="png")
 plt.show()
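For context, the core pattern this benchmark times is roughly the following (an illustrative sketch, not part of the commit; the value of d and the urlpath are made up):

import time

import numpy as np
import blosc2

d = 1_000
# materialize a compressed array on disk, as genarray() does above
arr = blosc2.linspace(0, 1000, num=d * d, shape=(d, d), dtype=np.float64,
                      urlpath='linspace_demo.b2nd', mode='w')

rng = np.random.default_rng()
idx = rng.integers(low=0, high=d, size=(1_000,))  # SPARSE-style selection

t = time.time()
b = arr[idx]  # fancy indexing along the first axis
print(f"{time.time() - t:.4f} s for a result of shape {b.shape}")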

bench/ndarray/fancy_index1D.py

Lines changed: 11 additions & 10 deletions
@@ -27,9 +27,9 @@

 NUMPY = True
 BLOSC = True
-ZARR = True
-HDF5 = True
-SPARSE = True
+ZARR = False
+HDF5 = False
+SPARSE = False

 if HDF5:
     SPARSE = True  # HDF5 takes too long for non-sparse indexing
@@ -50,7 +50,7 @@ def genarray(r, verbose=True):
     return arr, arrsize


-target_sizes = np.float64(np.array([.2, .5, 1, 2, 5, 10]))
+target_sizes = np.float64(np.array([.1, .2, .5, 1, 2, 3]))
 rng = np.random.default_rng()
 blosctimes = []
 nptimes = []
@@ -61,14 +61,14 @@ def genarray(r, verbose=True):
 arr, arrsize = genarray(d)
 genuine_sizes += [arrsize]
 idx = rng.integers(low=0, high=arr.shape[0], size=(1000,)) if SPARSE else rng.integers(low=0, high=arr.shape[0], size=(arr.shape[0]//4,))
-sorted_idx = np.unique(np.sort(idx))
+sorted_idx = np.sort(np.unique(idx))
 ## Test fancy indexing for different use cases
 def timer(arr):
     time_list = []
     if not (HDF5 or ZARR):
-        b = arr[[[sorted_idx], [idx]]]
-        time_list += [time.time() - t]
-        t = time.time()
+        t = time.time()
+        b = arr[[[idx[::-1]], [idx]]]
+        time_list += [time.time() - t]
     t = time.time()
     b = arr[sorted_idx] if HDF5 else arr[idx]
     time_list += [time.time() - t]
@@ -114,15 +114,16 @@ def timer(arr):
         error_kw=dict(lw=2, capthick=2, ecolor='k'))
 labs+=label

-filename = f"results{labs}1Dsparse" if SPARSE else f"results{labs}1D"
+filename = f"{labs}1Dsparse" if SPARSE else f"{labs}1D"
+filename+=blosc2.__version__.replace('.','_')
 with open(filename+".pkl", 'wb') as f:
     pickle.dump({'times':result_tuple, 'sizes':genuine_sizes}, f)

 plt.xlabel('Array size (GB)')
 plt.legend()
 plt.xticks(x-width, np.round(genuine_sizes, 2))
 plt.ylabel("Time (s)")
-plt.title(f"Fancy indexing performance comparison, 1D {' sparse' if SPARSE else ''}")
+plt.title(f"Fancy indexing {blosc2.__version__}, 1D {' sparse' if SPARSE else ''}")
 plt.gca().set_yscale('log')
 plt.savefig(f'plots/{filename}.png', format="png")
 plt.show()
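Two things are worth noting in the timer diff above: the old code read the clock before t was ever assigned (the first subtraction used an undefined t), which the reordering fixes, and the first measured case indexes a 1D array with a nested list of two integer arrays. A small NumPy-only sketch of what that key means (illustrative, not part of the commit):

import numpy as np

arr = np.arange(10) * 10
idx = np.array([1, 3, 3, 7])  # unsorted, with a repeat
b = arr[[[idx[::-1]], [idx]]]  # treated as one integer-array index of shape (2, 1, 4)
assert b.shape == (2, 1, 4)
assert (b[1, 0] == arr[idx]).all()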

src/blosc2/ndarray.py

Lines changed: 184 additions & 49 deletions
@@ -27,6 +27,7 @@

 import ndindex
 import numpy as np
+from ndindex.subindex_helpers import ceiling

 import blosc2
 from blosc2 import SpecialValue, blosc2_ext, compute_chunks_blocks
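The newly imported ceiling(a, b) helper is integer ceil-division; a minimal equivalent, assuming the standard definition and matching how it is used below (editor's sketch):

def ceiling(a, b):
    return -(-a // b)

assert ceiling(25, 10) == 3  # 25 elements span 3 chunks of size 10
assert ceiling(20, 10) == 2  # exact multiples do not round up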
@@ -1476,66 +1477,124 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
         out: np.ndarray

         """
-        # TODO: Make this faster for broadcasted keys
-        ## Can`t do this because ndindex doesn't support all the same indexing cases as Numpy
+        # TODO: Make this faster and avoid running out of memory - avoid broadcasting keys
+
+        ## Can't do this because ndindex doesn't support all the same indexing cases as Numpy
         # if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
         #     return self[:][key]  # load into memory for smallish arrays
         shape = self.shape
         chunks = self.chunks
+
+        # TODO: try to optimise and avoid this expand which seems to copy - maybe np.broadcast
         _slice = ndindex.ndindex(key).expand(shape)
         out_shape = _slice.newshape(shape)
-        out = np.empty(out_shape, dtype=self.dtype)
-
-        if self.ndim == 1:
-            # Fast path so we can avoid the costly lines in slow path
-            # (sub_idx = _slice.as_subindex(c).raw, sel_idx = c.as_subindex(_slice))
-            key = np.array(key)
-            if np.any(np.diff(key) < 0):
-                idx_order = np.argsort(key)
-                sorted_idxs = key[idx_order]
+        _slice = _slice.raw
+        # now all indices are slices or arrays of integers (or booleans)
+        # moreover, all arrays are consecutive (otherwise an error is raised)
+
+        if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all(
+            [s.dtype is not bool for s in _slice if isinstance(s, np.ndarray)]
+        ):
+            chunks = np.array(chunks)
+            # |------|
+            # ------| arrs |------
+            arridxs = [i for i, s in enumerate(_slice) if isinstance(s, np.ndarray)]
+            begin, end = arridxs[0], arridxs[-1] + 1
+            flat_shape = tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[:begin])
+            idx_dim = np.prod(_slice[begin].shape)
+
+            # TODO: find a nicer way to do the copy maybe
+            arr = np.empty((idx_dim, end - begin), dtype=_slice[begin].dtype)
+            for i, s in enumerate(_slice[begin:end]):
+                arr[:, i] = s.reshape(-1)  # have to do a copy
+
+            flat_shape += (idx_dim,)
+            flat_shape += tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[end:])
+            # out_shape could have new dims if indexing arrays are not all 1D
+            # (we have just flattened them so need to handle accordingly)
+            prior_tuple = _slice[:begin]
+            post_tuple = _slice[end:]
+            divider = chunks[begin:end]
+            chunked_arr = arr // divider
+            if arr.shape[-1] == 1:  # 1D chunks, can avoid loading whole chunks
+                idx_order = np.argsort(arr.squeeze(axis=1), axis=-1)  # sort by real index
+                chunk_nitems = np.bincount(chunked_arr.reshape(-1), minlength=self.schunk.nchunks)
+                unique_chunks = np.nonzero(chunk_nitems)[0][:, None]  # add dummy axis
+                chunk_nitems = chunk_nitems[unique_chunks]
             else:
-                idx_order = None
-                sorted_idxs = key
-            chunk_nitems = np.bincount(
-                sorted_idxs // chunks[0], minlength=self.schunk.nchunks
-            )  # number of items per chunk
+                chunked_arr = np.ascontiguousarray(
+                    chunked_arr
+                )  # ensure C-order memory to allow structured dtype view
+                # TODO: check that avoids sort and copy (alternative: maybe do a bincount with structured data types?)
+                _, row_ids, idx_inv, chunk_nitems = np.unique(
+                    chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]),
+                    return_counts=True,
+                    return_index=True,
+                    return_inverse=True,
+                )
+                unique_chunks = chunked_arr[row_ids]
+                idx_order = np.argsort(
+                    idx_inv.squeeze(-1)
+                )  # sort by chunks (can't sort by index since larger index could belong to lower chunk)
+                # e.g. chunks of (100, 10) means (50, 15) has chunk idx (0,1) but (60,5) has (0, 0)
+            sorted_idxs = arr[idx_order]
+            out = np.empty(flat_shape, dtype=self.dtype)
+            shape = np.array(shape)
+
             chunk_nitems_cumsum = np.cumsum(chunk_nitems)
-            for chunk_i, c in enumerate(chunk_nitems):
-                if c == 0:
-                    continue  # no items in chunk
+            cprior_slices = [
+                slice_to_chunktuple(s, c) for s, c in zip(prior_tuple, chunks[:begin], strict=True)
+            ]
+            cpost_slices = [slice_to_chunktuple(s, c) for s, c in zip(post_tuple, chunks[end:], strict=True)]
+            # TODO: rewrite to allow interleaved slices/array indexes
+            for chunk_i, chunk_idx in enumerate(unique_chunks):
                 start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1]
                 stop = chunk_nitems_cumsum[chunk_i]
                 selection = sorted_idxs[start:stop]
-                out_selection = idx_order[start:stop] if idx_order is not None else slice(start, stop)
-                to_be_loaded = np.empty((selection[-1] - selection[0] + 1,), dtype=self.dtype)
-                # if len(selection) < 10:
-                #     # TODO: optimise for case of few elements and go index-by-index
-                #     selector = out_selection.start + np.arange(out_selection.stop) if isinstance(out_selection, slice) else out_selection
-                #     for j, idx in enumerate(sorted_idxs[start:stop]):
-                #         out[out_selection[j]] = self[idx]
-                # else:
-                super().get_slice_numpy(
-                    to_be_loaded, ((selection[0],), (selection[-1] + 1,))
-                )  # get relevant section of chunk
-                loc_idx = selection - selection[0]
-                out[out_selection] = to_be_loaded[loc_idx]
-
-        else:
-            chunk_size = ndindex.ChunkSize(chunks)
-            # repeated indices are grouped together
-            intersecting_chunks = chunk_size.as_subchunks(
-                _slice, shape
-            )  # if _slice is (), returns all chunks
-            for c in intersecting_chunks:
-                sub_idx = _slice.as_subindex(c).raw
-                sel_idx = c.as_subindex(_slice)
-                new_shape = sel_idx.newshape(out_shape)
-                start, stop, step = get_ndarray_start_stop(self.ndim, c.raw, shape)
-                chunk = np.empty(
-                    tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype
-                )
-                super().get_slice_numpy(chunk, (start, stop))
-                out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape)
+                out_mid_selection = (idx_order[start:stop],)
+                if (
+                    arr.shape[-1] == 1
+                ):  # can avoid loading in whole chunk if 1D for array indexed chunks, a bit faster
+                    chunk_begin = selection[0]
+                    chunk_end = selection[-1] + 1
+                else:
+                    chunk_begin = chunk_idx * chunks[begin:end]
+                    chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end])
+                loc_mid_selection = tuple(a for a in (selection - chunk_begin).T)
+
+                # loop over chunks coming from slices before and after array indices
+                for cprior_tuple in product(*cprior_slices):
+                    out_prior_selection, prior_selection, loc_prior_selection = _get_selection(
+                        cprior_tuple, prior_tuple, chunks[:begin]
+                    )
+                    for cpost_tuple in product(*cpost_slices):
+                        out_post_selection, post_selection, loc_post_selection = _get_selection(
+                            cpost_tuple, post_tuple, chunks[end:] if end is not None else []
+                        )
+                        locbegin, locend = _get_local_slice(
+                            prior_selection, post_selection, (chunk_begin, chunk_end)
+                        )
+                        to_be_loaded = np.empty(locend - locbegin, dtype=self.dtype)
+                        # basically load whole chunk, except for slice part at beginning and end
+                        super().get_slice_numpy(to_be_loaded, (locbegin, locend))
+                        loc_idx = loc_prior_selection + loc_mid_selection + loc_post_selection
+                        out_idx = out_prior_selection + out_mid_selection + out_post_selection
+                        out[out_idx] = to_be_loaded[loc_idx]
+            return out.reshape(out_shape)  # should have filled in correct order, just need to reshape
+
+        # Default when there are booleans
+        out = np.empty(out_shape, dtype=self.dtype)
+        chunk_size = ndindex.ChunkSize(chunks)
+        # repeated indices are grouped together
+        intersecting_chunks = chunk_size.as_subchunks(_slice, shape)  # if _slice is (), returns all chunks
+        for c in intersecting_chunks:
+            sub_idx = _slice.as_subindex(c).raw
+            sel_idx = c.as_subindex(_slice)
+            new_shape = sel_idx.newshape(out_shape)
+            start, stop, step = get_ndarray_start_stop(self.ndim, c.raw, shape)
+            chunk = np.empty(tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype)
+            super().get_slice_numpy(chunk, (start, stop))
+            out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape)

         return out

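The heart of the new path is grouping flattened N-dimensional index tuples by the chunk they fall in: a floor-divide maps each index row to chunk coordinates, and viewing each row as a single structured scalar lets np.unique group and count rows. A standalone sketch of that trick (editor's example; note the method's idx_inv.squeeze(-1) relies on NumPy >= 2 returning the inverse with the input's shape, while reshape(-1) also works on older versions):

import numpy as np

chunks = np.array([100, 10])
arr = np.array([[50, 15],
                [60, 5],
                [50, 14],
                [205, 3]])  # (npoints, ndim) integer indices
chunked_arr = np.ascontiguousarray(arr // chunks)  # chunk coordinates per point
_, row_ids, idx_inv, chunk_nitems = np.unique(
    chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]),
    return_counts=True,
    return_index=True,
    return_inverse=True,
)
unique_chunks = chunked_arr[row_ids]
print(unique_chunks)  # [[0 0] [0 1] [2 0]]: (50,15) and (50,14) share chunk (0,1)
print(chunk_nitems)   # [1 2 1]
idx_order = np.argsort(idx_inv.reshape(-1))  # visit points grouped by chunk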
@@ -4537,3 +4596,79 @@ def __setitem__(self, selection, input) -> np.ndarray:

 # def __setitem__(self, selection, input) -> np.ndarray:
 #     return NotImplementedError
+
+
+def slice_to_chunktuple(s, n):
+    """
+    Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks.
+    Parameters
+    ----------
+    s : slice
+        A slice object with start, stop, and step attributes.
+    n : int
+        The number of elements in the chunk axis
+
+    Returns
+    -------
+    out: tuple
+    """
+    start, stop, step = s.start, s.stop, s.step
+    if step > n:
+        return tuple((start + k * step) // n for k in range(ceiling(stop - start, step)))
+    else:
+        return tuple(range(start // n, ceiling(stop, n)))
+
+
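A worked example of slice_to_chunktuple (editor's note, with ceil-division written out inline): for a chunk size of n = 10, slice(2, 25, 3) selects 2, 5, ..., 23, so every chunk from 0 to 2 is touched; once step > n, whole chunks can be skipped.

s, n = slice(2, 25, 3), 10
assert tuple(range(s.start // n, -(-s.stop // n))) == (0, 1, 2)

s, n = slice(1, 50, 12), 5  # picks 1, 13, 25, 37, 49
assert tuple((s.start + k * s.step) // n
             for k in range(-(-(s.stop - s.start) // s.step))) == (0, 2, 5, 7, 9)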
+def _get_selection(ctuple, ptuple, chunks):
+    # we assume that at least one element of chunk intersects with the slice
+    # (as a consequence of only looping over intersecting chunks)
+    pselection = ()
+    for i, s, csize in zip(ctuple, ptuple, chunks, strict=True):
+        # we need to advance to first element within chunk that intersects with slice, not
+        # necessarily the first element of chunk
+        # i * csize = s.start + n*step + k, already added n+1 elements, k in [1, step]
+        np1 = (i * csize - s.start + s.step - 1) // s.step  # gives (n + 1)
+        # can have n = -1 if s.start > i * csize, but never < -1 since have to intersect with chunk
+        pselection += (
+            slice(
+                builtins.max(s.start, s.start + np1 * s.step),  # start+(n+1)*step gives i*csize if k=step
+                builtins.min(csize * (i + 1), s.stop),
+                s.step,
+            ),
+        )
+
+    # selection relative to coordinates of out (necessarily step = 1)
+    # stop = start + step * n + k => n = (stop - start - 1) // step
+    # hence, out_stop = out_start + n + 1
+    # ps.start = pt.start + out_start * step
+    out_pselection = tuple(
+        slice(
+            (ps.start - pt.start + pt.step - 1) // pt.step,
+            (ps.start - pt.start + pt.step - 1) // pt.step + (ps.stop - ps.start + ps.step - 1) // ps.step,
+            1,
+        )
+        for ps, pt in zip(pselection, ptuple, strict=True)
+    )
+
+    loc_selection = tuple(slice(0, s.stop - s.start, s.step) for s in pselection)
+
+    return out_pselection, pselection, loc_selection
+
+
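To see the intersection arithmetic in _get_selection at work, take s = slice(3, 50, 7) on an axis with chunk size 20 and look at chunk i = 1, i.e. global coordinates [20, 40) (editor's worked example, using plain max/min):

s, csize, i = slice(3, 50, 7), 20, 1
# s selects 3, 10, 17, 24, 31, 38, 45; only 24, 31, 38 land in chunk 1
np1 = (i * csize - s.start + s.step - 1) // s.step  # 3 elements lie before the chunk
ps = slice(max(s.start, s.start + np1 * s.step),  # first hit inside the chunk: 24
           min(csize * (i + 1), s.stop),          # clip to chunk end: 40
           s.step)
assert list(range(ps.start, ps.stop, ps.step)) == [24, 31, 38]
# their positions in the slice's own output coordinates are 3, 4, 5
out_start = (ps.start - s.start + s.step - 1) // s.step
out_stop = out_start + (ps.stop - ps.start + ps.step - 1) // ps.step
assert (out_start, out_stop) == (3, 6)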
+def _get_local_slice(prior_selection, post_selection, chunk_bounds):
+    chunk_begin, chunk_end = chunk_bounds
+    locbegin = np.hstack(
+        (
+            [s.start for s in prior_selection],
+            chunk_begin,
+            [s.start for s in post_selection],
+        ),
+        casting="unsafe",
+        dtype="int64",
+    )
+    locend = np.hstack(
+        ([s.stop for s in prior_selection], chunk_end, [s.stop for s in post_selection]),
+        casting="unsafe",
+        dtype="int64",
+    )
+    return locbegin, locend
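Finally, an end-to-end sketch of the kind of key the new get_fselection_numpy path is built for, mixing slices with consecutive integer index arrays. This is an editor's example (it assumes fancy indexing on an in-memory NDArray dispatches to this path) and simply checks the result against NumPy:

import numpy as np
import blosc2

a = blosc2.linspace(0, 1, num=100 * 100, shape=(100, 100), dtype=np.float64)
rows = np.array([3, 97, 42, 3])  # unsorted, with a repeat
ref = a[:]  # full decompression as the NumPy reference
np.testing.assert_array_equal(a[rows, 10:20], ref[rows, 10:20])
np.testing.assert_array_equal(a[5:50:3, rows], ref[5:50:3, rows])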
