@@ -27,6 +27,7 @@
 
 import ndindex
 import numpy as np
+from ndindex.subindex_helpers import ceiling
 
 import blosc2
 from blosc2 import SpecialValue, blosc2_ext, compute_chunks_blocks
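Note: the newly imported ceiling is ndindex's integer ceiling-division helper; it backs the slice_to_chunktuple function added at the end of this diff. Assuming it is a plain ceil-divide on non-negative integers, it is equivalent to:

    def ceiling(a, b):  # ceil(a / b) using integer arithmetic only
        return -(-a // b)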
@@ -1476,66 +1477,124 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
         out: np.ndarray
 
         """
-        # TODO: Make this faster for broadcasted keys
-        ## Can`t do this because ndindex doesn't support all the same indexing cases as Numpy
+        # TODO: Make this faster and avoid running out of memory - avoid broadcasting keys
+
+        ## Can't do this because ndindex doesn't support all the same indexing cases as Numpy
         # if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
         #     return self[:][key]  # load into memory for smallish arrays
         shape = self.shape
         chunks = self.chunks
+
+        # TODO: try to optimise and avoid this expand which seems to copy - maybe np.broadcast
         _slice = ndindex.ndindex(key).expand(shape)
         out_shape = _slice.newshape(shape)
-        out = np.empty(out_shape, dtype=self.dtype)
-
-        if self.ndim == 1:
-            # Fast path so we can avoid the costly lines in slow path
-            # (sub_idx = _slice.as_subindex(c).raw, sel_idx = c.as_subindex(_slice))
-            key = np.array(key)
-            if np.any(np.diff(key) < 0):
-                idx_order = np.argsort(key)
-                sorted_idxs = key[idx_order]
+        _slice = _slice.raw
+        # now all indices are slices or arrays of integers (or booleans)
+        # moreover, all arrays are consecutive (otherwise an error is raised)
+
+        if np.all([isinstance(s, (slice, np.ndarray)) for s in _slice]) and np.all(
+            [s.dtype != bool for s in _slice if isinstance(s, np.ndarray)]
+        ):
+            chunks = np.array(chunks)
+            # Layout of the expanded key:
+            # (leading slices) | block of consecutive array indices | (trailing slices)
+            arridxs = [i for i, s in enumerate(_slice) if isinstance(s, np.ndarray)]
+            begin, end = arridxs[0], arridxs[-1] + 1
+            flat_shape = tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[:begin])
+            idx_dim = np.prod(_slice[begin].shape)
+
+            # TODO: find a nicer way to do the copy maybe
+            arr = np.empty((idx_dim, end - begin), dtype=_slice[begin].dtype)
+            for i, s in enumerate(_slice[begin:end]):
+                arr[:, i] = s.reshape(-1)  # have to do a copy
+
+            flat_shape += (idx_dim,)
+            flat_shape += tuple((i.stop - i.start + (i.step - 1)) // i.step for i in _slice[end:])
+            # out_shape could have new dims if indexing arrays are not all 1D
+            # (we have just flattened them so need to handle accordingly)
+            prior_tuple = _slice[:begin]
+            post_tuple = _slice[end:]
+            divider = chunks[begin:end]
+            chunked_arr = arr // divider
+            if arr.shape[-1] == 1:  # 1D chunks, can avoid loading whole chunks
+                idx_order = np.argsort(arr.squeeze(axis=1), axis=-1)  # sort by real index
+                chunk_nitems = np.bincount(chunked_arr.reshape(-1), minlength=self.schunk.nchunks)
+                unique_chunks = np.nonzero(chunk_nitems)[0][:, None]  # add dummy axis
+                chunk_nitems = chunk_nitems[unique_chunks]
             else:
-                idx_order = None
-                sorted_idxs = key
-            chunk_nitems = np.bincount(
-                sorted_idxs // chunks[0], minlength=self.schunk.nchunks
-            )  # number of items per chunk
+                chunked_arr = np.ascontiguousarray(
+                    chunked_arr
+                )  # ensure C-order memory to allow structured dtype view
+                # TODO: check that avoids sort and copy (alternative: maybe do a bincount with structured data types?)
+                _, row_ids, idx_inv, chunk_nitems = np.unique(
+                    chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]),
+                    return_counts=True,
+                    return_index=True,
+                    return_inverse=True,
+                )
+                unique_chunks = chunked_arr[row_ids]
+                idx_order = np.argsort(
+                    idx_inv.squeeze(-1)
+                )  # sort by chunks (can't sort by index since larger index could belong to lower chunk)
+                # e.g. chunks of (100, 10) means (50, 15) has chunk idx (0, 1) but (60, 5) has (0, 0)
+            sorted_idxs = arr[idx_order]
+            out = np.empty(flat_shape, dtype=self.dtype)
+            shape = np.array(shape)
+
             chunk_nitems_cumsum = np.cumsum(chunk_nitems)
-            for chunk_i, c in enumerate(chunk_nitems):
-                if c == 0:
-                    continue  # no items in chunk
+            cprior_slices = [
+                slice_to_chunktuple(s, c) for s, c in zip(prior_tuple, chunks[:begin], strict=True)
+            ]
+            cpost_slices = [slice_to_chunktuple(s, c) for s, c in zip(post_tuple, chunks[end:], strict=True)]
+            # TODO: rewrite to allow interleaved slices/array indexes
+            for chunk_i, chunk_idx in enumerate(unique_chunks):
                 start = 0 if chunk_i == 0 else chunk_nitems_cumsum[chunk_i - 1]
                 stop = chunk_nitems_cumsum[chunk_i]
                 selection = sorted_idxs[start:stop]
-                out_selection = idx_order[start:stop] if idx_order is not None else slice(start, stop)
-                to_be_loaded = np.empty((selection[-1] - selection[0] + 1,), dtype=self.dtype)
-                # if len(selection) < 10:
-                #     # TODO: optimise for case of few elements and go index-by-index
-                #     selector = out_selection.start + np.arange(out_selection.stop) if isinstance(out_selection, slice) else out_selection
-                #     for j, idx in enumerate(sorted_idxs[start:stop]):
-                #         out[out_selection[j]] = self[idx]
-                # else:
-                super().get_slice_numpy(
-                    to_be_loaded, ((selection[0],), (selection[-1] + 1,))
-                )  # get relevant section of chunk
-                loc_idx = selection - selection[0]
-                out[out_selection] = to_be_loaded[loc_idx]
-
-        else:
-            chunk_size = ndindex.ChunkSize(chunks)
-            # repeated indices are grouped together
-            intersecting_chunks = chunk_size.as_subchunks(
-                _slice, shape
-            )  # if _slice is (), returns all chunks
-            for c in intersecting_chunks:
-                sub_idx = _slice.as_subindex(c).raw
-                sel_idx = c.as_subindex(_slice)
-                new_shape = sel_idx.newshape(out_shape)
-                start, stop, step = get_ndarray_start_stop(self.ndim, c.raw, shape)
-                chunk = np.empty(
-                    tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype
-                )
-                super().get_slice_numpy(chunk, (start, stop))
-                out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape)
+                out_mid_selection = (idx_order[start:stop],)
+                if (
+                    arr.shape[-1] == 1
+                ):  # can avoid loading in whole chunk if 1D for array indexed chunks, a bit faster
+                    chunk_begin = selection[0]
+                    chunk_end = selection[-1] + 1
+                else:
+                    chunk_begin = chunk_idx * chunks[begin:end]
+                    chunk_end = np.minimum((chunk_idx + 1) * chunks[begin:end], shape[begin:end])
+                loc_mid_selection = tuple(a for a in (selection - chunk_begin).T)
+
+                # loop over chunks coming from slices before and after array indices
+                for cprior_tuple in product(*cprior_slices):
+                    out_prior_selection, prior_selection, loc_prior_selection = _get_selection(
+                        cprior_tuple, prior_tuple, chunks[:begin]
+                    )
+                    for cpost_tuple in product(*cpost_slices):
+                        out_post_selection, post_selection, loc_post_selection = _get_selection(
+                            cpost_tuple, post_tuple, chunks[end:] if end is not None else []
+                        )
+                        locbegin, locend = _get_local_slice(
+                            prior_selection, post_selection, (chunk_begin, chunk_end)
+                        )
+                        to_be_loaded = np.empty(locend - locbegin, dtype=self.dtype)
+                        # basically load whole chunk, except for slice part at beginning and end
+                        super().get_slice_numpy(to_be_loaded, (locbegin, locend))
+                        loc_idx = loc_prior_selection + loc_mid_selection + loc_post_selection
+                        out_idx = out_prior_selection + out_mid_selection + out_post_selection
+                        out[out_idx] = to_be_loaded[loc_idx]
+            return out.reshape(out_shape)  # should have filled in correct order, just need to reshape
+
+        # Default path (handles boolean arrays and any other index kinds)
+        out = np.empty(out_shape, dtype=self.dtype)
+        chunk_size = ndindex.ChunkSize(chunks)
+        _slice = ndindex.ndindex(_slice)  # re-wrap the raw tuple so as_subindex can be used on it
+        intersecting_chunks = chunk_size.as_subchunks(_slice, shape)  # repeated indices are grouped together; if _slice is (), returns all chunks
+        for c in intersecting_chunks:
+            sub_idx = _slice.as_subindex(c).raw
+            sel_idx = c.as_subindex(_slice)
+            new_shape = sel_idx.newshape(out_shape)
+            start, stop, step = get_ndarray_start_stop(self.ndim, c.raw, shape)
+            chunk = np.empty(tuple(sp - st for st, sp in zip(start, stop, strict=True)), dtype=self.dtype)
+            super().get_slice_numpy(chunk, (start, stop))
+            out[sel_idx.raw] = chunk[sub_idx].reshape(new_shape)
 
         return out
 
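For reference, a small standalone sketch (not part of the patch; the array values are made up) of the structured-dtype np.unique trick used by the new multidimensional branch to group fancy-index coordinates by the chunk they fall into:

    import numpy as np

    # Coordinates picked by a fancy index into a (100, 100) array stored as (50, 10) chunks;
    # integer-dividing by the chunk shape gives the chunk coordinate of each element.
    arr = np.array([[50, 15], [60, 5], [10, 3], [55, 12]])
    chunks = np.array([50, 10])
    chunked_arr = np.ascontiguousarray(arr // chunks)

    # Viewing each row as a single structured record lets np.unique group whole rows.
    _, row_ids, idx_inv, chunk_nitems = np.unique(
        chunked_arr.view([("", chunked_arr.dtype)] * chunked_arr.shape[1]),
        return_index=True,
        return_inverse=True,
        return_counts=True,
    )
    unique_chunks = chunked_arr[row_ids]     # one chunk coordinate per group: (0,0), (1,0), (1,1)
    idx_order = np.argsort(idx_inv.ravel())  # reorder so that each chunk's hits are contiguous
    print(chunk_nitems)                      # [1 1 2]: selected elements per touched chunk
    print(arr[idx_order])                    # rows reordered: (10,3), (60,5), (50,15), (55,12)

In the patch, the cumulative counts (chunk_nitems_cumsum) are then used to process all of a chunk's hits together.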
@@ -4537,3 +4596,79 @@ def __setitem__(self, selection, input) -> np.ndarray:
 
     # def __setitem__(self, selection, input) -> np.ndarray:
     #     return NotImplementedError
+
+
+def slice_to_chunktuple(s, n):
+    """
+    Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks.
+    Parameters
+    ----------
+    s : slice
+        A slice object with start, stop, and step attributes.
+    n : int
+        The chunk size (number of elements per chunk) along this axis.
+
+    Returns
+    -------
+    out : tuple of int with the indices of the chunks along this axis that the slice selects from.
+    """
+    start, stop, step = s.start, s.stop, s.step
+    if step > n:
+        return tuple((start + k * step) // n for k in range(ceiling(stop - start, step)))
+    else:
+        return tuple(range(start // n, ceiling(stop, n)))
+
+
+def _get_selection(ctuple, ptuple, chunks):
+    # we assume that at least one element of chunk intersects with the slice
+    # (as a consequence of only looping over intersecting chunks)
+    pselection = ()
+    for i, s, csize in zip(ctuple, ptuple, chunks, strict=True):
+        # we need to advance to first element within chunk that intersects with slice, not
+        # necessarily the first element of chunk
+        # i * csize = s.start + n*step + k, already added n+1 elements, k in [1, step]
+        np1 = (i * csize - s.start + s.step - 1) // s.step  # gives (n + 1)
+        # can have n = -1 if s.start > i * csize, but never < -1 since have to intersect with chunk
+        pselection += (
+            slice(
+                builtins.max(s.start, s.start + np1 * s.step),  # start+(n+1)*step gives i*csize if k=step
+                builtins.min(csize * (i + 1), s.stop),
+                s.step,
+            ),
+        )
+
+    # selection relative to coordinates of out (necessarily step = 1)
+    # stop = start + step * n + k => n = (stop - start - 1) // step
+    # hence, out_stop = out_start + n + 1
+    # ps.start = pt.start + out_start * step
+    out_pselection = tuple(
+        slice(
+            (ps.start - pt.start + pt.step - 1) // pt.step,
+            (ps.start - pt.start + pt.step - 1) // pt.step + (ps.stop - ps.start + ps.step - 1) // ps.step,
+            1,
+        )
+        for ps, pt in zip(pselection, ptuple, strict=True)
+    )
+
+    loc_selection = tuple(slice(0, s.stop - s.start, s.step) for s in pselection)
+
+    return out_pselection, pselection, loc_selection
+
+
+def _get_local_slice(prior_selection, post_selection, chunk_bounds):
+    chunk_begin, chunk_end = chunk_bounds
+    locbegin = np.hstack(
+        (
+            [s.start for s in prior_selection],
+            chunk_begin,
+            [s.start for s in post_selection],
+        ),
+        casting="unsafe",
+        dtype="int64",
+    )
+    locend = np.hstack(
+        ([s.stop for s in prior_selection], chunk_end, [s.stop for s in post_selection]),
+        casting="unsafe",
+        dtype="int64",
+    )
+    return locbegin, locend
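Two quick self-contained checks (not part of the patch) of the helper arithmetic above. First, what slice_to_chunktuple computes (the chunk indices along one axis that a slice's selection spans), using a plain ceil-divide as a stand-in for the imported ceiling helper:

    def ceiling(a, b):  # stand-in for ndindex.subindex_helpers.ceiling
        return -(-a // b)

    def slice_to_chunktuple(s, n):  # copy of the helper above, for a self-contained demo
        start, stop, step = s.start, s.stop, s.step
        if step > n:
            # large steps select at most one element per chunk, so map each element to its chunk directly
            return tuple((start + k * step) // n for k in range(ceiling(stop - start, step)))
        else:
            return tuple(range(start // n, ceiling(stop, n)))

    # Axis chunked every 10 elements:
    print(slice_to_chunktuple(slice(3, 25, 4), 10))   # (0, 1, 2): selects 3, 7, 11, 15, 19, 23
    print(slice_to_chunktuple(slice(3, 60, 25), 10))  # (0, 2, 5): selects 3, 28, 53

Second, the np1 formula in _get_selection, which finds the first element of a strided slice that falls inside a given chunk; a brute-force comparison over a toy slice:

    def first_in_chunk(s, i, csize):
        # same arithmetic as _get_selection: step ceil((i*csize - start) / step) times past start
        np1 = (i * csize - s.start + s.step - 1) // s.step
        return max(s.start, s.start + np1 * s.step)

    s, csize = slice(3, 40, 7), 10  # selects 3, 10, 17, 24, 31, 38
    for i in range(4):              # chunks [0, 10), [10, 20), [20, 30), [30, 40)
        brute = min(x for x in range(s.start, s.stop, s.step) if x >= i * csize)
        assert first_in_chunk(s, i, csize) == brute   # 3, 10, 24, 31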