@@ -1478,17 +1478,19 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
14781478
14791479 """
14801480 # TODO: Make this faster and avoid running out of memory - avoid broadcasting keys
1481+
14811482 ## Can't do this because ndindex doesn't support all the same indexing cases as Numpy
14821483 # if math.prod(self.shape) * self.dtype.itemsize < blosc2.MAX_FAST_PATH_SIZE:
14831484 # return self[:][key] # load into memory for smallish arrays
14841485 shape = self .shape
14851486 chunks = self .chunks
1486- # after this, all indices are slices or arrays of integers
1487- # moreover, all arrays are consecutive (otherwise an error is raised)
1487+
14881488 # TODO: try to optimise and avoid this expand which seems to copy - maybe np.broadcast
14891489 _slice = ndindex .ndindex (key ).expand (shape )
14901490 out_shape = _slice .newshape (shape )
14911491 _slice = _slice .raw
1492+ # now all indices are slices or arrays of integers (or booleans)
1493+ # moreover, all arrays are consecutive (otherwise an error is raised)
14921494
14931495 if np .all ([isinstance (s , (slice , np .ndarray )) for s in _slice ]) and np .all (
14941496 [s .dtype is not bool for s in _slice if isinstance (s , np .ndarray )]
@@ -1500,9 +1502,12 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15001502 begin , end = arridxs [0 ], arridxs [- 1 ] + 1
15011503 flat_shape = tuple ((i .stop - i .start + (i .step - 1 )) // i .step for i in _slice [:begin ])
15021504 idx_dim = np .prod (_slice [begin ].shape )
1505+
1506+ # TODO: find a nicer way to do the copy maybe
15031507 arr = np .empty ((idx_dim , end - begin ), dtype = _slice [begin ].dtype )
15041508 for i , s in enumerate (_slice [begin :end ]):
15051509 arr [:, i ] = s .reshape (- 1 ) # have to do a copy
1510+
15061511 flat_shape += (idx_dim ,)
15071512 flat_shape += tuple ((i .stop - i .start + (i .step - 1 )) // i .step for i in _slice [end :])
15081513 # out_shape could have new dims if indexing arrays are not all 1D
@@ -1520,7 +1525,7 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15201525 chunked_arr = np .ascontiguousarray (
15211526 chunked_arr
15221527 ) # ensure C-order memory to allow structured dtype view
1523- # use np.unique but avoid sort and copy
1528+ # TODO: check that this avoids sort and copy (alternative: maybe do a bincount with structured data types?)
15241529 _ , row_ids , idx_inv , chunk_nitems = np .unique (
15251530 chunked_arr .view ([("" , chunked_arr .dtype )] * chunked_arr .shape [1 ]),
15261531 return_counts = True ,
@@ -1541,6 +1546,7 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
15411546 slice_to_chunktuple (s , c ) for s , c in zip (prior_tuple , chunks [:begin ], strict = True )
15421547 ]
15431548 cpost_slices = [slice_to_chunktuple (s , c ) for s , c in zip (post_tuple , chunks [end :], strict = True )]
1549+ # TODO: rewrite to allow interleaved slices/array indexes
15441550 for chunk_i , chunk_idx in enumerate (unique_chunks ):
15451551 start = 0 if chunk_i == 0 else chunk_nitems_cumsum [chunk_i - 1 ]
15461552 stop = chunk_nitems_cumsum [chunk_i ]
@@ -4576,7 +4582,7 @@ def __setitem__(self, selection, input) -> np.ndarray:
45764582
45774583def slice_to_chunktuple (s , n ):
45784584 """
4579- # credit to ndindex for this function #
4585+ Adapted from _slice_iter in ndindex.ChunkSize.as_subchunks.
45804586 Parameters
45814587 ----------
45824588 s : slice
0 commit comments