2727
2828import ndindex
2929import numpy as np
30+ from ndindex .subindex_helpers import ceiling
3031
3132import blosc2
3233from blosc2 import SpecialValue , blosc2_ext , compute_chunks_blocks
@@ -1459,7 +1460,7 @@ def T(self):
14591460 raise ValueError ("This property only works for 2-dimensional arrays." )
14601461 return permute_dims (self )
14611462
1462- def get_fselection_numpy (self , key : list | np .ndarray ) -> np .ndarray :
1463+ def get_fselection_numpy (self , key : list | np .ndarray ) -> np .ndarray : # noqa: C901
14631464 """
14641465 Select a slice from the array using a fancy index.
14651466 Closely matches NumPy fancy indexing behaviour, except in
@@ -1482,60 +1483,131 @@ def get_fselection_numpy(self, key: list | np.ndarray) -> np.ndarray:
14821483 # return self[:][key] # load into memory for smallish arrays
14831484 shape = self .shape
14841485 chunks = self .chunks
1486+ # after this, all indices are slices or arrays of integers
1487+ # moreover, all arrays are consecutive (otherwise an error is raised)
14851488 _slice = ndindex .ndindex (key ).expand (shape )
14861489 out_shape = _slice .newshape (shape )
1487- out = np .empty (out_shape , dtype = self .dtype )
1488- key = np .array (key )
1489-
1490- if self .ndim == 1 and len (key .shape ) == 1 :
1491- # Fast path so we can avoid the costly lines in slow path
1492- # (sub_idx = _slice.as_subindex(c).raw, sel_idx = c.as_subindex(_slice))
1493- if np .any (np .diff (key ) < 0 ):
1494- idx_order = np .argsort (key )
1495- sorted_idxs = key [idx_order ]
1496- else :
1497- idx_order = None
1498- sorted_idxs = key
1499- chunk_nitems = np .bincount (
1500- sorted_idxs // chunks [0 ], minlength = self .schunk .nchunks
1501- ) # number of items per chunk
1490+ _slice = _slice .raw
1491+ shape = np .array (shape )
1492+
1493+ if np .all ([isinstance (s , (slice , np .ndarray )) for s in _slice ]) and np .all (
1494+ [s .dtype is not bool for s in _slice if isinstance (s , np .ndarray )]
1495+ ):
1496+ arr = []
1497+ chunks = np .array (chunks )
1498+ f_ = 0
1499+ # |------|
1500+ # ------| arrs |------
1501+ # f_=0 f_=1 f_=2
1502+ begin = 0
1503+ end = None
1504+ extradims = 0
1505+ flat_shape = ()
1506+ for num , i in enumerate (_slice ):
1507+ if isinstance (i , np .ndarray ):
1508+ if f_ == 2 :
1509+ raise ValueError ("Cannot use slices between arrays of integers in index" )
1510+ arr += [i .flatten ()]
1511+ extradims += i .ndim - 1
1512+ if f_ == 0 :
1513+ begin = num
1514+ f_ = 1
1515+ else :
1516+ if f_ == 1 : # finished adding arrays
1517+ arr = np .stack (arr )
1518+ end = num
1519+ f_ = 2
1520+ flat_shape += (arr .shape [- 1 ],)
1521+ flat_shape += ((i .stop - i .start ) // i .step ,)
1522+ if not isinstance (arr , np .ndarray ): # might have missed last part of loop
1523+ arr = np .stack (arr )
1524+ flat_shape += (arr .shape [- 1 ],)
1525+ # out_shape could have new dims if indexing arrays are not all 1D
1526+ # (we have just flattened them so need to handle accordingly)
1527+ idx_order = np .lexsort (
1528+ tuple (a for a in reversed (arr ))
1529+ ) # sort by column with priority to first column
1530+ sorted_idxs = arr [:, idx_order ]
1531+ out = np .empty (flat_shape , dtype = self .dtype )
1532+
1533+ divider = chunks [begin :end ]
1534+ unique_chunks , chunk_nitems = np .unique (sorted_idxs .T // divider , axis = 0 , return_counts = True )
15021535 chunk_nitems_cumsum = np .cumsum (chunk_nitems )
1503- for chunk_i , c in enumerate (chunk_nitems ):
1504- if c == 0 :
1505- continue # no items in chunk
1536+ prior_tuple = _slice [:begin ]
1537+ post_tuple = _slice [end :] if end is not None else ()
1538+ cprior_slices = [
1539+ slice_to_chunktuple (s , c ) for s , c in zip (prior_tuple , chunks [:begin ], strict = True )
1540+ ]
1541+ cpost_slices = (
1542+ [slice_to_chunktuple (s , c ) for s , c in zip (post_tuple , chunks [end :], strict = True )]
1543+ if end is not None
1544+ else []
1545+ )
1546+ for chunk_i , chunk_idx in enumerate (unique_chunks ):
15061547 start = 0 if chunk_i == 0 else chunk_nitems_cumsum [chunk_i - 1 ]
15071548 stop = chunk_nitems_cumsum [chunk_i ]
1508- selection = sorted_idxs [start :stop ]
1509- out_selection = idx_order [start :stop ] if idx_order is not None else slice (start , stop )
1510- to_be_loaded = np .empty ((selection [- 1 ] - selection [0 ] + 1 ,), dtype = self .dtype )
1511- # if len(selection) < 10:
1512- # # TODO: optimise for case of few elements and go index-by-index
1513- # selector = out_selection.start + np.arange(out_selection.stop) if isinstance(out_selection, slice) else out_selection
1514- # for j, idx in enumerate(sorted_idxs[start:stop]):
1515- # out[out_selection[j]] = self[idx]
1516- # else:
1517- super ().get_slice_numpy (
1518- to_be_loaded , ((selection [0 ],), (selection [- 1 ] + 1 ,))
1519- ) # get relevant section of chunk
1520- loc_idx = selection - selection [0 ]
1521- out [out_selection ] = to_be_loaded [loc_idx ]
1522-
1523- else :
1524- chunk_size = ndindex .ChunkSize (chunks )
1525- # repeated indices are grouped together
1526- intersecting_chunks = chunk_size .as_subchunks (
1527- _slice , shape
1528- ) # if _slice is (), returns all chunks
1529- for c in intersecting_chunks :
1530- sub_idx = _slice .as_subindex (c ).raw
1531- sel_idx = c .as_subindex (_slice )
1532- new_shape = sel_idx .newshape (out_shape )
1533- start , stop , step = get_ndarray_start_stop (self .ndim , c .raw , shape )
1534- chunk = np .empty (
1535- tuple (sp - st for st , sp in zip (start , stop , strict = True )), dtype = self .dtype
1536- )
1537- super ().get_slice_numpy (chunk , (start , stop ))
1538- out [sel_idx .raw ] = chunk [sub_idx ].reshape (new_shape )
1549+ selection = sorted_idxs [:, start :stop ].T
1550+ out_selection = idx_order [start :stop ].T
1551+ # loop over chunks coming from slices before and after array indices
1552+ for cprior_tuple in product (* cprior_slices ):
1553+ prior_selection = selector (cprior_tuple , prior_tuple , chunks [:begin ])
1554+ # selection relative to coordinates of out (necessarily step = 1)
1555+ out_prior_selection = tuple (
1556+ slice (ps .start - pt .start , ps .stop - pt .start , 1 )
1557+ for ps , pt in zip (prior_selection , prior_tuple , strict = True )
1558+ )
1559+ for cpost_tuple in product (* cpost_slices ):
1560+ post_selection = selector (
1561+ cpost_tuple , post_tuple , chunks [end :] if end is not None else []
1562+ )
1563+ # selection relative to coordinates of out (necessarily step = 1)
1564+ out_post_selection = tuple (
1565+ slice (ps .start - pt .start , ps .stop - pt .start , 1 )
1566+ for ps , pt in zip (post_selection , post_tuple , strict = True )
1567+ )
1568+ chunk_begin = chunk_idx * chunks [begin :end ]
1569+ chunk_end = np .minimum ((chunk_idx + 1 ) * chunks [begin :end ], shape [begin :end ])
1570+ locbegin = np .hstack (
1571+ (
1572+ [s .start for s in prior_selection ],
1573+ chunk_begin ,
1574+ [s .start for s in post_selection ],
1575+ ),
1576+ casting = "unsafe" ,
1577+ dtype = "int64" ,
1578+ )
1579+ locend = np .hstack (
1580+ ([s .stop for s in prior_selection ], chunk_end , [s .stop for s in post_selection ]),
1581+ casting = "unsafe" ,
1582+ dtype = "int64" ,
1583+ )
1584+ out_selection = out_prior_selection + (out_selection ,) + out_post_selection
1585+ to_be_loaded = np .empty (locend - locbegin , dtype = self .dtype )
1586+ # basically load whole chunk, except for slice part at beginning and end
1587+ super ().get_slice_numpy (
1588+ to_be_loaded , (locbegin , locend )
1589+ ) # get relevant section of chunk
1590+ loc_idx = (
1591+ tuple (slice (0 , s .stop - s .start , s .step ) for s in prior_selection )
1592+ + tuple (a for a in (selection - chunk_begin ).T )
1593+ + tuple (slice (0 , s .stop - s .start , s .step ) for s in post_selection )
1594+ )
1595+ out [out_selection ] = to_be_loaded [loc_idx ]
1596+ return out .reshape (out_shape ) # should have filled in correct order, just need to reshape
1597+
1598+ # Default when there are booleans
1599+ out = np .empty (out_shape , dtype = self .dtype )
1600+ chunk_size = ndindex .ChunkSize (chunks )
1601+ # repeated indices are grouped together
1602+ intersecting_chunks = chunk_size .as_subchunks (_slice , shape ) # if _slice is (), returns all chunks
1603+ for c in intersecting_chunks :
1604+ sub_idx = _slice .as_subindex (c ).raw
1605+ sel_idx = c .as_subindex (_slice )
1606+ new_shape = sel_idx .newshape (out_shape )
1607+ start , stop , step = get_ndarray_start_stop (self .ndim , c .raw , shape )
1608+ chunk = np .empty (tuple (sp - st for st , sp in zip (start , stop , strict = True )), dtype = self .dtype )
1609+ super ().get_slice_numpy (chunk , (start , stop ))
1610+ out [sel_idx .raw ] = chunk [sub_idx ].reshape (new_shape )
15391611
15401612 return out
15411613
@@ -4519,3 +4591,31 @@ def __setitem__(self, selection, input) -> np.ndarray:
45194591
45204592# def __setitem__(self, selection, input) -> np.ndarray:
45214593# return NotImplementedError
4594+
4595+
4596+ def slice_to_chunktuple (s , n ):
4597+ """
4598+ # credit to ndindex for this function #
4599+ Parameters
4600+ ----------
4601+ s : slice
4602+ A slice object with start, stop, and step attributes.
4603+ n : int
4604+ The number of elements in the chunk axis
4605+
4606+ Returns
4607+ -------
4608+ out: tuple
4609+ """
4610+ start , stop , step = s .start , s .stop , s .step
4611+ if step > n :
4612+ return tuple ((start + k * step ) // n for k in range (ceiling (stop - start , step )))
4613+ else :
4614+ return tuple (range (start // n , ceiling (stop , n )))
4615+
4616+
4617+ def selector (ctuple , _tuple , chunks ):
4618+ return tuple (
4619+ slice (builtins .max (s .start , i * csize ), builtins .min (csize * (i + 1 ), s .stop ), s .step )
4620+ for i , s , csize in zip (ctuple , _tuple , chunks , strict = True )
4621+ )
0 commit comments