Improve ndindex performance (PyInf#12655)

We're using ndindex as part of versioned-hdf5. Unfortunately in some common circumstances it can be pretty slow. Consider the following code:
```
>>> # THIS CELL WAS AUTO-GENERATED BY PYFLYBY
>>> import atexit
>>> import h5py
>>> import numpy as np
>>> import shutil
>>> import tempfile
>>> from versioned_hdf5 import VersionedHDF5File
>>> # END AUTO-GENERATED BLOCK


>>> d = tempfile.mkdtemp()
>>> atexit.register(shutil.rmtree, d)

[PYFLYBY] import atexit
[PYFLYBY] import shutil
[PYFLYBY] import tempfile

<function shutil.rmtree(path, ignore_errors=False, onerror=None, *, dir_fd=None)>

>>> def r0(d):
...     with h5py.File(f'{d}/data.h5', mode="w") as f:
...         vf = VersionedHDF5File(f)
...         with vf.stage_version("r0") as sv:
...             sv.create_dataset('values', data=np.arange(100_000_000), chunks=(1_000,), maxshape=(None,))

[PYFLYBY] from versioned_hdf5 import VersionedHDF5File
[PYFLYBY] import h5py
[PYFLYBY] import numpy as np


>>> def r1(d):
...     with h5py.File(f'{d}/data.h5', mode="r+") as f:
...         vf = VersionedHDF5File(f)
...         with vf.stage_version("r1") as sv:
...             values = sv['values']
...             # resizing creates an InMemoryDataset and populates data_dict
...             values.resize((110_000_000,))
...             # reading from InMemoryDataset is now slow
...             _ = values[500:90_000_000]


>>> %load_ext pyinstrument


>>> r0(d)
>>> %pyinstrument r1(d)


  _     ._   __/__   _ _  _  _ _/_   Recorded: 14:20:41  Samples:  30357
 /_//_/// /_\ / //_// / //_'/ //     Duration: 42.410    CPU time: 41.642
/   _/                      v4.6.1

Program: /usr/local/python/python-3.11/std/lib64/python3.11/site-packages/ipykernel_launcher.py -f /u/bessen/.local/share/jupyter/runtime/kernel-f2241356-7014-4cb5-99a3-98f8ade0d4d7.json

42.409 <module>  ../../../tmp/ipykernel_3610251/2296172108.py:1
`- 42.407 r1  ../../../tmp/ipykernel_3610251/2453629480.py:1
   |- 15.246 DatasetWrapper.__getitem__  versioned_hdf5/wrappers.py:1267
   |  `- 15.246 InMemoryDataset.__getitem__  versioned_hdf5/wrappers.py:757
   |     `- 15.246 InMemoryDataset.get_index  versioned_hdf5/wrappers.py:655
   |        |- 9.144 [self]  versioned_hdf5/wrappers.py
   |        |- 4.383 InMemoryDatasetID._read_chunk  versioned_hdf5/wrappers.py:1385
   |        |  `- 4.275 Dataset.__getitem__  h5py/_hl/dataset.py:749
   |        |     |- 1.916 Reader.read  <built-in>
   |        |     |- 1.704 Dataset._fast_reader  h5py/_hl/dataset.py:527
   |        |     |  `- 1.689 [self]  h5py/_hl/dataset.py
   |        |     `- 0.604 [self]  h5py/_hl/dataset.py
   |        `- 0.878 where  <__array_function__ internals>:177
   |              [2 frames hidden]  <__array_function__ internals>, <buil...
   |- 13.388 _GeneratorContextManager.__exit__  contextlib.py:141
   |  `- 13.388 VersionedHDF5File.stage_version  versioned_hdf5/api.py:267
   |     `- 13.371 commit_version  versioned_hdf5/versions.py:71
   |        |- 8.712 create_virtual_dataset  versioned_hdf5/backend.py:429
   |        |  |- 2.525 [self]  versioned_hdf5/backend.py
   |        |  |- 2.068 Group.create_virtual_dataset  h5py/_hl/group.py:188
   |        |  |  `- 1.646 VirtualLayout.make_dataset  h5py/_hl/vds.py:228
   |        |  |- 1.932 select  h5py/_hl/selections.py:19
   |        |  |  `- 1.452 [self]  h5py/_hl/selections.py
   |        |  |- 0.728 Dataset.shape  h5py/_hl/dataset.py:467
   |        |  |  `- 0.699 [self]  h5py/_hl/dataset.py
   |        |  `- 0.530 <dictcomp>  versioned_hdf5/backend.py:437
   |        |     `- 0.525 [self]  versioned_hdf5/backend.py
   |        |- 3.961 write_dataset_chunks  versioned_hdf5/backend.py:346
   |        |  |- 2.349 Hashtable.hash  versioned_hdf5/hashtable.py:116
   |        |  |  `- 1.822 openssl_sha256  <built-in>
   |        |  `- 0.659 Hashtable.__contains__  <frozen _collections_abc>:778
   |        |     `- 0.609 Hashtable.__getitem__  versioned_hdf5/hashtable.py:205
   |        |        `- 0.595 [self]  versioned_hdf5/hashtable.py
   |        `- 0.688 [self]  versioned_hdf5/versions.py
   |- 9.595 InMemoryDataset.resize  versioned_hdf5/wrappers.py:609
   |  |- 5.824 InMemoryDatasetID.data_dict  versioned_hdf5/wrappers.py:1298
   |  |  |- 2.915 <dictcomp>  versioned_hdf5/wrappers.py:1327
   |  |  |  `- 2.680 spaceid_to_slice  versioned_hdf5/slicetools.py:6
   |  |  |     `- 2.317 [self]  versioned_hdf5/slicetools.py
   |  |  |- 1.683 <listcomp>  versioned_hdf5/wrappers.py:1317
   |  |  |  `- 1.627 [self]  versioned_hdf5/wrappers.py
   |  |  `- 0.830 [self]  versioned_hdf5/wrappers.py
   |  |- 1.712 [self]  versioned_hdf5/wrappers.py
   |  `- 1.710 InMemoryDataset.get_index  versioned_hdf5/wrappers.py:655
   |     `- 1.710 InMemoryDataset.__getitem__  h5py/_hl/dataset.py:749
   |        `- 1.706 Reader.read  <built-in>
   |- 3.185 _GeneratorContextManager.__enter__  contextlib.py:132
   |  `- 3.185 VersionedHDF5File.stage_version  versioned_hdf5/api.py:267
   |     `- 3.185 create_version_group  versioned_hdf5/versions.py:22
   |        `- 3.184 Group.visititems  h5py/_hl/group.py:635
   |           |- 2.613 proxy  h5py/_hl/group.py:660
   |           |  |- 1.673 _get  versioned_hdf5/versions.py:57
   |           |  |  `- 1.275 InMemoryGroup.__setitem__  versioned_hdf5/wrappers.py:110
   |           |  |     `- 1.275 InMemoryGroup._add_to_data  versioned_hdf5/wrappers.py:114
   |           |  |        `- 1.275 InMemoryDataset.__init__  versioned_hdf5/wrappers.py:503
   |           |  |           `- 0.592 KeysViewHDF5.__iter__  <frozen _collections_abc>:835
   |           |  |              `- 0.592 AttributeManager.__iter__  h5py/_hl/attrs.py:257
   |           |  |                 `- 0.441 [self]  h5py/_hl/attrs.py
   |           |  `- 0.791 Group.__getitem__  h5py/_hl/group.py:348
   |           |     `- 0.544 [self]  h5py/_hl/group.py
   |           `- 0.571 [self]  h5py/_hl/group.py
   `- 0.985 File.__exit__  h5py/_hl/files.py:601
      `- 0.985 File.close  h5py/_hl/files.py:576



>>> %load_ext line_profiler


>>> from versioned_hdf5.wrappers import InMemoryDataset
>>> r0(d)
>>> %lprun -f InMemoryDataset.get_index.__wrapped__ r1(d)

Timer unit: 1e-09 s

Total time: 14.9595 s
File: /codemill/bessen/ndindex_venv/lib64/python3.11/site-packages/versioned_hdf5/wrappers.py
Function: get_index at line 655

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   655                                               @with_phil
   656                                               def get_index(
   657                                                   self,
   658                                                   args: Union[slice, Slice, Tuple, tuple, h5r.RegionReference],
   659                                                   new_dtype: Optional[str] = None,
   660                                                   can_read_direct: Optional[bool] = None,
   661                                               ) -> np.ndarray:
   662                                                   """Read a slice from the HDF5 dataset given by the index.
   663                                           
   664                                                   Takes slices and recarray-style field names (more than one is
   665                                                   allowed!) in any order.  Obeys basic NumPy rules, including
   666                                                   broadcasting.
   667                                           
   668                                                   Parameters
   669                                                   ----------
   670                                                   args : Union[slice, Slice, Tuple, tuple, h5r.RegionReference]
   671                                                       Index to read from the Dataset
   672                                                   new_dtype : Optional[str]
   673                                                       Dtype of the returned array
   674                                                   can_read_direct : Optional[bool]
   675                                                       True if we can read directly from the underlying hdf5 Dataset, False otherwise.
   676                                                       This should be the value of the InMemoryDatasetID instance's ``can_read_direct``
   677                                                       property for this Dataset.
   678                                           
   679                                                       If None, ``self.id.can_read_direct`` is evaluated first to determine if data can
   680                                                       be read directly from the underlying dataset.
   681                                           
   682                                                   Returns
   683                                                   -------
   684                                                   np.ndarray
   685                                                       Array containing data from this dataset from the requested index
   686                                                   """
   687                                                   # This boilerplate code is based on h5py.Dataset.__getitem__
   688         2       8062.0   4031.0      0.0          args = args if isinstance(args, tuple) else (args,)
   689                                           
   690                                                   # Sort field names from the rest of the args.
   691         2      10665.0   5332.5      0.0          names = tuple(x for x in args if isinstance(x, str))
   692                                           
   693         2       2500.0   1250.0      0.0          if names:
   694                                                       # Read a subset of the fields in this structured dtype
   695                                                       if len(names) == 1:
   696                                                           names = names[0]  # Read with simpler dtype of this field
   697                                                       args = tuple(x for x in args if not isinstance(x, str))
   698                                                       return self.fields(names, _prior_dtype=new_dtype)[args]
   699                                           
   700         2       1745.0    872.5      0.0          if new_dtype is None:
   701         2      20228.0  10114.0      0.0              new_dtype = self.dtype
   702         2      73856.0  36928.0      0.0          mtype = h5t.py_create(new_dtype)
   703                                           
   704                                                   # === Special-case region references ====
   705                                           
   706         2       5677.0   2838.5      0.0          if len(args) == 1 and isinstance(args[0], h5r.RegionReference):
   707                                                       obj = h5r.dereference(args[0], self.id)
   708                                                       if obj != self.id:
   709                                                           raise ValueError("Region reference must point to this dataset")
   710                                           
   711                                                       sid = h5r.get_region(args[0], self.id)
   712                                                       mshape = guess_shape(sid)
   713                                                       if mshape is None:
   714                                                           # 0D with no data (NULL or deselected SCALAR)
   715                                                           return Empty(new_dtype)
   716                                                       out = np.empty(mshape, dtype=new_dtype)
   717                                                       if out.size == 0:
   718                                                           return out
   719                                           
   720                                                       sid_out = h5s.create_simple(mshape)
   721                                                       sid_out.select_all()
   722                                                       self.id.read(sid_out, sid, out, mtype)
   723                                                       return out
   724                                           
   725                                                   # === END CODE FROM h5py.Dataset.__getitem__ ===
   726                                           
   727         2     328533.0 164266.5      0.0          idx = ndindex(args).expand(self.shape)
   728                                           
   729         2       1098.0    549.0      0.0          if can_read_direct is None:
   730         1   48158088.0    5e+07      0.3              can_read_direct = self.id.can_read_direct
   731                                           
   732         2       1628.0    814.0      0.0          if can_read_direct:
   733         1 1411348097.0    1e+09      9.4              return super().__getitem__(idx.raw)
   734                                           
   735         1    1542927.0    2e+06      0.0          arr = np.ndarray(idx.newshape(self.shape), new_dtype, order="C")
   736                                           
   737     90001  994078595.0  11045.2      6.6          for chunk in self.chunks.as_subchunks(idx, self.shape):
   738     90000  367273224.0   4080.8      2.5              if chunk not in self.id.data_dict:
   739                                                           self.id.data_dict[chunk] = np.broadcast_to(
   740                                                               self.fillvalue, chunk.newshape(self.shape)
   741                                                           )
   742     90000  295761742.0   3286.2      2.0              elif isinstance(self.id.data_dict[chunk], (slice, Slice, tuple, Tuple)):
   743    360000  439296532.0   1220.3      2.9                  raw_idx = Tuple(
   744     90000  226675754.0   2518.6      1.5                      self.id.data_dict[chunk],
   745     90000  109581410.0   1217.6      0.7                      *[slice(0, len(i)) for i in chunk.args[1:]],
   746     90000   99604868.0   1106.7      0.7                  ).raw
   747     90000 4136405229.0  45960.1     27.7                  self.id.data_dict[chunk] = self.id._read_chunk(raw_idx)
   748                                           
   749     90000  269383258.0   2993.1      1.8              if self.id.data_dict[chunk].size != 0:
   750     90000 3196593424.0  35517.7     21.4                  arr_idx = chunk.as_subindex(idx)
   751     90000 2472868596.0  27476.3     16.5                  index = idx.as_subindex(chunk)
   752     90000  890518785.0   9894.7      6.0                  arr[arr_idx.raw] = self.id.data_dict[chunk][index.raw]
   753                                           
   754                                                   # Return arr as a scalar if it is shape () (matching h5py)
   755         1       1263.0   1263.0      0.0          return arr[()]
```

In particular, note that in `InMemoryDataset.get_index` we spend 9.144 seconds in "[self]" and 0.878 seconds in "where":
```
   |     `- 15.246 InMemoryDataset.get_index  versioned_hdf5/wrappers.py:655
   |        |- 9.144 [self]  versioned_hdf5/wrappers.py
   |        |- 4.383 InMemoryDatasetID._read_chunk  versioned_hdf5/wrappers.py:1385
   |        |  `- 4.275 Dataset.__getitem__  h5py/_hl/dataset.py:749
   |        |     |- 1.916 Reader.read  <built-in>
   |        |     |- 1.704 Dataset._fast_reader  h5py/_hl/dataset.py:527
   |        |     |  `- 1.689 [self]  h5py/_hl/dataset.py
   |        |     `- 0.604 [self]  h5py/_hl/dataset.py
   |        `- 0.878 where  <__array_function__ internals>:177
   |              [2 frames hidden]  <__array_function__ internals>, <buil...
```
These timings come from `ndindex` but don't show up properly because it is compiled as a Cython extension. If I disable cythonization, you can see where it's spending its time (and it's also even slower):
```
>>> r0(d)
>>> %pyinstrument r1(d)


  _     ._   __/__   _ _  _  _ _/_   Recorded: 14:24:32  Samples:  55759
 /_//_/// /_\ / //_// / //_'/ //     Duration: 66.820    CPU time: 66.438
/   _/                      v4.6.1

Program: /usr/local/python/python-3.11/std/lib64/python3.11/site-packages/ipykernel_launcher.py -f /u/bessen/.local/share/jupyter/runtime/kernel-f2241356-7014-4cb5-99a3-98f8ade0d4d7.json

66.820 <module>  ../../../tmp/ipykernel_3619864/2296172108.py:1
`- 66.817 r1  ../../../tmp/ipykernel_3619864/2453629480.py:1
   |- 32.529 DatasetWrapper.__getitem__  versioned_hdf5/wrappers.py:1267
   |  `- 32.529 InMemoryDataset.__getitem__  versioned_hdf5/wrappers.py:757
   |     `- 32.529 InMemoryDataset.get_index  versioned_hdf5/wrappers.py:655
   |        |- 19.605 Tuple.as_subindex  ndindex/tuple.py:627
   |        |  |- 8.551 Slice.as_subindex  ndindex/slice.py:501
   |        |  |  |- 3.618 Slice.reduce  ndindex/slice.py:212
   |        |  |  |  |- 2.811 Slice.__init__  ndindex/ndindex.py:159
   |        |  |  |  |  `- 2.539 Slice._typecheck  ndindex/slice.py:62
   |        |  |  |  |     `- 1.768 operator_index  ndindex/ndindex.py:681
   |        |  |  |  |        `- 1.345 [self]  ndindex/ndindex.py
   |        |  |  |  `- 0.807 [self]  ndindex/slice.py
   |        |  |  |- 2.226 subindex_slice  ndindex/subindex_helpers.py:83
   |        |  |  |  `- 1.114 where  ndindex/subindex_helpers.py:46
   |        |  |  |     `- 0.722 where  <__array_function__ internals>:177
   |        |  |  |- 1.144 [self]  ndindex/slice.py
   |        |  |  `- 0.997 Slice.__init__  ndindex/ndindex.py:159
   |        |  |     `- 0.911 Slice._typecheck  ndindex/slice.py:62
   |        |  |- 4.300 Tuple.reduce  ndindex/tuple.py:185
   |        |  |  |- 1.656 [self]  ndindex/tuple.py
   |        |  |  `- 1.513 Slice.reduce  ndindex/slice.py:212
   |        |  |     `- 1.107 Slice.__init__  ndindex/ndindex.py:159
   |        |  |        `- 0.972 Slice._typecheck  ndindex/slice.py:62
   |        |  |           `- 0.697 operator_index  ndindex/ndindex.py:681
   |        |  |- 3.529 Tuple.__init__  ndindex/ndindex.py:159
   |        |  |  `- 3.261 Tuple._typecheck  ndindex/tuple.py:49
   |        |  |     `- 1.929 [self]  ndindex/tuple.py
   |        |  `- 1.650 [self]  ndindex/tuple.py
   |        |- 4.108 InMemoryDatasetID._read_chunk  versioned_hdf5/wrappers.py:1385
   |        |  `- 4.002 Dataset.__getitem__  h5py/_hl/dataset.py:749
   |        |     |- 1.817 Reader.read  <built-in>
   |        |     `- 1.655 Dataset._fast_reader  h5py/_hl/dataset.py:527
   |        |        `- 1.637 [self]  h5py/_hl/dataset.py
   |        |- 2.265 ChunkSize.as_subchunks  ndindex/chunking.py:143
   |        |  `- 2.230 _indices  ndindex/chunking.py:288
   |        |     |- 0.909 <listcomp>  ndindex/chunking.py:292
   |        |     `- 0.871 Tuple.__init__  ndindex/ndindex.py:159
   |        |        `- 0.801 Tuple._typecheck  ndindex/tuple.py:49
   |        |- 2.179 [self]  versioned_hdf5/wrappers.py
   |        |- 1.487 Tuple.__hash__  ndindex/tuple.py:114
   |        |  `- 1.071 Slice.__hash__  ndindex/slice.py:85
   |        |- 1.057 Tuple.__eq__  ndindex/tuple.py:107
   |        `- 0.932 Tuple.__init__  ndindex/ndindex.py:159
   |           `- 0.860 Tuple._typecheck  ndindex/tuple.py:49
   |- 16.648 _GeneratorContextManager.__exit__  contextlib.py:141
   |  `- 16.648 VersionedHDF5File.stage_version  versioned_hdf5/api.py:267
   |     `- 16.633 commit_version  versioned_hdf5/versions.py:71
   |        |- 10.969 create_virtual_dataset  versioned_hdf5/backend.py:429
   |        |  |- 1.989 [self]  versioned_hdf5/backend.py
   |        |  |- 1.897 Group.create_virtual_dataset  h5py/_hl/group.py:188
   |        |  |  `- 1.467 VirtualLayout.make_dataset  h5py/_hl/vds.py:228
   |        |  |- 1.794 select  h5py/_hl/selections.py:19
   |        |  |  `- 1.361 [self]  h5py/_hl/selections.py
   |        |  |- 1.289 <dictcomp>  versioned_hdf5/backend.py:437
   |        |  |  `- 0.964 Slice.reduce  ndindex/slice.py:212
   |        |  |     `- 0.816 Slice.__init__  ndindex/ndindex.py:159
   |        |  |        `- 0.762 Slice._typecheck  ndindex/slice.py:62
   |        |  |- 1.111 Tuple.__init__  ndindex/ndindex.py:159
   |        |  |  `- 1.017 Tuple._typecheck  ndindex/tuple.py:49
   |        |  `- 0.846 Dataset.shape  h5py/_hl/dataset.py:467
   |        |     `- 0.799 [self]  h5py/_hl/dataset.py
   |        `- 4.984 write_dataset_chunks  versioned_hdf5/backend.py:346
   |           |- 2.393 Hashtable.hash  versioned_hdf5/hashtable.py:116
   |           |  `- 1.827 openssl_sha256  <built-in>
   |           `- 0.953 Hashtable.__contains__  <frozen _collections_abc>:778
   |              `- 0.903 Hashtable.__getitem__  versioned_hdf5/hashtable.py:205
   |                 `- 0.673 Slice.__init__  ndindex/ndindex.py:159
   |- 13.589 InMemoryDataset.resize  versioned_hdf5/wrappers.py:609
   |  |- 7.899 InMemoryDatasetID.data_dict  versioned_hdf5/wrappers.py:1298
   |  |  |- 4.633 <dictcomp>  versioned_hdf5/wrappers.py:1327
   |  |  |  `- 4.225 spaceid_to_slice  versioned_hdf5/slicetools.py:6
   |  |  |     |- 1.730 Tuple.__init__  ndindex/ndindex.py:159
   |  |  |     |  `- 1.607 Tuple._typecheck  ndindex/tuple.py:49
   |  |  |     |     `- 0.932 [self]  ndindex/tuple.py
   |  |  |     |- 1.228 [self]  versioned_hdf5/slicetools.py
   |  |  |     `- 1.193 hyperslab_to_slice  versioned_hdf5/slicetools.py:33
   |  |  |        `- 1.112 Slice.__init__  ndindex/ndindex.py:159
   |  |  |           `- 0.682 Slice._typecheck  ndindex/slice.py:62
   |  |  `- 1.464 <listcomp>  versioned_hdf5/wrappers.py:1317
   |  |     `- 1.405 [self]  versioned_hdf5/wrappers.py
   |  |- 2.367 ChunkSize.as_subchunks  ndindex/chunking.py:143
   |  |  `- 2.327 _indices  ndindex/chunking.py:288
   |  |     |- 1.155 <listcomp>  ndindex/chunking.py:292
   |  |     |  `- 0.884 Slice.__init__  ndindex/ndindex.py:159
   |  |     |     `- 0.710 Slice._typecheck  ndindex/slice.py:62
   |  |     `- 0.856 Tuple.__init__  ndindex/ndindex.py:159
   |  |        `- 0.788 Tuple._typecheck  ndindex/tuple.py:49
   |  |- 1.429 InMemoryDataset.get_index  versioned_hdf5/wrappers.py:655
   |  |  `- 1.429 InMemoryDataset.__getitem__  h5py/_hl/dataset.py:749
   |  |     `- 1.422 Reader.read  <built-in>
   |  `- 0.899 [self]  versioned_hdf5/wrappers.py
   |- 3.085 _GeneratorContextManager.__enter__  contextlib.py:132
   |  `- 3.085 VersionedHDF5File.stage_version  versioned_hdf5/api.py:267
   |     `- 3.085 create_version_group  versioned_hdf5/versions.py:22
   |        `- 3.084 Group.visititems  h5py/_hl/group.py:635
   |           `- 2.533 proxy  h5py/_hl/group.py:660
   |              |- 1.618 _get  versioned_hdf5/versions.py:57
   |              |  `- 1.225 InMemoryGroup.__setitem__  versioned_hdf5/wrappers.py:110
   |              |     `- 1.225 InMemoryGroup._add_to_data  versioned_hdf5/wrappers.py:114
   |              |        `- 1.225 InMemoryDataset.__init__  versioned_hdf5/wrappers.py:503
   |              `- 0.767 Group.__getitem__  h5py/_hl/group.py:348
   `- 0.958 File.__exit__  h5py/_hl/files.py:601
      `- 0.958 File.close  h5py/_hl/files.py:576
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Improve ndindex performance (PyInf#12655) #181

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Improve ndindex performance (PyInf#12655) #181

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions