MR: Implement Chunked Slicing

ax3l · ax3l · commit 97034589b3f8 · 2022-02-17T18:27:05.000-08:00
diff --git a/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py b/openpmd_viewer/openpmd_timeseries/data_reader/io_reader/utilities.py
@@ -13,13 +13,14 @@
 
 def chunk_to_slice(chunk):
     """
-    Convert an openPMD_api.ChunkInfo to np.s_
+    Convert an openPMD_api.ChunkInfo to slice
     """
     stops = [a + b for a, b in zip(chunk.offset, chunk.extent)]
     indices_per_dim = zip(chunk.offset, stops)
     index_tuple = map(lambda s: slice(s[0], s[1], None), indices_per_dim)
     return tuple(index_tuple)
 
+
 def get_data(series, record_component, i_slice=None, pos_slice=None,
              output_type=np.float64):
     """
@@ -67,46 +68,60 @@ def get_data(series, record_component, i_slice=None, pos_slice=None,
             series.flush()
             data[chunk_slice] = x
     else:
-        # Get largest element of pos_slice
-        max_pos = max(pos_slice)
-        # Create list of indices list_index of type
-        # [:, :, :, ...] where Ellipsis starts at max_pos + 1
-        list_index = [np.s_[:]] * (max_pos + 2)
-        list_index[max_pos + 1] = np.s_[...]
-        # Fill list_index with elements of i_slice
-        for count, dir_index in enumerate(pos_slice):
-            list_index[dir_index] = i_slice[count]
-        # Convert list_index into a tuple
-        tuple_index = tuple(list_index)
-        print("tuple_index={}".format(tuple_index))
-
-        # potentially a better approach as below, since we only slice
-        # out hyperplanes, planes & lines:
-        # - allocate zero array for result, which is a hyperplane/plane/line
-        # - iterate over slices in tuple_index
-        # - reduce selected read range to "valid" range
-
-        # initial experiment:
-        # full_indices can be HUGE, avoid!!
-        full_indices = np.indices(record_component.shape)[0]
-        #full_shape = full_indices.shape
-        #print("full_shape.shape={}".format(full_shape))
-        #print("full_shape={}".format(full_shape))
-
-        # prepare sliced data according to tuple_index
-        slice_indices = full_indices[tuple_index]
-        slice_shape = slice_indices.shape
+        full_shape = record_component.shape
+
+        slice_shape = list(full_shape)      # copy
+        pos_slice_sorted = pos_slice.copy() # copy for in-place sort
+        pos_slice_sorted.sort(reverse=True)
+        for dir_index in pos_slice_sorted:  # remove indices in list
+            # future note: slice_shape needs to be converted to list and back
+            # to tuple once we fix
+            #   https://github.com/openPMD/openPMD-api/issues/808
+            del slice_shape[dir_index]
+
+        # mask invalid regions with zero
         data = np.zeros(slice_shape, dtype=output_type)
-        # write now in index space between intersection of slice_indices and chunk indices
+
+        # build requested ND slice with respect to full data
+        s = []
+        for d in range(len(full_shape)):
+            if d in pos_slice:
+                s.append(i_slice[pos_slice.index(d)]) # one index in such directions
+            else: # all indices in other direction
+                s.append(slice(None, None, None))
+        s = tuple(s)
+
+        # now we check which chunks contribute to the slice
         for chunk in chunks:
+            skip_this_chunk = False
+            s_valid = list(s)  # same as s but reduced to valid regions in chunk
+            s_target = []  # starts and stops in sliced array
             chunk_slice = chunk_to_slice(chunk)
-            chunk_indices = full_indices[chunk_slice]
-            intersect_indices = np.intersect1d(chunk_indices, slice_indices)
-            print(intersect_indices)
-            data[slice_indices] = record_component[intersect_indices]
-        #data = np.zeros_like(record_component)[tuple_index]  # just avoid invalid reads for now
-
-    series.flush()
+            # read only valid region
+            for d, slice_d in enumerate(s):
+                start = chunk_slice[d].start
+                stop = chunk_slice[d].stop
+                if isinstance(slice_d, int):
+                    # Nothing to do for s_target (dimension sliced out)
+                    # Nothing to do for s_valid (dimension index is set)
+                    if slice_d < start or slice_d >= stop:
+                        # chunk not in slice line/plane
+                        skip_this_chunk = True
+                else:
+                    if slice_d.start is None or slice_d.start < start:
+                        s_valid[d] = slice(start, s_valid[d].stop)
+                    if slice_d.stop is None or slice_d.stop > stop:
+                        s_valid[d] = slice(s_valid[d].start, stop)
+                    s_target.append(slice(start, stop))
+
+            s_valid = tuple(s_valid)
+            s_target = tuple(s_target)
+
+            # read
+            if not skip_this_chunk:
+                x = record_component[s_valid]
+                series.flush()
+                data[s_target] = x
 
     # Convert to the right type
     if data.dtype != output_type:
diff --git a/setup.py b/setup.py
@@ -38,12 +38,12 @@ def run_tests(self):
       tests_require=['pytest', 'jupyter'],
       install_requires=install_requires,
       extras_require = {
-        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.13.3,~=0.14.0", "wget"],
+        'all': ["ipympl", "ipywidgets", "matplotlib", "numba", "openpmd-api~=0.14.0", "wget"],
         'GUI':  ["ipywidgets", "ipympl", "matplotlib"],
         'plot': ["matplotlib"],
         'tutorials': ["ipywidgets", "ipympl", "matplotlib", "wget"],
         'numba': ["numba"],
-        'openpmd-api': ["openpmd-api~=0.13.3,~=0.14.0"]
+        'openpmd-api': ["openpmd-api~=0.14.0"]
         },
       cmdclass={'test': PyTest},
       platforms='any',