Skip to content

Commit 99b166e

Browse files
committed
fix handling of awkward cases, tests completed
1 parent 5d82133 commit 99b166e

File tree

3 files changed

+88
-30
lines changed

3 files changed

+88
-30
lines changed

.cspell/custom-dictionary.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ fairmat
106106
fillvalue
107107
flatfield
108108
fluence
109+
fourd
109110
fxcef
110111
getlink
111112
getroottree
@@ -170,6 +171,7 @@ nxdata
170171
nxdl
171172
nxdls
172173
nxentry
174+
oned
173175
optionalities
174176
orcid
175177
otherfile
@@ -193,10 +195,12 @@ showlegend
193195
straße
194196
submoduled
195197
superproject
198+
threed
196199
tnxdl
197200
tofile
198201
tommaso
199202
tracebacklimit
203+
twod
200204
underload
201205
uniquify
202206
unitless

src/pynxtools/dataconverter/chunk.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,6 @@ def prioritized_axes_heuristic(
114114
a compromise for slicing about equally all three orthogonal
115115
directions.
116116
117-
Examples:
118-
* prioritized_axes_heuristic((100000, 2048, 2048), (0, 1, 2))
119-
* prioritized_axes_heuristic((1000000, 3), (0, 1))
120-
* prioritized_axes_heuristic((60, 60, 180), (2, 1, 0))
121-
122117
Returns value for the chunks parameter of h5py create_dataset
123118
* tuple[int, ...], explicit chunk size
124119
* True, the fallback to h5py guess_chunk auto-chunking."""
@@ -134,14 +129,20 @@ def prioritized_axes_heuristic(
134129
f"chunk strategy h5py auto used for incorrect axes priority setting"
135130
)
136131
return True
132+
if len(priority) != len(shape): # need a priority for each axis
133+
logger.info(
134+
f"chunk strategy h5py auto used for incorrect axes priority setting"
135+
)
136+
return True
137137
if len(shape) == 0:
138138
raise ValueError("chunk_shape not allowed for scalar datasets.")
139139
# also h5py by default would raise in such a case
140140
chunk_shape: list[float] = list(float(extent) for extent in shape)
141141
max_byte_per_chunk: int = int(CHUNK_CONFIG_DEFAULT["byte_size"])
142142
byte_per_item: int = data.itemsize
143143

144-
dim = 0
144+
pdx = 0
145+
dim = priority[pdx]
145146
idx = 0
146147
logger.debug(
147148
f"chunk strategy, prioritized_axes_heuristic analyzing for shape {shape} and byte_per_item {byte_per_item} ..."
@@ -195,9 +196,10 @@ def prioritized_axes_heuristic(
195196
else:
196197
chunk_shape[dim] = (chunk_shape[dim] / 2) + 1
197198

198-
if dim < (len(shape) - 1):
199+
if pdx < (len(shape) - 1):
199200
if chunk_shape[dim] < 2:
200-
dim += 1
201+
pdx += 1
202+
dim = priority[pdx]
201203
# seems we cannot reduce byte_per_chunk further by splitting
202204
# along dim, so unfortunately need to consider splitting across
203205
# the next, less prioritized axis

tests/dataconverter/test_chunk.py

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,81 @@
1818
"""Test cases chunking and compression."""
1919

2020
import numpy as np
21+
import pytest
2122

2223
from pynxtools.dataconverter.chunk import prioritized_axes_heuristic
2324

2425

25-
def test_prioritized_axes_heuristic():
26-
pass
27-
# array = np.zeros((8, 1024, 1024), np.float64)
28-
# intentional usage
29-
# assert () == prioritized_axes_heuristic(array, (0, 1, 2))
30-
# awkward
31-
# assert () == prioritized_axes_heuristic(array, (0, 2, 1))
32-
# assert () == prioritized_axes_heuristic(array, (1, 2, 0))
33-
# assert () == prioritized_axes_heuristic(array, (1, 0, 2))
34-
# assert () == prioritized_axes_heuristic(array, (2, 0, 1))
35-
# assert () == prioritized_axes_heuristic(array, (2, 1, 0))
36-
# scalar
37-
# assert prioritized_axes_heuristic(2, (0,))
38-
# unlimited axis
39-
# assert prioritized_axes_heuristic(???, (0,))
40-
# multiples
41-
# assert prioritized_axes_heuristic(array, ())
42-
# assert prioritized_axes_heuristic(array, (0,))
43-
# assert prioritized_axes_heuristic(array, (0, 1,))
44-
# assert prioritized_axes_heuristic(array, (0, 0,))
45-
# assert prioritized_axes_heuristic(array, (0, 1, 1))
46-
# assert prioritized_axes_heuristic(array, (0, 1, 2, 2))
26+
@pytest.mark.parametrize(
27+
"axes, expected",
28+
[
29+
((0, 1, 2), (1, 250, 1000)),
30+
((0, 2, 1), (1, 250, 1000)),
31+
((1, 2, 0), (7, 32, 1000)),
32+
((1, 0, 2), (7, 32, 1000)),
33+
((2, 0, 1), (8, 250, 125)),
34+
((2, 1, 0), (8, 250, 125)),
35+
((), True),
36+
((0,), True),
37+
(
38+
(
39+
0,
40+
1,
41+
),
42+
True,
43+
),
44+
((0, 0), True),
45+
((0, 1, 1), True),
46+
((0, 1, 2, 3), True),
47+
((0, 1, 2, 2), True),
48+
],
49+
ids=[
50+
"intentional-small",
51+
"awkward-small",
52+
"awkward-small",
53+
"awkward-small",
54+
"awkward-small",
55+
"awkward-small",
56+
"scalar",
57+
"oned",
58+
"twod",
59+
"twod-multiples",
60+
"threed-multiples",
61+
"fourd",
62+
"fourd-multiples",
63+
],
64+
)
65+
def test_prioritized_axes_heuristic_small(
66+
axes: tuple[int, ...], expected: tuple[int, ...] | bool
67+
):
68+
array = np.zeros((8, 250, 1000), np.float32)
69+
assert prioritized_axes_heuristic(array, axes) == expected
70+
71+
72+
@pytest.mark.parametrize(
73+
"axes, expected",
74+
[
75+
((0, 1, 2), (1, 125, 2000)),
76+
((0, 2, 1), (1, 1000, 250)),
77+
((1, 2, 0), (128, 1, 2000)),
78+
((1, 0, 2), (128, 1, 2000)),
79+
((2, 0, 1), (87, 1000, 2)),
80+
((2, 1, 0), (87, 1000, 2)),
81+
],
82+
ids=[
83+
"intentional-large",
84+
"awkward-large",
85+
"awkward-large",
86+
"awkward-large",
87+
"awkward-large",
88+
"awkward-large",
89+
],
90+
)
91+
def test_prioritized_axes_heuristic_large(
92+
axes: tuple[int, ...], expected: tuple[int, ...]
93+
):
94+
array = np.zeros((128, 1000, 2000), np.float32)
95+
assert prioritized_axes_heuristic(array, axes) == expected
96+
97+
98+
# unlimited axis

0 commit comments

Comments (0)