Skip to content

Commit 99b166e

Browse files
committed
fix handling of awkward cases, tests completed
1 parent 5d82133 commit 99b166e

File tree

3 files changed

+88
-30
lines changed

3 files changed

+88
-30
lines changed

.cspell/custom-dictionary.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ fairmat
106106
fillvalue
107107
flatfield
108108
fluence
109+
fourd
109110
fxcef
110111
getlink
111112
getroottree
@@ -170,6 +171,7 @@ nxdata
170171
nxdl
171172
nxdls
172173
nxentry
174+
oned
173175
optionalities
174176
orcid
175177
otherfile
@@ -193,10 +195,12 @@ showlegend
193195
straße
194196
submoduled
195197
superproject
198+
threed
196199
tnxdl
197200
tofile
198201
tommaso
199202
tracebacklimit
203+
twod
200204
underload
201205
uniquify
202206
unitless

src/pynxtools/dataconverter/chunk.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,6 @@ def prioritized_axes_heuristic(
114114
a compromise for slicing about equally all three orthogonal
115115
directions.
116116
117-
Examples:
118-
* prioritized_axes_heuristic((100000, 2048, 2048), (0, 1, 2))
119-
* prioritized_axes_heuristic((1000000, 3), (0, 1))
120-
* prioritized_axes_heuristic((60, 60, 180), (2, 1, 0))
121-
122117
Returns value for the chunks parameter of h5py create_dataset
123118
* tuple[int, ...], explicit chunk size
124119
* True, the fallback to h5py guess_chunk auto-chunking."""
@@ -134,14 +129,20 @@ def prioritized_axes_heuristic(
134129
f"chunk strategy h5py auto used for incorrect axes priority setting"
135130
)
136131
return True
132+
if len(priority) != len(shape): # need a priority for each axis
133+
logger.info(
134+
f"chunk strategy h5py auto used for incorrect axes priority setting"
135+
)
136+
return True
137137
if len(shape) == 0:
138138
raise ValueError("chunk_shape not allowed for scalar datasets.")
139139
# also h5py by default would raise in such a case
140140
chunk_shape: list[float] = list(float(extent) for extent in shape)
141141
max_byte_per_chunk: int = int(CHUNK_CONFIG_DEFAULT["byte_size"])
142142
byte_per_item: int = data.itemsize
143143

144-
dim = 0
144+
pdx = 0
145+
dim = priority[pdx]
145146
idx = 0
146147
logger.debug(
147148
f"chunk strategy, prioritized_axes_heuristic analyzing for shape {shape} and byte_per_item {byte_per_item} ..."
@@ -195,9 +196,10 @@ def prioritized_axes_heuristic(
195196
else:
196197
chunk_shape[dim] = (chunk_shape[dim] / 2) + 1
197198

198-
if dim < (len(shape) - 1):
199+
if pdx < (len(shape) - 1):
199200
if chunk_shape[dim] < 2:
200-
dim += 1
201+
pdx += 1
202+
dim = priority[pdx]
201203
# seems we cannot reduce byte_per_chunk further by splitting
202204
# along dim, so unfortunately need to consider splitting across
203205
# the next, less prioritized axis

tests/dataconverter/test_chunk.py

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,81 @@
1818
"""Test cases chunking and compression."""
1919

2020
import numpy as np
21+
import pytest
2122

2223
from pynxtools.dataconverter.chunk import prioritized_axes_heuristic
2324

2425

25-
def test_prioritized_axes_heuristic():
26-
pass
27-
# array = np.zeros((8, 1024, 1024), np.float64)
28-
# intentional usage
29-
# assert () == prioritized_axes_heuristic(array, (0, 1, 2))
30-
# awkward
31-
# assert () == prioritized_axes_heuristic(array, (0, 2, 1))
32-
# assert () == prioritized_axes_heuristic(array, (1, 2, 0))
33-
# assert () == prioritized_axes_heuristic(array, (1, 0, 2))
34-
# assert () == prioritized_axes_heuristic(array, (2, 0, 1))
35-
# assert () == prioritized_axes_heuristic(array, (2, 1, 0))
36-
# scalar
37-
# assert prioritized_axes_heuristic(2, (0,))
38-
# unlimited axis
39-
# assert prioritized_axes_heuristic(???, (0,))
40-
# multiples
41-
# assert prioritized_axes_heuristic(array, ())
42-
# assert prioritized_axes_heuristic(array, (0,))
43-
# assert prioritized_axes_heuristic(array, (0, 1,))
44-
# assert prioritized_axes_heuristic(array, (0, 0,))
45-
# assert prioritized_axes_heuristic(array, (0, 1, 1))
46-
# assert prioritized_axes_heuristic(array, (0, 1, 2, 2))
26+
@pytest.mark.parametrize(
27+
"axes, expected",
28+
[
29+
((0, 1, 2), (1, 250, 1000)),
30+
((0, 2, 1), (1, 250, 1000)),
31+
((1, 2, 0), (7, 32, 1000)),
32+
((1, 0, 2), (7, 32, 1000)),
33+
((2, 0, 1), (8, 250, 125)),
34+
((2, 1, 0), (8, 250, 125)),
35+
((), True),
36+
((0,), True),
37+
(
38+
(
39+
0,
40+
1,
41+
),
42+
True,
43+
),
44+
((0, 0), True),
45+
((0, 1, 1), True),
46+
((0, 1, 2, 3), True),
47+
((0, 1, 2, 2), True),
48+
],
49+
ids=[
50+
"intentional-small",
51+
"awkward-small",
52+
"awkward-small",
53+
"awkward-small",
54+
"awkward-small",
55+
"awkward-small",
56+
"scalar",
57+
"oned",
58+
"twod",
59+
"twod-multiples",
60+
"threed-multiples",
61+
"fourd",
62+
"fourd-multiples",
63+
],
64+
)
65+
def test_prioritized_axes_heuristic_small(
66+
axes: tuple[int, ...], expected: tuple[int, ...] | bool
67+
):
68+
array = np.zeros((8, 250, 1000), np.float32)
69+
assert prioritized_axes_heuristic(array, axes) == expected
70+
71+
72+
@pytest.mark.parametrize(
73+
"axes, expected",
74+
[
75+
((0, 1, 2), (1, 125, 2000)),
76+
((0, 2, 1), (1, 1000, 250)),
77+
((1, 2, 0), (128, 1, 2000)),
78+
((1, 0, 2), (128, 1, 2000)),
79+
((2, 0, 1), (87, 1000, 2)),
80+
((2, 1, 0), (87, 1000, 2)),
81+
],
82+
ids=[
83+
"intentional-large",
84+
"awkward-large",
85+
"awkward-large",
86+
"awkward-large",
87+
"awkward-large",
88+
"awkward-large",
89+
],
90+
)
91+
def test_prioritized_axes_heuristic_large(
92+
axes: tuple[int, ...], expected: tuple[int, ...]
93+
):
94+
array = np.zeros((128, 1000, 2000), np.float32)
95+
assert prioritized_axes_heuristic(array, axes) == expected
96+
97+
98+
# unlimited axis

0 commit comments

Comments (0)