Skip to content

Commit 88543e6

Browse files
committed
zarr-python tests for datatypes
1 parent d37c355 commit 88543e6

File tree

6 files changed

+210
-84
lines changed

6 files changed

+210
-84
lines changed

src/test/java/dev/zarr/zarrjava/ZarrPythonTests.java

Lines changed: 180 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.junit.jupiter.api.Test;
1515
import org.junit.jupiter.params.ParameterizedTest;
1616
import org.junit.jupiter.params.provider.CsvSource;
17+
import org.junit.jupiter.params.provider.MethodSource;
1718

1819
import java.io.*;
1920
import java.nio.ByteBuffer;
@@ -24,6 +25,7 @@
2425
import java.util.Map;
2526
import java.util.stream.Stream;
2627

28+
2729
public class ZarrPythonTests extends ZarrTest {
2830

2931
final static Path PYTHON_TEST_PATH = Paths.get("src/test/python-scripts/");
@@ -69,56 +71,151 @@ public void run_python_script(String scriptName, String... args) throws IOExcept
6971
assert exitCode == 0;
7072
}
7173

74+
static ucar.ma2.Array testdata(dev.zarr.zarrjava.core.DataType dt){
75+
ucar.ma2.DataType ma2Type = dt.getMA2DataType();
76+
ucar.ma2.Array array = ucar.ma2.Array.factory(ma2Type, new int[]{16, 16, 16});
77+
for (int i = 0; i < array.getSize(); i++) {
78+
switch (ma2Type) {
79+
case BOOLEAN:
80+
array.setBoolean(i, i%2 == 0);
81+
break;
82+
case BYTE:
83+
case UBYTE:
84+
array.setByte(i, (byte) i);
85+
break;
86+
case SHORT:
87+
case USHORT:
88+
array.setShort(i, (short) i);
89+
break;
90+
case INT:
91+
array.setInt(i, i);
92+
break;
93+
case UINT:
94+
array.setLong(i, i & 0xFFFFFFFFL);
95+
break;
96+
case LONG:
97+
case ULONG:
98+
array.setLong(i, (long) i);
99+
break;
100+
case FLOAT:
101+
array.setFloat(i, (float) i);
102+
break;
103+
case DOUBLE:
104+
array.setDouble(i, (double) i);
105+
break;
106+
default:
107+
throw new IllegalArgumentException("Invalid DataType: " + dt);
108+
}
109+
}
110+
return array;
111+
}
112+
113+
static void assertIsTestdata(ucar.ma2.Array result, dev.zarr.zarrjava.core.DataType dt) {
114+
// expected values are i for index i
115+
ucar.ma2.DataType ma2Type = dt.getMA2DataType();
116+
for (int i = 0; i < result.getSize(); i++) {
117+
switch (ma2Type) {
118+
case BOOLEAN:
119+
Assertions.assertEquals(i % 2 == 0, result.getBoolean(i));
120+
break;
121+
case BYTE:
122+
case UBYTE:
123+
Assertions.assertEquals((byte) i, result.getByte(i));
124+
break;
125+
case SHORT:
126+
case USHORT:
127+
Assertions.assertEquals((short) i, result.getShort(i));
128+
break;
129+
case INT:
130+
Assertions.assertEquals(i, result.getInt(i));
131+
break;
132+
case UINT:
133+
Assertions.assertEquals(i & 0xFFFFFFFFL, result.getLong(i));
134+
break;
135+
case LONG:
136+
case ULONG:
137+
Assertions.assertEquals((long) i, result.getLong(i));
138+
break;
139+
case FLOAT:
140+
Assertions.assertEquals((float) i, result.getFloat(i), 1e-6);
141+
break;
142+
case DOUBLE:
143+
Assertions.assertEquals((double) i, result.getDouble(i), 1e-12);
144+
break;
145+
default:
146+
throw new IllegalArgumentException("Invalid DataType: " + dt);
147+
}
148+
}
149+
}
150+
151+
static Stream<Object[]> compressorAndDataTypeProviderV3() {
152+
Stream<Object[]> datatypeTests = Stream.of(
153+
// DataType.BOOL,
154+
// DataType.INT8,
155+
// DataType.UINT8, // -> BUG: see https://github.com/zarr-developers/zarr-java/issues/27
156+
DataType.INT16,
157+
DataType.UINT16,
158+
DataType.INT32,
159+
DataType.UINT32,
160+
DataType.INT64,
161+
DataType.UINT64,
162+
DataType.FLOAT32,
163+
DataType.FLOAT64
164+
).flatMap(dt -> Stream.of(
165+
new Object[]{"sharding", "end", dt},
166+
new Object[]{"blosc", "blosclz_shuffle_3", dt}
167+
));
168+
169+
Stream<Object[]> codecsTests = Stream.of(
170+
new Object[]{"blosc", "blosclz_noshuffle_0", DataType.INT32},
171+
new Object[]{"blosc", "lz4_shuffle_6", DataType.INT32},
172+
new Object[]{"blosc", "lz4hc_bitshuffle_3", DataType.INT32},
173+
new Object[]{"blosc", "zlib_shuffle_5", DataType.INT32},
174+
new Object[]{"blosc", "zstd_bitshuffle_9", DataType.INT32},
175+
new Object[]{"gzip", "0", DataType.INT32},
176+
new Object[]{"gzip", "5", DataType.INT32},
177+
new Object[]{"zstd", "0_true", DataType.INT32},
178+
new Object[]{"zstd", "5_true", DataType.INT32},
179+
new Object[]{"zstd", "0_false", DataType.INT32},
180+
new Object[]{"zstd", "5_false", DataType.INT32},
181+
new Object[]{"bytes", "BIG", DataType.INT32},
182+
new Object[]{"bytes", "LITTLE", DataType.INT32},
183+
new Object[]{"transpose", "_", DataType.INT32},
184+
new Object[]{"sharding", "start", DataType.INT32},
185+
new Object[]{"sharding_nested", "_", DataType.INT32},
186+
new Object[]{"crc32c", "_", DataType.INT32}
187+
);
188+
189+
return Stream.concat(datatypeTests, codecsTests);
190+
}
191+
192+
72193
@ParameterizedTest
73-
@CsvSource({
74-
"blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9",
75-
"gzip,0", "gzip,5",
76-
"zstd,0_true", "zstd,5_true", "zstd,0_false", "zstd,5_false",
77-
"bytes,BIG", "bytes,LITTLE",
78-
"transpose,_",
79-
"sharding,start", "sharding,end",
80-
"sharding_nested,_",
81-
"crc32c,_",
82-
})
83-
public void testReadFromZarrPythonV3(String codec, String codecParam) throws IOException, ZarrException, InterruptedException {
84-
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("read_from_zarr_python", codec, codecParam);
85-
run_python_script("zarr_python_write.py", codec, codecParam, storeHandle.toPath().toString());
194+
@MethodSource("compressorAndDataTypeProviderV3")
195+
public void testReadV3(String codec, String codecParam, DataType dataType) throws IOException, ZarrException, InterruptedException {
196+
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("testReadV3", codec, codecParam, dataType.name());
197+
run_python_script("zarr_python_write.py", codec, codecParam, dataType.name().toLowerCase(), storeHandle.toPath().toString());
86198
Array array = Array.open(storeHandle);
87199
ucar.ma2.Array result = array.read();
88200

89-
//for expected values see zarr_python_write.py
90201
Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape());
91-
Assertions.assertEquals(DataType.INT32, array.metadata.dataType);
202+
Assertions.assertEquals(dataType, array.metadata.dataType);
92203
Assertions.assertArrayEquals(new int[]{2, 4, 8}, array.metadata.chunkShape());
93204
Assertions.assertEquals(42, array.metadata.attributes.get("answer"));
94205

95-
int[] expectedData = new int[16 * 16 * 16];
96-
Arrays.setAll(expectedData, p -> p);
97-
Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT));
206+
assertIsTestdata(result, DataType.INT32);
98207
}
99208

100209
@ParameterizedTest
101-
@CsvSource({
102-
"blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9",
103-
"gzip,0", "gzip,5",
104-
"zstd,0_true", "zstd,5_true", "zstd,0_false", "zstd,5_false",
105-
"bytes,BIG", "bytes,LITTLE",
106-
"transpose,_",
107-
"sharding,start", "sharding,end",
108-
"sharding_nested,_",
109-
"crc32c,_",
110-
})
111-
public void testWriteReadWithZarrPythonV3(String codec, String codecParam) throws Exception {
112-
int[] testData = new int[16 * 16 * 16];
113-
Arrays.setAll(testData, p -> p);
114-
210+
@MethodSource("compressorAndDataTypeProviderV3")
211+
public void testWriteV3(String codec, String codecParam, DataType dataType) throws Exception {
115212
Map<String, Object> attributes = new HashMap<>();
116213
attributes.put("test_key", "test_value");
117-
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarr_python", codec, codecParam);
214+
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("testWriteV3", codec, codecParam, dataType.name());
118215

119216
ArrayMetadataBuilder builder = Array.metadataBuilder()
120217
.withShape(16, 16, 16)
121-
.withDataType(DataType.UINT32)
218+
.withDataType(dataType)
122219
.withChunkShape(2, 4, 8)
123220
.withFillValue(0)
124221
.withAttributes(attributes);
@@ -158,64 +255,81 @@ public void testWriteReadWithZarrPythonV3(String codec, String codecParam) throw
158255
}
159256

160257
Array writeArray = Array.create(storeHandle, builder.build());
161-
writeArray.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{16, 16, 16}, testData));
258+
writeArray.write(testdata(dataType));
162259

163260
//read in zarr-java
164261
Array readArray = Array.open(storeHandle);
165262
ucar.ma2.Array result = readArray.read();
166263

167264
Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape());
168-
Assertions.assertEquals(DataType.UINT32, readArray.metadata.dataType);
265+
Assertions.assertEquals(dataType, readArray.metadata.dataType);
169266
Assertions.assertArrayEquals(new int[]{2, 4, 8}, readArray.metadata.chunkShape());
170267
Assertions.assertEquals("test_value", readArray.metadata.attributes.get("test_key"));
171268

172-
Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT));
269+
assertIsTestdata(result, DataType.INT32);
173270

174271
//read in zarr_python
175-
run_python_script("zarr_python_read.py", codec, codecParam, storeHandle.toPath().toString());
272+
run_python_script("zarr_python_read.py", codec, codecParam, dataType.name().toLowerCase(), storeHandle.toPath().toString());
176273
}
177274

178275

276+
static Stream<Object[]> compressorAndDataTypeProviderV2() {
277+
Stream<Object[]> datatypeTests = Stream.of(
278+
dev.zarr.zarrjava.v2.DataType.BOOL,
279+
dev.zarr.zarrjava.v2.DataType.INT8,
280+
dev.zarr.zarrjava.v2.DataType.UINT8,
281+
dev.zarr.zarrjava.v2.DataType.INT16,
282+
dev.zarr.zarrjava.v2.DataType.UINT16,
283+
dev.zarr.zarrjava.v2.DataType.INT32,
284+
dev.zarr.zarrjava.v2.DataType.UINT32,
285+
dev.zarr.zarrjava.v2.DataType.INT64,
286+
dev.zarr.zarrjava.v2.DataType.UINT64,
287+
dev.zarr.zarrjava.v2.DataType.FLOAT32,
288+
dev.zarr.zarrjava.v2.DataType.FLOAT64
289+
).flatMap(dt -> Stream.of(
290+
new Object[]{"zlib", "0", dt},
291+
new Object[]{"blosc", "blosclz_shuffle_3", dt}
292+
));
293+
294+
Stream <Object[]> bloscTests = Stream.of(
295+
new Object[]{"blosc", "blosclz_noshuffle_0", dev.zarr.zarrjava.v2.DataType.INT32},
296+
new Object[]{"blosc", "lz4_shuffle_6", dev.zarr.zarrjava.v2.DataType.INT32},
297+
new Object[]{"blosc", "lz4hc_bitshuffle_3", dev.zarr.zarrjava.v2.DataType.INT32},
298+
new Object[]{"blosc", "zlib_shuffle_5", dev.zarr.zarrjava.v2.DataType.INT32},
299+
new Object[]{"blosc", "zstd_bitshuffle_9", dev.zarr.zarrjava.v2.DataType.INT32}
300+
);
301+
302+
return Stream.concat(datatypeTests, bloscTests);
303+
}
304+
179305
@ParameterizedTest
180-
@CsvSource({
181-
"zlib,0", "zlib,5",
182-
"blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9",
183-
})
184-
public void testReadFromZarrPythonV2(String compressor, String compressorParam) throws IOException, ZarrException, InterruptedException {
185-
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("read_from_zarr_python_v2", compressor, compressorParam);
186-
run_python_script("zarr_python_write_v2.py", compressor, compressorParam, storeHandle.toPath().toString());
306+
@MethodSource("compressorAndDataTypeProviderV2")
307+
public void testReadV2(String compressor, String compressorParam, dev.zarr.zarrjava.v2.DataType dt) throws IOException, ZarrException, InterruptedException {
308+
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("testReadV2", compressor, compressorParam, dt.name());
309+
run_python_script("zarr_python_write_v2.py", compressor, compressorParam, dt.name().toLowerCase(), storeHandle.toPath().toString());
187310

188311
dev.zarr.zarrjava.v2.Array array = dev.zarr.zarrjava.v2.Array.open(storeHandle);
189312
ucar.ma2.Array result = array.read();
190313

191-
//for expected values see zarr_python_write.py
192314
Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape());
193-
Assertions.assertEquals(dev.zarr.zarrjava.v2.DataType.INT32, array.metadata.dataType);
315+
Assertions.assertEquals(dt, array.metadata.dataType);
194316
Assertions.assertArrayEquals(new int[]{2, 4, 8}, array.metadata.chunkShape());
195317
// Assertions.assertEquals(42, array.metadata.attributes.get("answer"));
196318

197-
int[] expectedData = new int[16 * 16 * 16];
198-
Arrays.setAll(expectedData, p -> p);
199-
Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT));
319+
assertIsTestdata(result, dt);
200320
}
201321

202322

203323
@ParameterizedTest
204-
@CsvSource({
205-
"zlib,0", "zlib,5",
206-
"blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9",
207-
})
208-
public void testWriteReadWithZarrPythonV2(String compressor, String compressorParam) throws Exception {
209-
int[] testData = new int[16 * 16 * 16];
210-
Arrays.setAll(testData, p -> p);
211-
324+
@MethodSource("compressorAndDataTypeProviderV2")
325+
public void testWriteV2(String compressor, String compressorParam, dev.zarr.zarrjava.v2.DataType dt) throws Exception {
212326
// Map<String, Object> attributes = new HashMap<>();
213327
// attributes.put("test_key", "test_value");
214-
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarr_python_v2", compressor, compressorParam);
328+
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("testCodecsWriteV2", compressor, compressorParam, dt.name());
215329

216330
dev.zarr.zarrjava.v2.ArrayMetadataBuilder builder = dev.zarr.zarrjava.v2.Array.metadataBuilder()
217331
.withShape(16, 16, 16)
218-
.withDataType(dev.zarr.zarrjava.v2.DataType.UINT32)
332+
.withDataType(dt)
219333
.withChunks(2, 4, 8)
220334
// .withAttributes(attributes)
221335
.withFillValue(0);
@@ -235,21 +349,20 @@ public void testWriteReadWithZarrPythonV2(String compressor, String compressorPa
235349
}
236350

237351
dev.zarr.zarrjava.v2.Array writeArray = dev.zarr.zarrjava.v2.Array.create(storeHandle, builder.build());
238-
writeArray.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{16, 16, 16}, testData));
352+
writeArray.write(testdata(dt));
239353

240354
//read in zarr-java
241355
dev.zarr.zarrjava.v2.Array readArray = dev.zarr.zarrjava.v2.Array.open(storeHandle);
242356
ucar.ma2.Array result = readArray.read();
243357

244358
Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape());
245-
Assertions.assertEquals(dev.zarr.zarrjava.v2.DataType.UINT32, readArray.metadata.dataType);
359+
Assertions.assertEquals(dt, readArray.metadata.dataType);
246360
Assertions.assertArrayEquals(new int[]{2, 4, 8}, readArray.metadata.chunkShape());
247361
// Assertions.assertEquals("test_value", readArray.metadata.attributes.get("test_key"));
248-
249-
Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT));
362+
assertIsTestdata(result, dt);
250363

251364
//read in zarr_python
252-
run_python_script("zarr_python_read_v2.py", compressor, compressorParam, storeHandle.toPath().toString());
365+
run_python_script("zarr_python_read_v2.py", compressor, compressorParam, dt.name().toLowerCase(), storeHandle.toPath().toString());
253366
}
254367

255368
@CsvSource({"0,true", "0,false", "5, true", "10, false"})
@@ -288,9 +401,6 @@ public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException
288401

289402
@Test
290403
public void testGroupReadWriteV2() throws Exception {
291-
int[] testData = new int[16 * 16 * 16];
292-
Arrays.setAll(testData, p -> p);
293-
294404
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("group_write");
295405
StoreHandle storeHandle2 = new FilesystemStore(TESTOUTPUT).resolve("group_read");
296406
Group group = Group.create(storeHandle);
@@ -301,7 +411,7 @@ public void testGroupReadWriteV2() throws Exception {
301411
.withChunks(2, 4, 8)
302412
);
303413

304-
array.write(ucar.ma2.Array.factory(dataType.getMA2DataType(), new int[]{16, 16, 16}, testData));
414+
array.write(testdata(dataType));
305415

306416
run_python_script("zarr_python_group_v2.py", storeHandle.toPath().toString(), storeHandle2.toPath().toString());
307417

@@ -312,7 +422,6 @@ public void testGroupReadWriteV2() throws Exception {
312422
Assertions.assertNotNull(array2);
313423
ucar.ma2.Array result = array2.read();
314424
Assertions.assertArrayEquals(new int[]{16, 16, 16}, result.getShape());
315-
Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT));
425+
assertIsTestdata(result, dataType);
316426
}
317-
318427
}

src/test/python-scripts/parse_codecs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def parse_codecs_zarr_python(codec_string: str, param_string: str, zarr_version:
1515

1616
if codec_string == "blosc" and zarr_version == 3:
1717
cname, shuffle, clevel = param_string.split("_")
18-
compressor = BloscCodec(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel))
18+
compressor = BloscCodec(cname=cname, shuffle=shuffle, clevel=int(clevel))
1919
elif codec_string == "blosc" and zarr_version == 2:
2020
cname, shuffle, clevel = param_string.split("_")
2121
if shuffle == "noshuffle":

src/test/python-scripts/zarr_python_read.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@
1010
codec_string = sys.argv[1]
1111
param_string = sys.argv[2]
1212
compressor, serializer, filters = parse_codecs_zarr_python(codec_string, param_string)
13-
store_path = Path(sys.argv[3])
13+
dtype = sys.argv[3]
14+
store_path = Path(sys.argv[4])
1415

15-
expected_data = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16)
16+
if dtype == 'bool':
17+
expected_data = np.arange(16 * 16 * 16, dtype='uint8').reshape(16, 16, 16) % 2 == 0
18+
else:
19+
expected_data = np.arange(16 * 16 * 16, dtype=dtype).reshape(16, 16, 16)
1620

1721
a = zarr.open_array(store=LocalStore(store_path))
1822
read_data = a[:, :]
@@ -22,7 +26,7 @@
2226
LocalStore(store_path / "expected"),
2327
shape=(16, 16, 16),
2428
chunks=(2, 4, 8),
25-
dtype="uint32",
29+
dtype=dtype,
2630
fill_value=0,
2731
filters=filters,
2832
serializer=serializer,

0 commit comments

Comments
 (0)