Skip to content

Commit db47786

Browse files
committed
add more variation to codec-configuration in tests with zarrita
1 parent fbb2b67 commit db47786

File tree

3 files changed

+85
-47
lines changed

3 files changed

+85
-47
lines changed

src/test/java/dev/zarr/zarrjava/ZarrTest.java

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import dev.zarr.zarrjava.v3.*;
1515
import dev.zarr.zarrjava.v3.codec.CodecBuilder;
1616
import dev.zarr.zarrjava.v3.codec.core.TransposeCodec;
17+
import jdk.jshell.spi.ExecutionControl;
1718
import org.junit.jupiter.api.Assertions;
1819
import org.junit.jupiter.api.BeforeAll;
1920
import org.junit.jupiter.api.Test;
@@ -32,7 +33,7 @@
3233
import java.util.Map;
3334
import java.util.stream.Stream;
3435

35-
import static org.junit.Assert.*;
36+
import static org.junit.Assert.assertThrows;
3637

3738
public class ZarrTest {
3839

@@ -58,11 +59,18 @@ public static void clearTestoutputFolder() throws IOException {
5859
}
5960

6061
@ParameterizedTest
61-
@ValueSource(strings = {"blosc", "gzip", "zstd", "bytes", "transpose", "sharding_start", "sharding_end", "crc32c"})
62-
public void testReadFromZarrita(String codec) throws IOException, ZarrException, InterruptedException {
63-
62+
@CsvSource({
63+
"blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9",
64+
"gzip,0", "gzip,5",
65+
"zstd,0_true", "zstd,5_true","zstd,0_false", "zstd,5_false",
66+
"bytes,BIG", "bytes,LITTLE",
67+
"transpose,_",
68+
"sharding,start", "sharding,end",
69+
"sharding_nested,_",
70+
"crc32c,_",
71+
}) public void testReadFromZarrita(String codec, String codecParam) throws IOException, ZarrException, InterruptedException {
6472
String command = pythonPath();
65-
ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_write.py").toString(), codec, TESTOUTPUT.toString());
73+
ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_write.py").toString(), codec, codecParam, TESTOUTPUT.toString());
6674
Process process = pb.start();
6775

6876
BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
@@ -79,7 +87,7 @@ public void testReadFromZarrita(String codec) throws IOException, ZarrException,
7987
int exitCode = process.waitFor();
8088
assert exitCode == 0;
8189

82-
Array array = Array.open(new FilesystemStore(TESTOUTPUT).resolve("read_from_zarrita", codec));
90+
Array array = Array.open(new FilesystemStore(TESTOUTPUT).resolve("read_from_zarrita", codec, codecParam));
8391
ucar.ma2.Array result = array.read();
8492

8593
//for expected values see zarrita_write.py
@@ -128,12 +136,21 @@ public void testZstdLibrary(int clevel, boolean checksumFlag) throws IOException
128136
}
129137

130138
@ParameterizedTest
131-
@ValueSource(strings = {"blosc", "gzip", "zstd", "bytes", "transpose", "sharding_start", "sharding_end", "crc32c"})
132-
public void testWriteRead(String codec) throws IOException, ZarrException, InterruptedException {
139+
@CsvSource({
140+
"blosc,blosclz_noshuffle_0", "blosc,lz4_shuffle_6", "blosc,lz4hc_bitshuffle_3", "blosc,zlib_shuffle_5", "blosc,zstd_bitshuffle_9",
141+
"gzip,0", "gzip,5",
142+
"zstd,0_true", "zstd,5_true","zstd,0_false", "zstd,5_false",
143+
"bytes,BIG", "bytes,LITTLE",
144+
"transpose,_",
145+
"sharding,start", "sharding,end",
146+
"sharding_nested,_",
147+
"crc32c,_",
148+
})
149+
public void testWriteReadWithZarrita(String codec, String codecParam) throws Exception {
133150
int[] testData = new int[16 * 16 * 16];
134151
Arrays.setAll(testData, p -> p);
135152

136-
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarrita", codec);
153+
StoreHandle storeHandle = new FilesystemStore(TESTOUTPUT).resolve("write_to_zarrita", codec, codecParam);
137154
ArrayMetadataBuilder builder = Array.metadataBuilder()
138155
.withShape(16, 16, 16)
139156
.withDataType(DataType.UINT32)
@@ -143,25 +160,30 @@ public void testWriteRead(String codec) throws IOException, ZarrException, Inter
143160

144161
switch (codec) {
145162
case "blosc":
146-
builder = builder.withCodecs(CodecBuilder::withBlosc);
163+
String cname = codecParam.split("_")[0];
164+
String shuffle = codecParam.split("_")[1];
165+
int clevel_blosc = Integer.parseInt(codecParam.split("_")[2]);
166+
builder = builder.withCodecs(c -> c.withBlosc(cname, shuffle, clevel_blosc));
147167
break;
148168
case "gzip":
149-
builder = builder.withCodecs(CodecBuilder::withGzip);
169+
builder = builder.withCodecs(c -> c.withGzip(Integer.parseInt(codecParam)));
150170
break;
151171
case "zstd":
152-
builder = builder.withCodecs(c -> c.withZstd(0));
172+
int clevel_zstd = Integer.parseInt(codecParam.split("_")[0]);
173+
boolean checksum = Boolean.parseBoolean(codecParam.split("_")[1]);
174+
builder = builder.withCodecs(c -> c.withZstd(clevel_zstd, checksum));
153175
break;
154176
case "bytes":
155-
builder = builder.withCodecs(c -> c.withBytes("LITTLE"));
177+
builder = builder.withCodecs(c -> c.withBytes(codecParam));
156178
break;
157179
case "transpose":
158180
builder = builder.withCodecs(c -> c.withTranspose(new int[]{1, 0, 2}));
159181
break;
160-
case "sharding_start":
161-
builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withBytes("LITTLE"), "start"));
182+
case "sharding":
183+
builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withBytes("LITTLE"), codecParam));
162184
break;
163-
case "sharding_end":
164-
builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withBytes("LITTLE"), "end"));
185+
case "sharding_nested":
186+
builder = builder.withCodecs(c -> c.withSharding(new int[]{2, 2, 4}, c1 -> c1.withSharding(new int[]{2, 1, 2}, c2 -> c2.withBytes("LITTLE"))));
165187
break;
166188
case "crc32c":
167189
builder = builder.withCodecs(CodecBuilder::withCrc32c);
@@ -187,7 +209,7 @@ public void testWriteRead(String codec) throws IOException, ZarrException, Inter
187209
//read in zarrita
188210
String command = pythonPath();
189211

190-
ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_read.py").toString(), codec, TESTOUTPUT.toString());
212+
ProcessBuilder pb = new ProcessBuilder(command, PYTHON_TEST_PATH.resolve("zarrita_read.py").toString(), codec, codecParam, TESTOUTPUT.toString());
191213
Process process = pb.start();
192214

193215
BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
@@ -225,7 +247,6 @@ public void testZstdCodecReadWrite(int clevel, boolean checksum) throws ZarrExce
225247
ucar.ma2.Array result = readArray.read();
226248

227249
Assertions.assertArrayEquals(testData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.INT));
228-
229250
}
230251

231252
@Test
@@ -417,7 +438,7 @@ public void testV3Group() throws IOException, ZarrException {
417438
}
418439

419440
@Test
420-
public void testV2() throws IOException{
441+
public void testV2() throws IOException {
421442
FilesystemStore fsStore = new FilesystemStore("");
422443
HttpStore httpStore = new HttpStore("https://static.webknossos.org/data");
423444

src/test/python-scripts/zarrita_read.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,53 @@
1-
import zarrita
2-
import numpy as np
31
import sys
42

3+
import numpy as np
4+
import zarrita
5+
from zarrita.metadata import ShardingCodecIndexLocation
56

67
codec_string = sys.argv[1]
8+
param_string = sys.argv[2]
79
codec = []
810
if codec_string == "blosc":
9-
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.blosc_codec(typesize=4)]
11+
cname, shuffle, clevel = param_string.split("_")
12+
codec = [zarrita.codecs.bytes_codec(),
13+
zarrita.codecs.blosc_codec(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel))]
1014
elif codec_string == "gzip":
11-
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec()]
15+
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec(level=int(param_string))]
1216
elif codec_string == "zstd":
13-
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec(checksum=True)]
17+
level, checksum = param_string.split("_")
18+
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec(checksum=checksum == 'true', level=int(level))]
1419
elif codec_string == "bytes":
15-
codec = [zarrita.codecs.bytes_codec()]
20+
codec = [zarrita.codecs.bytes_codec(endian=param_string.lower())]
1621
elif codec_string == "transpose":
1722
codec = [zarrita.codecs.transpose_codec((1, 0, 2)), zarrita.codecs.bytes_codec()]
18-
elif codec_string == "sharding_start":
19-
codec= zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4), codecs=[zarrita.codecs.bytes_codec("little")], index_location= zarrita.metadata.ShardingCodecIndexLocation.start),
20-
elif codec_string == "sharding_end":
21-
codec= zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4), codecs=[zarrita.codecs.bytes_codec("little")], index_location= zarrita.metadata.ShardingCodecIndexLocation.end),
23+
elif codec_string == "sharding":
24+
codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4), codecs=[zarrita.codecs.bytes_codec("little")],
25+
index_location=ShardingCodecIndexLocation.start if param_string == "start"
26+
else ShardingCodecIndexLocation.end),
27+
elif codec_string == "sharding_nested":
28+
codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 2, 4),
29+
codecs=[zarrita.codecs.sharding_codec(chunk_shape=(2, 1, 2), codecs=[
30+
zarrita.codecs.bytes_codec("little")])]),
2231
elif codec_string == "crc32c":
2332
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.crc32c_codec()]
2433
else:
2534
# Report the unrecognised codec name: `codec` is still the empty list here,
# so interpolating it would hide which argument was rejected.
raise ValueError(f"Invalid {codec_string=}")
2635

36+
store = zarrita.LocalStore(sys.argv[3])
37+
expected_data = np.arange(16 * 16 * 16, dtype='int32').reshape(16, 16, 16)
2738

28-
store = zarrita.LocalStore(sys.argv[2])
29-
expected_data = np.arange(16*16*16, dtype='int32').reshape(16, 16, 16)
30-
31-
a = zarrita.Array.open(store / 'write_to_zarrita' / codec_string)
39+
a = zarrita.Array.open(store / 'write_to_zarrita' / codec_string / param_string)
3240
read_data = a[:, :]
3341
assert np.array_equal(read_data, expected_data), f"got:\n {read_data} \nbut expected:\n {expected_data}"
3442

3543
b = zarrita.Array.create(
36-
store / 'read_from_zarrita_expected' / codec_string,
44+
store / 'read_from_zarrita_expected' / codec_string / param_string,
3745
shape=(16, 16, 16),
3846
chunk_shape=(2, 4, 8),
3947
dtype="uint32",
4048
fill_value=0,
4149
attributes={'test_key': 'test_value'},
4250
codecs=codec
43-
)
51+
)
4452

45-
assert a.metadata == b.metadata, f"not equal: \n{a.metadata=}\n{b.metadata=}"
53+
assert a.metadata == b.metadata, f"not equal: \n{a.metadata=}\n{b.metadata=}"

src/test/python-scripts/zarrita_write.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,42 @@
22

33
import zarrita
44
import numpy as np
5+
from zarrita.metadata import ShardingCodecIndexLocation
56

67
codec_string = sys.argv[1]
8+
param_string = sys.argv[2]
79
codec = []
810
if codec_string == "blosc":
9-
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.blosc_codec(typesize=4)]
11+
cname, shuffle, clevel = param_string.split("_")
12+
codec = [zarrita.codecs.bytes_codec(),
13+
zarrita.codecs.blosc_codec(typesize=4, cname=cname, shuffle=shuffle, clevel=int(clevel))]
1014
elif codec_string == "gzip":
11-
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec()]
15+
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.gzip_codec(level=int(param_string))]
1216
elif codec_string == "zstd":
13-
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec()]
17+
level, checksum = param_string.split("_")
18+
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.zstd_codec(checksum=checksum == 'true', level=int(level))]
1419
elif codec_string == "bytes":
15-
codec = [zarrita.codecs.bytes_codec()]
20+
codec = [zarrita.codecs.bytes_codec(endian=param_string.lower())]
1621
elif codec_string == "transpose":
1722
codec = [zarrita.codecs.transpose_codec((0, 1)), zarrita.codecs.bytes_codec()]
18-
elif codec_string == "sharding_start":
19-
codec = [zarrita.codecs.sharding_codec(chunk_shape=(1, 2), codecs=[zarrita.codecs.bytes_codec()], index_location=zarrita.metadata.ShardingCodecIndexLocation.start)]
20-
elif codec_string == "sharding_end":
21-
codec = [zarrita.codecs.sharding_codec(chunk_shape=(1, 2), codecs=[zarrita.codecs.bytes_codec()], index_location=zarrita.metadata.ShardingCodecIndexLocation.end)]
23+
elif codec_string == "sharding":
24+
codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 4), codecs=[zarrita.codecs.bytes_codec("little")],
25+
index_location=ShardingCodecIndexLocation.start if param_string == "start"
26+
else ShardingCodecIndexLocation.end),
27+
elif codec_string == "sharding_nested":
28+
codec = zarrita.codecs.sharding_codec(chunk_shape=(2, 4),
29+
codecs=[zarrita.codecs.sharding_codec(chunk_shape=(1, 2), codecs=[
30+
zarrita.codecs.bytes_codec("little")])]),
2231
elif codec_string == "crc32c":
2332
codec = [zarrita.codecs.bytes_codec(), zarrita.codecs.crc32c_codec()]
2433
else:
2534
raise ValueError(f"Invalid {codec_string=}")
2635

27-
store = zarrita.LocalStore(sys.argv[2])
36+
store = zarrita.LocalStore(sys.argv[3])
2837
testdata = np.arange(16 * 16, dtype='int32').reshape((16, 16))
2938

3039
a = zarrita.Array.create(
31-
store / 'read_from_zarrita' / codec_string,
40+
store / 'read_from_zarrita' / codec_string / param_string,
3241
shape=(16, 16),
3342
dtype='int32',
3443
chunk_shape=(2, 8),

0 commit comments

Comments
 (0)