Skip to content

Commit 00a3e5b

Browse files
committed
fix codecs, adds zstd codec
1 parent 2df3c44 commit 00a3e5b

File tree

11 files changed

+240
-40
lines changed

11 files changed

+240
-40
lines changed

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
<jackson.version>2.14.2</jackson.version>
1616
<aws.version>1.12.477</aws.version>
1717
<netcdfJavaVersion>5.5.3</netcdfJavaVersion>
18+
<zstdVersion>1.5.5-5</zstdVersion>
1819
</properties>
1920

2021
<dependencies>
@@ -43,6 +44,11 @@
4344
<artifactId>blosc-java</artifactId>
4445
<version>0.1-1.21.4</version>
4546
</dependency>
47+
<dependency>
48+
<groupId>com.github.luben</groupId>
49+
<artifactId>zstd-jni</artifactId>
50+
<version>${zstdVersion}</version>
51+
</dependency>
4652
<dependency>
4753
<groupId>com.squareup.okhttp</groupId>
4854
<artifactId>okhttp</artifactId>

src/main/java/com/scalableminds/zarrjava/v3/Array.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ public static ArrayMetadataBuilder metadataBuilder() {
6464
return new ArrayMetadataBuilder();
6565
}
6666

67+
public static ArrayMetadataBuilder metadataBuilder(ArrayMetadata existingMetadata) {
68+
return new ArrayMetadataBuilder(existingMetadata);
69+
}
70+
6771
public ucar.ma2.Array read() throws ZarrException {
6872
return read(new long[metadata.ndim()], Utils.toIntArray(metadata.shape));
6973
}

src/main/java/com/scalableminds/zarrjava/v3/ArrayMetadataBuilder.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,17 @@ public class ArrayMetadataBuilder {
2929
protected ArrayMetadataBuilder() {
3030
}
3131

32+
protected ArrayMetadataBuilder(ArrayMetadata existingMetadata) {
33+
this.shape = existingMetadata.shape;
34+
this.dataType = existingMetadata.dataType;
35+
this.chunkGrid = existingMetadata.chunkGrid;
36+
this.chunkKeyEncoding = existingMetadata.chunkKeyEncoding;
37+
this.fillValue = existingMetadata.fillValue;
38+
this.codecs = existingMetadata.codecs;
39+
this.attributes = existingMetadata.attributes;
40+
this.dimensionNames = existingMetadata.dimensionNames;
41+
}
42+
3243
protected static ArrayMetadataBuilder fromArrayMetadata(ArrayMetadata arrayMetadata) {
3344
ArrayMetadataBuilder builder = new ArrayMetadataBuilder();
3445
builder.shape = arrayMetadata.shape;
@@ -97,7 +108,7 @@ public ArrayMetadataBuilder withCodecs(Codec... codecs) {
97108
}
98109

99110
public ArrayMetadataBuilder withCodecs(Function<CodecBuilder, CodecBuilder> codecBuilder) {
100-
if (dataType==null) {
111+
if (dataType == null) {
101112
throw new IllegalStateException("Please call `withDataType` first.");
102113
}
103114
CodecBuilder nestedCodecBuilder = new CodecBuilder(dataType);

src/main/java/com/scalableminds/zarrjava/v3/codec/CodecBuilder.java

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,14 @@
44
import com.scalableminds.zarrjava.ZarrException;
55
import com.scalableminds.zarrjava.v3.DataType;
66
import com.scalableminds.zarrjava.v3.codec.core.BloscCodec;
7-
import com.scalableminds.zarrjava.v3.codec.core.EndianCodec;
7+
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec;
8+
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec.Endian;
89
import com.scalableminds.zarrjava.v3.codec.core.GzipCodec;
910
import com.scalableminds.zarrjava.v3.codec.core.ShardingIndexedCodec;
1011
import com.scalableminds.zarrjava.v3.codec.core.TransposeCodec;
12+
import com.scalableminds.zarrjava.v3.codec.core.ZstdCodec;
1113
import java.util.ArrayList;
14+
import java.util.Collections;
1215
import java.util.List;
1316
import java.util.function.Function;
1417

@@ -66,13 +69,13 @@ public CodecBuilder withTranspose(String order) {
6669
return this;
6770
}
6871

69-
public CodecBuilder withEndian(EndianCodec.Endian endian) {
70-
codecs.add(new EndianCodec(new EndianCodec.Configuration(endian)));
72+
public CodecBuilder withBytes(BytesCodec.Endian endian) {
73+
codecs.add(new BytesCodec(new BytesCodec.Configuration(endian)));
7174
return this;
7275
}
7376

74-
public CodecBuilder withEndian(String endian) {
75-
return withEndian(EndianCodec.Endian.valueOf(endian));
77+
public CodecBuilder withBytes(String endian) {
78+
return withBytes(BytesCodec.Endian.valueOf(endian));
7679
}
7780

7881
public CodecBuilder withGzip(int clevel) {
@@ -88,6 +91,23 @@ public CodecBuilder withGzip() {
8891
return withGzip(5);
8992
}
9093

94+
public CodecBuilder withZstd(int clevel, boolean checksum) {
95+
try {
96+
codecs.add(new ZstdCodec(new ZstdCodec.Configuration(clevel, checksum)));
97+
} catch (ZarrException e) {
98+
throw new RuntimeException(e);
99+
}
100+
return this;
101+
}
102+
103+
public CodecBuilder withZstd() {
104+
return withZstd(5, true);
105+
}
106+
107+
public CodecBuilder withZstd(int clevel) {
108+
return withZstd(clevel, true);
109+
}
110+
91111
public CodecBuilder withSharding(int[] chunkShape) {
92112
try {
93113
codecs.add(
@@ -111,7 +131,22 @@ public CodecBuilder withSharding(int[] chunkShape,
111131
return this;
112132
}
113133

134+
private void autoInsertBytesCodec() {
135+
if (codecs.stream().noneMatch(c -> c instanceof ArrayBytesCodec)) {
136+
Codec[] arrayArrayCodecs = codecs.stream().filter(c -> c instanceof ArrayArrayCodec)
137+
.toArray(Codec[]::new);
138+
Codec[] bytesBytesCodecs = codecs.stream().filter(c -> c instanceof BytesBytesCodec)
139+
.toArray(Codec[]::new);
140+
this.codecs = new ArrayList<>();
141+
Collections.addAll(this.codecs, arrayArrayCodecs);
142+
this.codecs.add(new BytesCodec(new BytesCodec.Configuration(Endian.LITTLE)));
143+
Collections.addAll(this.codecs, bytesBytesCodecs);
144+
}
145+
}
146+
114147
public Codec[] build() {
148+
autoInsertBytesCodec();
149+
115150
return codecs.toArray(new Codec[0]);
116151
}
117152
}

src/main/java/com/scalableminds/zarrjava/v3/codec/CodecPipeline.java

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import com.scalableminds.zarrjava.ZarrException;
44
import com.scalableminds.zarrjava.store.StoreHandle;
55
import com.scalableminds.zarrjava.v3.ArrayMetadata;
6-
import com.scalableminds.zarrjava.v3.codec.core.EndianCodec;
76
import java.nio.ByteBuffer;
87
import java.util.Arrays;
98
import javax.annotation.Nonnull;
@@ -19,6 +18,12 @@ public CodecPipeline(@Nullable Codec[] codecs) throws ZarrException {
1918
if (codecs == null) {
2019
this.codecs = new Codec[0];
2120
} else {
21+
long arrayBytesCodecCount = Arrays.stream(codecs).filter(c -> c instanceof ArrayBytesCodec)
22+
.count();
23+
if (arrayBytesCodecCount != 1) {
24+
throw new ZarrException(
25+
"Exactly 1 ArrayBytesCodec is required. Found " + arrayBytesCodecCount + ".");
26+
}
2227
Codec prevCodec = null;
2328
for (Codec codec : codecs) {
2429
if (prevCodec != null) {
@@ -57,12 +62,13 @@ ArrayArrayCodec[] getArrayArrayCodecs() {
5762
}
5863

5964
ArrayBytesCodec getArrayBytesCodec() {
60-
for (Codec codec : codecs) {
61-
if (codec instanceof ArrayBytesCodec) {
62-
return (ArrayBytesCodec) codec;
63-
}
65+
for (Codec codec : codecs) {
66+
if (codec instanceof ArrayBytesCodec) {
67+
return (ArrayBytesCodec) codec;
6468
}
65-
return new EndianCodec(new EndianCodec.Configuration(EndianCodec.Endian.LITTLE));
69+
}
70+
throw new RuntimeException(
71+
"Unreachable because the existence of exactly 1 ArrayBytes codec is asserted upon construction.");
6672
}
6773

6874
BytesBytesCodec[] getBytesBytesCodecs() {
@@ -76,15 +82,26 @@ public Array decode(
7682
@Nonnull ByteBuffer chunkBytes,
7783
@Nonnull ArrayMetadata.CoreArrayMetadata arrayMetadata
7884
) throws ZarrException {
79-
for (BytesBytesCodec codec : getBytesBytesCodecs()) {
85+
if (chunkBytes == null) {
86+
throw new ZarrException("chunkBytes is null. Ohh nooo.");
87+
}
88+
for (BytesBytesCodec codec : getBytesBytesCodecs()) { // TODO iterate in reverse
8089
chunkBytes = codec.decode(chunkBytes, arrayMetadata);
8190
}
82-
91+
if (chunkBytes == null) {
92+
throw new ZarrException("chunkBytes is null. This is likely a bug in one of the codecs. " + Arrays.toString(
93+
getBytesBytesCodecs()));
94+
}
8395
Array chunkArray = getArrayBytesCodec().decode(chunkBytes, arrayMetadata);
84-
85-
for (ArrayArrayCodec codec : getArrayArrayCodecs()) {
96+
if (chunkArray == null) {
97+
throw new ZarrException("chunkArray is null. This is likely a bug in one of the codecs.");
98+
}
99+
for (ArrayArrayCodec codec : getArrayArrayCodecs()) { // TODO iterate in reverse
86100
chunkArray = codec.decode(chunkArray, arrayMetadata);
87101
}
102+
if (chunkArray == null) {
103+
throw new ZarrException("chunkArray is null. This is likely a bug in one of the codecs.");
104+
}
88105
return chunkArray;
89106
}
90107

src/main/java/com/scalableminds/zarrjava/v3/codec/CodecRegistry.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22

33
import com.fasterxml.jackson.databind.jsontype.NamedType;
44
import com.scalableminds.zarrjava.v3.codec.core.BloscCodec;
5-
import com.scalableminds.zarrjava.v3.codec.core.EndianCodec;
5+
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec;
66
import com.scalableminds.zarrjava.v3.codec.core.GzipCodec;
77
import com.scalableminds.zarrjava.v3.codec.core.ShardingIndexedCodec;
88
import com.scalableminds.zarrjava.v3.codec.core.TransposeCodec;
9+
import com.scalableminds.zarrjava.v3.codec.core.ZstdCodec;
910
import java.util.HashMap;
1011
import java.util.Map;
1112

@@ -15,9 +16,10 @@ public class CodecRegistry {
1516

1617
static {
1718
addType("transpose", TransposeCodec.class);
18-
addType("endian", EndianCodec.class);
19+
addType("bytes", BytesCodec.class);
1920
addType("blosc", BloscCodec.class);
2021
addType("gzip", GzipCodec.class);
22+
addType("zstd", ZstdCodec.class);
2123
addType("sharding_indexed", ShardingIndexedCodec.class);
2224
}
2325

src/main/java/com/scalableminds/zarrjava/v3/codec/core/BloscCodec.java

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,27 @@ public BloscCodec(
3232
}
3333

3434
@Override
35-
public ByteBuffer decode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata) {
36-
return ByteBuffer.wrap(Blosc.decompress(chunkBytes.array()));
35+
public ByteBuffer decode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata)
36+
throws ZarrException {
37+
try {
38+
return ByteBuffer.wrap(Blosc.decompress(chunkBytes.array()));
39+
} catch (Exception ex) {
40+
throw new ZarrException("Error in decoding blosc.", ex);
41+
}
3742
}
3843

3944
@Override
40-
public ByteBuffer encode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata) {
41-
42-
return ByteBuffer.wrap(
43-
Blosc.compress(chunkBytes.array(), configuration.typesize, configuration.cname,
44-
configuration.clevel,
45-
configuration.shuffle, configuration.blocksize
46-
));
45+
public ByteBuffer encode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata)
46+
throws ZarrException {
47+
try {
48+
return ByteBuffer.wrap(
49+
Blosc.compress(chunkBytes.array(), configuration.typesize, configuration.cname,
50+
configuration.clevel,
51+
configuration.shuffle, configuration.blocksize
52+
));
53+
} catch (Exception ex) {
54+
throw new ZarrException("Error in encoding blosc.", ex);
55+
}
4756
}
4857

4958
public static final class CustomShuffleSerializer extends StdSerializer<Blosc.Shuffle> {

src/main/java/com/scalableminds/zarrjava/v3/codec/core/EndianCodec.java renamed to src/main/java/com/scalableminds/zarrjava/v3/codec/core/BytesCodec.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010
import javax.annotation.Nonnull;
1111
import ucar.ma2.Array;
1212

13-
public class EndianCodec implements ArrayBytesCodec {
13+
public class BytesCodec implements ArrayBytesCodec {
1414

15-
public final String name = "endian";
15+
public final String name = "bytes";
1616
@Nonnull
1717
public final Configuration configuration;
1818

1919
@JsonCreator
20-
public EndianCodec(
20+
public BytesCodec(
2121
@Nonnull @JsonProperty(value = "configuration", required = true) Configuration configuration
2222
) {
2323
this.configuration = configuration;
@@ -64,11 +64,11 @@ public ByteOrder getByteOrder() {
6464
public static final class Configuration {
6565

6666
@Nonnull
67-
public final EndianCodec.Endian endian;
67+
public final BytesCodec.Endian endian;
6868

6969
@JsonCreator
7070
public Configuration(
71-
@JsonProperty(value = "endian", defaultValue = "little") EndianCodec.Endian endian) {
71+
@JsonProperty(value = "endian", defaultValue = "little") BytesCodec.Endian endian) {
7272
this.endian = endian;
7373
}
7474
}

src/main/java/com/scalableminds/zarrjava/v3/codec/core/GzipCodec.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,25 +36,28 @@ private void copy(InputStream inputStream, OutputStream outputStream) throws IOE
3636
}
3737

3838
@Override
39-
public ByteBuffer decode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata) {
39+
public ByteBuffer decode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata)
40+
throws ZarrException {
4041
try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); GZIPInputStream inputStream = new GZIPInputStream(
4142
new ByteArrayInputStream(chunkBytes.array()))) {
4243
copy(inputStream, outputStream);
44+
inputStream.close();
4345
return ByteBuffer.wrap(outputStream.toByteArray());
4446
} catch (IOException ex) {
45-
return null;
47+
throw new ZarrException("Error in decoding gzip.", ex);
4648
}
4749
}
4850

4951
@Override
50-
public ByteBuffer encode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata) {
52+
public ByteBuffer encode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata)
53+
throws ZarrException {
5154
try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); GZIPOutputStream gzipStream = new GZIPOutputStream(
52-
outputStream); ByteArrayInputStream inputStream = new ByteArrayInputStream(
53-
chunkBytes.array())) {
54-
copy(inputStream, gzipStream);
55+
outputStream)) {
56+
gzipStream.write(chunkBytes.array());
57+
gzipStream.close();
5558
return ByteBuffer.wrap(outputStream.toByteArray());
5659
} catch (IOException ex) {
57-
return null;
60+
throw new ZarrException("Error in encoding gzip.", ex);
5861
}
5962
}
6063

0 commit comments

Comments
 (0)