Skip to content

Commit 3ce0fe8

Browse files
committed
fix sharding
1 parent 00a3e5b commit 3ce0fe8

File tree

12 files changed

+182
-129
lines changed

12 files changed

+182
-129
lines changed

src/main/java/com/scalableminds/zarrjava/store/Store.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package com.scalableminds.zarrjava.store;
22

3-
import java.io.IOException;
43
import java.nio.ByteBuffer;
54
import java.util.stream.Stream;
65
import javax.annotation.Nonnull;

src/main/java/com/scalableminds/zarrjava/v3/Array.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public static ArrayMetadataBuilder metadataBuilder() {
6565
}
6666

6767
public static ArrayMetadataBuilder metadataBuilder(ArrayMetadata existingMetadata) {
68-
return new ArrayMetadataBuilder(existingMetadata);
68+
return ArrayMetadataBuilder.fromArrayMetadata(existingMetadata);
6969
}
7070

7171
public ucar.ma2.Array read() throws ZarrException {

src/main/java/com/scalableminds/zarrjava/v3/ArrayMetadata.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ public final class ArrayMetadata {
4444
@JsonIgnore
4545
public final Object parsedFillValue;
4646

47-
@Nullable
4847
@JsonProperty("codecs")
4948
public final Codec[] codecs;
5049
@Nullable
@@ -60,7 +59,7 @@ public final class ArrayMetadata {
6059
public ArrayMetadata(
6160
long[] shape, DataType dataType, ChunkGrid chunkGrid, ChunkKeyEncoding chunkKeyEncoding,
6261
Object fillValue,
63-
@Nullable Codec[] codecs,
62+
@Nonnull Codec[] codecs,
6463
@Nullable String[] dimensionNames,
6564
@Nullable Map<String, Object> attributes
6665
) throws ZarrException {
@@ -79,7 +78,7 @@ public ArrayMetadata(
7978
@JsonProperty(value = "chunk_grid", required = true) ChunkGrid chunkGrid,
8079
@JsonProperty(value = "chunk_key_encoding", required = true) ChunkKeyEncoding chunkKeyEncoding,
8180
@JsonProperty(value = "fill_value", required = true) Object fillValue,
82-
@Nullable @JsonProperty(value = "codecs") Codec[] codecs,
81+
@Nonnull @JsonProperty(value = "codecs") Codec[] codecs,
8382
@Nullable @JsonProperty(value = "dimension_names") String[] dimensionNames,
8483
@Nullable @JsonProperty(value = "attributes") Map<String, Object> attributes
8584
) throws ZarrException {

src/main/java/com/scalableminds/zarrjava/v3/ArrayMetadataBuilder.java

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import com.scalableminds.zarrjava.v3.chunkkeyencoding.V2ChunkKeyEncoding;
1010
import com.scalableminds.zarrjava.v3.codec.Codec;
1111
import com.scalableminds.zarrjava.v3.codec.CodecBuilder;
12+
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec;
13+
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec.Endian;
1214
import java.util.HashMap;
1315
import java.util.Map;
1416
import java.util.function.Function;
@@ -22,24 +24,13 @@ public class ArrayMetadataBuilder {
2224
new DefaultChunkKeyEncoding(new DefaultChunkKeyEncoding.Configuration(Separator.SLASH));
2325

2426
Object fillValue = 0;
25-
Codec[] codecs = null;
27+
Codec[] codecs = new Codec[]{new BytesCodec(Endian.LITTLE)};
2628
Map<String, Object> attributes = new HashMap<>();
2729
String[] dimensionNames = null;
2830

2931
protected ArrayMetadataBuilder() {
3032
}
3133

32-
protected ArrayMetadataBuilder(ArrayMetadata existingMetadata) {
33-
this.shape = existingMetadata.shape;
34-
this.dataType = existingMetadata.dataType;
35-
this.chunkGrid = existingMetadata.chunkGrid;
36-
this.chunkKeyEncoding = existingMetadata.chunkKeyEncoding;
37-
this.fillValue = existingMetadata.fillValue;
38-
this.codecs = existingMetadata.codecs;
39-
this.attributes = existingMetadata.attributes;
40-
this.dimensionNames = existingMetadata.dimensionNames;
41-
}
42-
4334
protected static ArrayMetadataBuilder fromArrayMetadata(ArrayMetadata arrayMetadata) {
4435
ArrayMetadataBuilder builder = new ArrayMetadataBuilder();
4536
builder.shape = arrayMetadata.shape;

src/main/java/com/scalableminds/zarrjava/v3/Group.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public Array createArray(String key, ArrayMetadata arrayMetadata)
9696
throws IOException, ZarrException {
9797
return Array.create(storeHandle.resolve(key), arrayMetadata);
9898
}
99-
99+
100100
public Stream<Node> list() {
101101
return storeHandle.list()
102102
.map(key -> {

src/main/java/com/scalableminds/zarrjava/v3/codec/CodecBuilder.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import com.scalableminds.zarrjava.v3.DataType;
66
import com.scalableminds.zarrjava.v3.codec.core.BloscCodec;
77
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec;
8+
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec.Configuration;
89
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec.Endian;
10+
import com.scalableminds.zarrjava.v3.codec.core.Crc32cCodec;
911
import com.scalableminds.zarrjava.v3.codec.core.GzipCodec;
1012
import com.scalableminds.zarrjava.v3.codec.core.ShardingIndexedCodec;
1113
import com.scalableminds.zarrjava.v3.codec.core.TransposeCodec;
@@ -17,7 +19,7 @@
1719

1820
public class CodecBuilder {
1921

20-
private DataType dataType;
22+
final private DataType dataType;
2123
private List<Codec> codecs;
2224

2325
public CodecBuilder(DataType dataType) {
@@ -111,7 +113,9 @@ public CodecBuilder withZstd(int clevel) {
111113
public CodecBuilder withSharding(int[] chunkShape) {
112114
try {
113115
codecs.add(
114-
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape, null)));
116+
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
117+
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
118+
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()})));
115119
} catch (ZarrException e) {
116120
throw new RuntimeException(e);
117121
}
@@ -123,8 +127,9 @@ public CodecBuilder withSharding(int[] chunkShape,
123127
CodecBuilder nestedBuilder = new CodecBuilder(dataType);
124128
try {
125129
codecs.add(new ShardingIndexedCodec(
126-
new ShardingIndexedCodec.Configuration(chunkShape, codecBuilder.apply(nestedBuilder)
127-
.build())));
130+
new ShardingIndexedCodec.Configuration(chunkShape,
131+
codecBuilder.apply(nestedBuilder).build(),
132+
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()})));
128133
} catch (ZarrException e) {
129134
throw new RuntimeException(e);
130135
}

src/main/java/com/scalableminds/zarrjava/v3/codec/CodecPipeline.java

Lines changed: 35 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,48 @@
66
import java.nio.ByteBuffer;
77
import java.util.Arrays;
88
import javax.annotation.Nonnull;
9-
import javax.annotation.Nullable;
109
import ucar.ma2.Array;
1110

1211
public class CodecPipeline {
1312

1413
@Nonnull
1514
final Codec[] codecs;
1615

17-
public CodecPipeline(@Nullable Codec[] codecs) throws ZarrException {
18-
if (codecs == null) {
19-
this.codecs = new Codec[0];
20-
} else {
21-
long arrayBytesCodecCount = Arrays.stream(codecs).filter(c -> c instanceof ArrayBytesCodec)
22-
.count();
23-
if (arrayBytesCodecCount != 1) {
24-
throw new ZarrException(
25-
"Exactly 1 ArrayBytesCodec is required. Found " + arrayBytesCodecCount + ".");
26-
}
27-
Codec prevCodec = null;
28-
for (Codec codec : codecs) {
29-
if (prevCodec != null) {
30-
if (codec instanceof ArrayBytesCodec && prevCodec instanceof ArrayBytesCodec) {
31-
throw new ZarrException(
32-
"ArrayBytesCodec '" + codec.getClass() + "' cannot follow after ArrayBytesCodec '" +
33-
prevCodec.getClass() + "' because only 1 ArrayBytesCodec is allowed.");
34-
}
35-
if (codec instanceof ArrayBytesCodec && prevCodec instanceof BytesBytesCodec) {
36-
throw new ZarrException(
37-
"ArrayBytesCodec '" + codec.getClass() + "' cannot follow after BytesBytesCodec '" +
38-
prevCodec.getClass() + "'.");
39-
}
40-
if (codec instanceof ArrayArrayCodec && prevCodec instanceof ArrayBytesCodec) {
41-
throw new ZarrException(
42-
"ArrayArrayCodec '" + codec.getClass() + "' cannot follow after ArrayBytesCodec '" +
43-
prevCodec.getClass() + "'.");
44-
}
45-
if (codec instanceof ArrayArrayCodec && prevCodec instanceof BytesBytesCodec) {
46-
throw new ZarrException(
47-
"ArrayArrayCodec '" + codec.getClass() + "' cannot follow after BytesBytesCodec '" +
48-
prevCodec.getClass() + "'.");
49-
}
16+
public CodecPipeline(@Nonnull Codec[] codecs) throws ZarrException {
17+
long arrayBytesCodecCount = Arrays.stream(codecs).filter(c -> c instanceof ArrayBytesCodec)
18+
.count();
19+
if (arrayBytesCodecCount != 1) {
20+
throw new ZarrException(
21+
"Exactly 1 ArrayBytesCodec is required. Found " + arrayBytesCodecCount + ".");
22+
}
23+
Codec prevCodec = null;
24+
for (Codec codec : codecs) {
25+
if (prevCodec != null) {
26+
if (codec instanceof ArrayBytesCodec && prevCodec instanceof ArrayBytesCodec) {
27+
throw new ZarrException(
28+
"ArrayBytesCodec '" + codec.getClass() + "' cannot follow after ArrayBytesCodec '" +
29+
prevCodec.getClass() + "' because only 1 ArrayBytesCodec is allowed.");
30+
}
31+
if (codec instanceof ArrayBytesCodec && prevCodec instanceof BytesBytesCodec) {
32+
throw new ZarrException(
33+
"ArrayBytesCodec '" + codec.getClass() + "' cannot follow after BytesBytesCodec '" +
34+
prevCodec.getClass() + "'.");
35+
}
36+
if (codec instanceof ArrayArrayCodec && prevCodec instanceof ArrayBytesCodec) {
37+
throw new ZarrException(
38+
"ArrayArrayCodec '" + codec.getClass() + "' cannot follow after ArrayBytesCodec '" +
39+
prevCodec.getClass() + "'.");
40+
}
41+
if (codec instanceof ArrayArrayCodec && prevCodec instanceof BytesBytesCodec) {
42+
throw new ZarrException(
43+
"ArrayArrayCodec '" + codec.getClass() + "' cannot follow after BytesBytesCodec '" +
44+
prevCodec.getClass() + "'.");
5045
}
51-
prevCodec = codec;
5246
}
53-
54-
this.codecs = codecs;
47+
prevCodec = codec;
5548
}
49+
50+
this.codecs = codecs;
5651
}
5752

5853
ArrayArrayCodec[] getArrayArrayCodecs() {
@@ -89,8 +84,9 @@ public Array decode(
8984
chunkBytes = codec.decode(chunkBytes, arrayMetadata);
9085
}
9186
if (chunkBytes == null) {
92-
throw new ZarrException("chunkBytes is null. This is likely a bug in one of the codecs. " + Arrays.toString(
93-
getBytesBytesCodecs()));
87+
throw new ZarrException(
88+
"chunkBytes is null. This is likely a bug in one of the codecs. " + Arrays.toString(
89+
getBytesBytesCodecs()));
9490
}
9591
Array chunkArray = getArrayBytesCodec().decode(chunkBytes, arrayMetadata);
9692
if (chunkArray == null) {

src/main/java/com/scalableminds/zarrjava/v3/codec/CodecRegistry.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import com.fasterxml.jackson.databind.jsontype.NamedType;
44
import com.scalableminds.zarrjava.v3.codec.core.BloscCodec;
55
import com.scalableminds.zarrjava.v3.codec.core.BytesCodec;
6+
import com.scalableminds.zarrjava.v3.codec.core.Crc32cCodec;
67
import com.scalableminds.zarrjava.v3.codec.core.GzipCodec;
78
import com.scalableminds.zarrjava.v3.codec.core.ShardingIndexedCodec;
89
import com.scalableminds.zarrjava.v3.codec.core.TransposeCodec;
@@ -20,6 +21,7 @@ public class CodecRegistry {
2021
addType("blosc", BloscCodec.class);
2122
addType("gzip", GzipCodec.class);
2223
addType("zstd", ZstdCodec.class);
24+
addType("crc32c", Crc32cCodec.class);
2325
addType("sharding_indexed", ShardingIndexedCodec.class);
2426
}
2527

src/main/java/com/scalableminds/zarrjava/v3/codec/core/BytesCodec.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ public BytesCodec(
2323
this.configuration = configuration;
2424
}
2525

26+
public BytesCodec(Endian endian) {
27+
this(new BytesCodec.Configuration(endian));
28+
}
29+
2630
@Override
2731
public Array decode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata) {
2832
chunkBytes.order(configuration.endian.getByteOrder());
@@ -35,6 +39,7 @@ public ByteBuffer encode(Array chunkArray, ArrayMetadata.CoreArrayMetadata array
3539
return chunkArray.getDataAsByteBuffer(configuration.endian.getByteOrder());
3640
}
3741

42+
3843
public enum Endian {
3944
LITTLE("little"),
4045
BIG("big");
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package com.scalableminds.zarrjava.v3.codec.core;
2+
3+
import com.fasterxml.jackson.annotation.JsonCreator;
4+
import com.scalableminds.zarrjava.ZarrException;
5+
import com.scalableminds.zarrjava.utils.CRC32C;
6+
import com.scalableminds.zarrjava.utils.Utils;
7+
import com.scalableminds.zarrjava.v3.ArrayMetadata;
8+
import com.scalableminds.zarrjava.v3.codec.BytesBytesCodec;
9+
import java.nio.ByteBuffer;
10+
import java.nio.ByteOrder;
11+
12+
public class Crc32cCodec implements BytesBytesCodec {
13+
14+
public final String name = "crc32c";
15+
16+
@JsonCreator
17+
public Crc32cCodec(
18+
) {
19+
}
20+
21+
@Override
22+
public ByteBuffer decode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata)
23+
throws ZarrException {
24+
ByteBuffer buffer = chunkBytes.slice();
25+
buffer.order(ByteOrder.LITTLE_ENDIAN);
26+
27+
buffer.limit(buffer.capacity() - 4);
28+
29+
final CRC32C crc32c = new CRC32C();
30+
crc32c.update(buffer);
31+
int computedCrc32c = (int) crc32c.getValue();
32+
33+
buffer.limit(buffer.capacity());
34+
int storedCrc32c = buffer.getInt();
35+
36+
if (computedCrc32c != storedCrc32c) {
37+
throw new ZarrException(
38+
"The checksum of the sharding index is invalid. Stored: " + storedCrc32c + " "
39+
+ "Computed: " +
40+
computedCrc32c);
41+
}
42+
buffer.rewind();
43+
buffer.limit(buffer.capacity() - 4);
44+
return buffer.slice();
45+
}
46+
47+
@Override
48+
public ByteBuffer encode(ByteBuffer chunkBytes, ArrayMetadata.CoreArrayMetadata arrayMetadata) {
49+
return Utils.makeByteBuffer(chunkBytes.capacity() + 4, b -> {
50+
final CRC32C crc32c = new CRC32C();
51+
crc32c.update(chunkBytes);
52+
int computedCrc32c = (int) crc32c.getValue();
53+
chunkBytes.rewind();
54+
b.put(chunkBytes);
55+
b.putInt(computedCrc32c);
56+
return b;
57+
});
58+
}
59+
}
60+

0 commit comments

Comments
 (0)