Skip to content

Commit e51ac9d

Browse files
authored
Merge pull request #2 from scalableminds/fix-sharding-codec
Fix sharding codec
2 parents d2d3e04 + 4f9dc72 commit e51ac9d

File tree

1,631 files changed

+316
-564032
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,631 files changed

+316
-564032
lines changed

.github/workflows/ci.yml

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,30 +11,41 @@ jobs:
1111
build:
1212
strategy:
1313
matrix:
14-
os: [ ubuntu, windows, macos ]
14+
os: [ ubuntu-latest, windows-latest, macos-latest ]
1515
fail-fast: false
16-
runs-on: ${{ matrix.os }}-latest
16+
runs-on: ${{ matrix.os }}
1717
defaults:
1818
run:
1919
shell: bash
2020

2121
steps:
2222
- uses: actions/checkout@v3
23+
2324
- name: Set up JDK
2425
uses: actions/setup-java@v3
2526
with:
2627
java-version: '22'
2728
distribution: 'temurin'
2829
cache: maven
2930

31+
- name: Set up Python
32+
uses: actions/setup-python@v4
33+
with:
34+
python-version: '3.11'
35+
36+
- name: Install zarrita
37+
run: |
38+
python -m venv venv_zarrita
39+
if [ "${{ runner.os }}" = "Windows" ]; then venv_zarrita/Scripts/pip install zarrita; else venv_zarrita/bin/pip install zarrita; fi
40+
3041
- name: Download blosc jar
3142
run: |
3243
mkdir -p ../blosc-java/target
3344
curl https://static.webknossos.org/misc/blosc-java-0.1-1.21.4-SNAPSHOT.jar -o ../blosc-java/target/blosc-java-0.1-1.21.4-SNAPSHOT.jar
3445
3546
- name: Download testdata
3647
run: |
37-
mkdir testdata testoutput
48+
mkdir testoutput
3849
curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip
3950
cd testdata
4051
unzip l4_sample.zip
@@ -45,7 +56,7 @@ jobs:
4556
- name: Test
4657
env:
4758
MAVEN_OPTS: "-Xmx6g"
48-
run: mvn test -DargLine="-Xmx6g"
59+
run: mvn --no-transfer-progress test -DargLine="-Xmx6g"
4960

5061
- name: Assemble JAR
5162
run: mvn package -DskipTests

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,5 +36,6 @@ build/
3636

3737

3838
### Custom ###
39-
/testdata
39+
/testdata/l4_sample
4040
/testoutput
41+
/venv_zarrita

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,19 @@ array.write(
3838
ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1, 1024, 1024, 1024})
3939
);
4040
```
41+
## Development Start-Guide
42+
43+
### Run Tests Locally
44+
To run the tests locally, make sure `python3.11` is installed.
45+
Also, you need to set up a venv for zarrita at the root of the project:
46+
`python3.11 -m venv venv_zarrita`.
47+
48+
Then install zarrita there with `venv_zarrita/Scripts/pip install zarrita`
49+
on Windows, or `venv_zarrita/bin/pip install zarrita` on Linux and macOS.
50+
51+
Furthermore, you will need the `l4_sample` test data:
52+
53+
`curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip
54+
&& cd testdata
55+
&& unzip l4_sample.zip
56+
`

pom.xml

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,31 @@
1616
<aws.version>1.12.477</aws.version>
1717
<netcdfJavaVersion>5.5.3</netcdfJavaVersion>
1818
<zstdVersion>1.5.5-5</zstdVersion>
19-
<junit-jupiter-params.version>5.10.2</junit-jupiter-params.version>
19+
<junit-jupiter-version>5.10.2</junit-jupiter-version>
2020
</properties>
2121

2222
<dependencies>
23-
<!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-params -->
23+
<!-- JUnit 5 dependencies -->
24+
<dependency>
25+
<groupId>org.junit.jupiter</groupId>
26+
<artifactId>junit-jupiter-api</artifactId>
27+
<version>${junit-jupiter-version}</version>
28+
<scope>test</scope>
29+
</dependency>
30+
<dependency>
31+
<groupId>org.junit.jupiter</groupId>
32+
<artifactId>junit-jupiter-engine</artifactId>
33+
<version>${junit-jupiter-version}</version>
34+
<scope>test</scope>
35+
</dependency>
2436
<dependency>
2537
<groupId>org.junit.jupiter</groupId>
2638
<artifactId>junit-jupiter-params</artifactId>
27-
<version>${junit-jupiter-params.version}</version>
39+
<version>${junit-jupiter-version}</version>
2840
<scope>test</scope>
2941
</dependency>
42+
43+
<!-- Other dependencies -->
3044
<dependency>
3145
<groupId>com.fasterxml.jackson.core</groupId>
3246
<artifactId>jackson-databind</artifactId>
@@ -62,6 +76,7 @@
6276
<artifactId>okhttp</artifactId>
6377
<version>2.7.5</version>
6478
</dependency>
79+
<!-- JUnit 4 dependency for backward compatibility if needed -->
6580
<dependency>
6681
<groupId>junit</groupId>
6782
<artifactId>junit</artifactId>
@@ -79,6 +94,15 @@
7994
</repositories>
8095

8196
<build>
82-
<testSourceDirectory>src/test/java/dev/zarr/zarrjava</testSourceDirectory>
97+
<plugins>
98+
<plugin>
99+
<groupId>org.apache.maven.plugins</groupId>
100+
<artifactId>maven-surefire-plugin</artifactId>
101+
<version>3.2.5</version>
102+
<configuration>
103+
<useSystemClassLoader>false</useSystemClassLoader>
104+
</configuration>
105+
</plugin>
106+
</plugins>
83107
</build>
84-
</project>
108+
</project>

src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -109,23 +109,30 @@ public CodecBuilder withZstd(int clevel) {
109109
public CodecBuilder withSharding(int[] chunkShape) {
110110
try {
111111
codecs.add(
112-
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
113-
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
114-
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()})));
112+
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
113+
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
114+
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()},
115+
"end")));
115116
} catch (ZarrException e) {
116117
throw new RuntimeException(e);
117118
}
118119
return this;
119120
}
120121

121122
public CodecBuilder withSharding(int[] chunkShape,
122-
Function<CodecBuilder, CodecBuilder> codecBuilder) {
123+
Function<CodecBuilder, CodecBuilder> codecBuilder) {
124+
return withSharding(chunkShape, codecBuilder, "end");
125+
}
126+
127+
public CodecBuilder withSharding(int[] chunkShape,
128+
Function<CodecBuilder, CodecBuilder> codecBuilder, String indexLocation) {
123129
CodecBuilder nestedBuilder = new CodecBuilder(dataType);
124130
try {
125131
codecs.add(new ShardingIndexedCodec(
126-
new ShardingIndexedCodec.Configuration(chunkShape,
127-
codecBuilder.apply(nestedBuilder).build(),
128-
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()})));
132+
new ShardingIndexedCodec.Configuration(chunkShape,
133+
codecBuilder.apply(nestedBuilder).build(),
134+
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()},
135+
indexLocation)));
129136
} catch (ZarrException e) {
130137
throw new RuntimeException(e);
131138
}

src/main/java/dev/zarr/zarrjava/v3/codec/core/ShardingIndexedCodec.java

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,11 @@ public ByteBuffer encode(final Array shardArray) throws ZarrException {
133133
final ByteBuffer chunkBytes = codecPipeline.encode(chunkArray);
134134
synchronized (chunkBytesList) {
135135
int chunkByteOffset = chunkBytesList.stream()
136-
.mapToInt(ByteBuffer::capacity)
137-
.sum();
136+
.mapToInt(ByteBuffer::capacity)
137+
.sum();
138+
if (configuration.indexLocation.equals("start")) {
139+
chunkByteOffset += (int) getShardIndexSize(arrayMetadata);
140+
}
138141
setValueFromShardIndexArray(shardIndexArray, chunkCoords, 0, chunkByteOffset);
139142
setValueFromShardIndexArray(shardIndexArray, chunkCoords, 1,
140143
chunkBytes.capacity());
@@ -149,11 +152,15 @@ public ByteBuffer encode(final Array shardArray) throws ZarrException {
149152
.mapToInt(ByteBuffer::capacity)
150153
.sum() + (int) getShardIndexSize(arrayMetadata);
151154
final ByteBuffer shardBytes = ByteBuffer.allocate(shardBytesLength);
155+
if(configuration.indexLocation.equals("start")){
156+
shardBytes.put(indexCodecPipeline.encode(shardIndexArray));
157+
}
152158
for (final ByteBuffer chunkBytes : chunkBytesList) {
153159
shardBytes.put(chunkBytes);
154160
}
155-
shardBytes.put(
156-
indexCodecPipeline.encode(shardIndexArray));
161+
if(configuration.indexLocation.equals("end")){
162+
shardBytes.put(indexCodecPipeline.encode(shardIndexArray));
163+
}
157164
shardBytes.rewind();
158165
return shardBytes;
159166
}
@@ -179,8 +186,14 @@ private Array decodeInternal(
179186

180187
final Array outputArray = Array.factory(arrayMetadata.dataType.getMA2DataType(), shape);
181188
final int shardIndexByteLength = (int) getShardIndexSize(arrayMetadata);
182-
ByteBuffer shardIndexBytes = dataProvider.readSuffix(shardIndexByteLength);
183-
189+
ByteBuffer shardIndexBytes;
190+
if (this.configuration.indexLocation.equals("start")) {
191+
shardIndexBytes = dataProvider.readPrefix(shardIndexByteLength);
192+
}else if(this.configuration.indexLocation.equals("end")){
193+
shardIndexBytes = dataProvider.readSuffix(shardIndexByteLength);
194+
}else{
195+
throw new ZarrException("Only index_location \"start\" or \"end\" are supported.");
196+
}
184197
if (shardIndexBytes == null) {
185198
throw new ZarrException("Could not read shard index.");
186199
}
@@ -243,6 +256,8 @@ interface DataProvider {
243256
ByteBuffer read(long start, long length);
244257

245258
ByteBuffer readSuffix(long suffixLength);
259+
260+
ByteBuffer readPrefix(long prefixLength);
246261
}
247262

248263
public static final class Configuration {
@@ -255,16 +270,27 @@ public static final class Configuration {
255270
@Nonnull
256271
@JsonProperty("index_codecs")
257272
public final Codec[] indexCodecs;
273+
@Nonnull
274+
@JsonProperty("index_location")
275+
public String indexLocation;
258276

259277
@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
260278
public Configuration(
261-
@JsonProperty(value = "chunk_shape", required = true) int[] chunkShape,
262-
@Nonnull @JsonProperty("codecs") Codec[] codecs,
263-
@Nonnull @JsonProperty("index_codecs") Codec[] indexCodecs
264-
) {
279+
@JsonProperty(value = "chunk_shape", required = true) int[] chunkShape,
280+
@Nonnull @JsonProperty("codecs") Codec[] codecs,
281+
@Nonnull @JsonProperty("index_codecs") Codec[] indexCodecs,
282+
@JsonProperty(value = "index_location", defaultValue = "end") String indexLocation
283+
) throws ZarrException {
284+
if (indexLocation == null) {
285+
indexLocation = "end";
286+
}
287+
if (!indexLocation.equals("start") && !indexLocation.equals("end")) {
288+
throw new ZarrException("Only index_location \"start\" or \"end\" are supported.");
289+
}
265290
this.chunkShape = chunkShape;
266291
this.codecs = codecs;
267292
this.indexCodecs = indexCodecs;
293+
this.indexLocation = indexLocation;
268294
}
269295
}
270296

@@ -285,6 +311,12 @@ public ByteBuffer readSuffix(long suffixLength) {
285311
return bufferSlice.slice();
286312
}
287313

314+
public ByteBuffer readPrefix(long prefixLength) {
315+
ByteBuffer bufferSlice = buffer.slice();
316+
bufferSlice.limit((int) (prefixLength));
317+
return bufferSlice.slice();
318+
}
319+
288320
@Override
289321
public ByteBuffer read(long start, long length) {
290322
ByteBuffer bufferSlice = buffer.slice();
@@ -309,6 +341,11 @@ public ByteBuffer readSuffix(long suffixLength) {
309341
return storeHandle.read(-suffixLength);
310342
}
311343

344+
@Override
345+
public ByteBuffer readPrefix(long prefixLength) {
346+
return storeHandle.read(0, prefixLength);
347+
}
348+
312349
@Override
313350
public ByteBuffer read(long start, long length) {
314351
return storeHandle.read(start, start + length);

src/main/java/dev/zarr/zarrjava/v3/codec/core/ZstdCodec.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public ByteBuffer encode(ByteBuffer chunkBytes)
5959
zstdStream.close();
6060
return ByteBuffer.wrap(outputStream.toByteArray());
6161
} catch (IOException ex) {
62-
throw new ZarrException("Error in decoding zstd.", ex);
62+
throw new ZarrException("Error in encoding zstd.", ex);
6363
}
6464
}
6565

src/test/java/dev/zarr/zarrjava/TestUtils.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
package dev.zarr.zarrjava;
22

3+
4+
import org.junit.Test;
35
import org.junit.jupiter.api.Assertions;
4-
import org.junit.jupiter.api.Test;
56

67
import java.util.Arrays;
78

0 commit comments

Comments
 (0)