Skip to content

Commit 7d4f487

Browse files
committed
inherit zipstores from common parent and reduce buffers in memory in loadBuffer
1 parent 086d3f8 commit 7d4f487

File tree

5 files changed

+138
-122
lines changed

5 files changed

+138
-122
lines changed

src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java

Lines changed: 51 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,58 +8,53 @@
88
import java.nio.ByteBuffer;
99
import java.nio.file.Path;
1010
import java.nio.file.Paths;
11+
import java.util.Comparator;
1112
import java.util.stream.Stream;
1213

13-
import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream;
1414
import org.apache.commons.compress.archivers.zip.*;
1515

1616
import java.util.zip.CRC32;
1717
import java.util.zip.ZipEntry; // for STORED constant
1818

19-
import static dev.zarr.zarrjava.utils.ZipUtils.getZipCommentFromBuffer;
20-
2119

2220
/** A Store implementation that buffers reads and writes and flushes them to an underlying Store as a zip file.
2321
*/
24-
public class BufferedZipStore implements Store, Store.ListableStore {
22+
public class BufferedZipStore extends ZipStore {
2523

26-
private final StoreHandle underlyingStore;
2724
private final Store.ListableStore bufferStore;
2825
private String archiveComment;
29-
private boolean flushOnWrite;
26+
private final boolean flushOnWrite;
27+
28+
private final Comparator<String[]> zipEntryComparator = (a, b) -> {
29+
boolean aIsZarr = a.length > 0 && a[a.length - 1].equals("zarr.json");
30+
boolean bIsZarr = b.length > 0 && b[b.length - 1].equals("zarr.json");
31+
// first all zarr.json files
32+
if (aIsZarr && !bIsZarr) {
33+
return -1;
34+
} else if (!aIsZarr && bIsZarr) {
35+
return 1;
36+
} else if (aIsZarr && bIsZarr) {
37+
// sort zarr.json in BFS order within same depth by lexicographical order
38+
if (a.length != b.length) {
39+
return Integer.compare(a.length, b.length);
40+
} else {
41+
return String.join("/", a).compareTo(String.join("/", b));
42+
}
43+
} else {
44+
// then all other files in lexicographical order
45+
return String.join("/", a).compareTo(String.join("/", b));
46+
}
47+
};
3048

31-
private void writeBuffer() throws IOException{
49+
private void writeBuffer() throws IOException {
3250
// create zip file bytes from buffer store and write to underlying store
3351
ByteArrayOutputStream baos = new ByteArrayOutputStream();
3452
try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(baos)) {
3553
zos.setUseZip64(Zip64Mode.AsNeeded);
3654
if (archiveComment != null) {
3755
zos.setComment(archiveComment);
3856
}
39-
Stream<String[]> entries = bufferStore.list().sorted(
40-
(a, b) -> {
41-
boolean aIsZarr = a.length > 0 && a[a.length - 1].equals("zarr.json");
42-
boolean bIsZarr = b.length > 0 && b[b.length - 1].equals("zarr.json");
43-
// first all zarr.json files
44-
if (aIsZarr && !bIsZarr) {
45-
return -1;
46-
} else if (!aIsZarr && bIsZarr) {
47-
return 1;
48-
} else if (aIsZarr && bIsZarr) {
49-
// sort zarr.json in BFS order within same depth by lexicographical order
50-
if (a.length != b.length) {
51-
return Integer.compare(a.length, b.length);
52-
} else {
53-
return String.join("/", a).compareTo(String.join("/", b));
54-
}
55-
} else {
56-
// then all other files in lexicographical order
57-
return String.join("/", a).compareTo(String.join("/", b));
58-
}
59-
}
60-
);
61-
62-
entries.forEach(keys -> {
57+
bufferStore.list().sorted(zipEntryComparator).forEach(keys -> {
6358
try {
6459
if (keys == null || keys.length == 0) {
6560
// skip root entry
@@ -116,22 +111,32 @@ private void writeBuffer() throws IOException{
116111
underlyingStore.set(ByteBuffer.wrap(zipBytes));
117112
}
118113

114+
public void setArchiveComment(@Nullable String archiveComment) throws IOException {
115+
this.archiveComment = archiveComment;
116+
if (flushOnWrite) {
117+
writeBuffer();
118+
}
119+
}
120+
121+
public void deleteArchiveComment() throws IOException {
122+
this.setArchiveComment(null);
123+
}
119124

120-
private void loadBuffer() throws IOException{
121-
// read zip file bytes from underlying store and populate buffer store
122-
ByteBuffer buffer = underlyingStore.read();
123-
if (buffer == null) {
124-
return;
125+
/**
126+
* Loads the buffer from the underlying store zip file.
127+
*/
128+
private void loadBuffer() throws IOException {
129+
String loadedArchiveComment = super.getArchiveComment();
130+
if (loadedArchiveComment != null && this.archiveComment == null) {
131+
// don't overwrite existing archiveComment
132+
this.archiveComment = loadedArchiveComment;
125133
}
126-
byte[] bufArray;
127-
if (buffer.hasArray()) {
128-
bufArray = buffer.array();
129-
} else {
130-
bufArray = new byte[buffer.remaining()];
131-
buffer.duplicate().get(bufArray);
134+
135+
InputStream inputStream = underlyingStore.getInputStream();
136+
if (inputStream == null) {
137+
return;
132138
}
133-
this.archiveComment = getZipCommentFromBuffer(bufArray);
134-
try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) {
139+
try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) {
135140
ZipArchiveEntry entry;
136141
while ((entry = zis.getNextEntry()) != null) {
137142
if (entry.isDirectory()) {
@@ -150,7 +155,7 @@ private void loadBuffer() throws IOException{
150155
}
151156

152157
public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment, boolean flushOnWrite) {
153-
this.underlyingStore = underlyingStore;
158+
super(underlyingStore);
154159
this.bufferStore = bufferStore;
155160
this.archiveComment = archiveComment;
156161
this.flushOnWrite = flushOnWrite;
@@ -229,6 +234,7 @@ public void flush() throws IOException {
229234
writeBuffer();
230235
}
231236

237+
/**
 * Returns the archive comment currently held in this store's {@code archiveComment} field.
 *
 * <p>Unlike the overridden {@code ZipStore} method, this override does not declare
 * {@code IOException}; it only returns the field value.
 *
 * @return the stored comment, or {@code null} if none is set
 */
@Override
232238
public String getArchiveComment() {
233239
return archiveComment;
234240
}

src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ public String toString() {
153153
public InputStream getInputStream(String[] keys, long start, long end) {
154154
Path keyPath = resolveKeys(keys);
155155
try {
156+
if (!Files.exists(keyPath)) {
157+
return null;
158+
}
156159
InputStream inputStream = Files.newInputStream(keyPath);
157160
if (start > 0) {
158161
long skipped = inputStream.skip(start);
@@ -170,6 +173,7 @@ public InputStream getInputStream(String[] keys, long start, long end) {
170173
throw new RuntimeException(e);
171174
}
172175
}
176+
173177
public long getSize(String[] keys) {
174178
try {
175179
return Files.size(resolveKeys(keys));

src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java

Lines changed: 2 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,12 @@
1515
import java.nio.file.Paths;
1616
import java.util.stream.Stream;
1717

18-
import static dev.zarr.zarrjava.utils.ZipUtils.getZipCommentFromBuffer;
19-
2018

2119
/** A Store implementation that provides read-only access to a zip archive stored in an underlying Store.
2220
* Compared to BufferedZipStore, this implementation reads directly from the zip archive without parsing
2321
* its contents into a buffer store first, making it more efficient for read-only access to large zip archives.
2422
*/
25-
public class ReadOnlyZipStore implements Store, Store.ListableStore {
26-
27-
private final StoreHandle underlyingStore;
23+
public class ReadOnlyZipStore extends ZipStore {
2824

2925
String resolveKeys(String[] keys) {
3026
return String.join("/", keys);
@@ -51,42 +47,6 @@ public ByteBuffer get(String[] keys, long start) {
5147
return get(keys, start, -1);
5248
}
5349

54-
public String getArchiveComment() throws IOException {
55-
// Attempt to read from the end of the file to find the EOCD record.
56-
// We try a small chunk first (1KB) which covers most short comments (or no comment),
57-
// then the maximum possible EOCD size (approx 65KB).
58-
int[] readSizes = {1024, 65535 + 22};
59-
60-
for (int size : readSizes) {
61-
ByteBuffer buffer;
62-
long fileSize = underlyingStore.getSize();
63-
64-
if (fileSize < size){
65-
buffer = underlyingStore.read();
66-
}
67-
else {
68-
buffer = underlyingStore.read(fileSize - size);
69-
}
70-
71-
if (buffer == null) {
72-
return null;
73-
}
74-
75-
byte[] bufArray;
76-
if (buffer.hasArray()) {
77-
bufArray = buffer.array();
78-
} else {
79-
bufArray = new byte[buffer.remaining()];
80-
buffer.duplicate().get(bufArray);
81-
}
82-
83-
String comment = getZipCommentFromBuffer(bufArray);
84-
if (comment != null) {
85-
return comment;
86-
}
87-
}
88-
return null;
89-
}
9050
@Nullable
9151
@Override
9252
public ByteBuffer get(String[] keys, long start, long end) {
@@ -153,7 +113,7 @@ public String toString() {
153113
}
154114

155115
public ReadOnlyZipStore(@Nonnull StoreHandle underlyingStore) {
156-
this.underlyingStore = underlyingStore;
116+
super(underlyingStore);
157117
}
158118

159119
public ReadOnlyZipStore(@Nonnull Path underlyingStore) {
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package dev.zarr.zarrjava.store;
2+
3+
import javax.annotation.Nonnull;
4+
import javax.annotation.Nullable;
5+
import java.io.IOException;
6+
import java.nio.ByteBuffer;
7+
8+
public abstract class ZipStore implements Store, Store.ListableStore {
9+
protected final StoreHandle underlyingStore;
10+
11+
/**
 * Creates a zip store on top of the given handle.
 *
 * @param underlyingStore handle kept in the {@code underlyingStore} field; presumably it
 *                        points at the zip archive itself (confirm against callers)
 */
public ZipStore(@Nonnull StoreHandle underlyingStore) {
12+
this.underlyingStore = underlyingStore;
13+
}
14+
15+
public String getArchiveComment() throws IOException {
16+
// Attempt to read from the end of the file to find the EOCD record.
17+
// We try a small chunk first (1KB) which covers most short comments (or no comment),
18+
// then the maximum possible EOCD size (approx 65KB).
19+
if (!underlyingStore.exists()) {
20+
return null;
21+
}
22+
int[] readSizes = {1024, 65535 + 22};
23+
24+
for (int size : readSizes) {
25+
ByteBuffer buffer;
26+
long fileSize = underlyingStore.getSize();
27+
28+
if (fileSize < size){
29+
buffer = underlyingStore.read();
30+
}
31+
else {
32+
buffer = underlyingStore.read(fileSize - size);
33+
}
34+
35+
if (buffer == null) {
36+
return null;
37+
}
38+
39+
byte[] bufArray;
40+
if (buffer.hasArray()) {
41+
bufArray = buffer.array();
42+
} else {
43+
bufArray = new byte[buffer.remaining()];
44+
buffer.duplicate().get(bufArray);
45+
}
46+
47+
String comment = getZipCommentFromBuffer(bufArray);
48+
if (comment != null) {
49+
return comment;
50+
}
51+
}
52+
return null;
53+
}
54+
55+
// adapted from https://stackoverflow.com/a/9918966
56+
@Nullable
57+
public static String getZipCommentFromBuffer(byte[] bufArray) throws IOException {
58+
// End of Central Directory (EOCD) record magic number
59+
byte[] EOCD = {0x50, 0x4b, 0x05, 0x06};
60+
int buffLen = bufArray.length;
61+
// Check the buffer from the end
62+
search:
63+
for (int i = buffLen - EOCD.length - 22; i >= 0; i--) {
64+
for (int k = 0; k < EOCD.length; k++) {
65+
if (bufArray[i + k] != EOCD[k]) {
66+
continue search;
67+
}
68+
}
69+
// End of Central Directory found!
70+
int commentLen = bufArray[i + 20] + bufArray[i + 21] * 256;
71+
int realLen = buffLen - i - 22;
72+
if (commentLen != realLen) {
73+
throw new IOException("ZIP comment size mismatch: "
74+
+ "directory says len is " + commentLen
75+
+ ", but file ends after " + realLen + " bytes!");
76+
}
77+
return new String(bufArray, i + 22, commentLen);
78+
}
79+
return null;
80+
}
81+
}

src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java

Lines changed: 0 additions & 35 deletions
This file was deleted.

0 commit comments

Comments
 (0)