Skip to content

Commit 8eb0a4c

Browse files
committed
Add GCS integration test checking that range reads are enough to list the files in a zip file
1 parent f667e68 commit 8eb0a4c

File tree

5 files changed

+95
-0
lines changed

5 files changed

+95
-0
lines changed

zipserver/gcs_storage.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,10 @@ func (r *gcsReaderAt) Close() error {
211211
return nil // No resources to release
212212
}
213213

214+
func (r *gcsReaderAt) BytesRead() uint64 {
215+
return r.bytesRead
216+
}
217+
214218
// GetReaderAt returns a ReaderAt for the file, suitable for random access reads.
215219
// This is more efficient than GetFile for operations that only need partial file access.
216220
// maxBytes limits the total bytes that can be read (0 = unlimited).

zipserver/gcs_storage_test.go

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package zipserver
22

33
import (
4+
"archive/zip"
5+
"bytes"
46
"context"
57
"crypto/md5"
68
"fmt"
@@ -92,3 +94,83 @@ func TestPutAndDeleteFile(t *testing.T) {
9294
}
9395
})
9496
}
97+
98+
func TestGetReaderAtRangeEfficiency(t *testing.T) {
99+
ctx := context.Background()
100+
101+
withGoogleCloudStorage(t, func(storage Storage, config *Config) {
102+
// Create a zip with uncompressed data to make it large enough to see efficiency gains
103+
var buf bytes.Buffer
104+
zw := zip.NewWriter(&buf)
105+
106+
// Add files with Store (no compression) to ensure predictable size
107+
for i := 0; i < 10; i++ {
108+
header := &zip.FileHeader{
109+
Name: fmt.Sprintf("file%d.bin", i),
110+
Method: zip.Store, // No compression
111+
}
112+
f, err := zw.CreateHeader(header)
113+
if err != nil {
114+
t.Fatalf("create file: %v", err)
115+
}
116+
// Write 100KB of pseudo-random data per file (1MB total)
117+
padding := make([]byte, 100*1024)
118+
for j := range padding {
119+
padding[j] = byte((j * 17) % 256) // Pseudo-random pattern
120+
}
121+
if _, err := f.Write(padding); err != nil {
122+
t.Fatalf("write: %v", err)
123+
}
124+
}
125+
if err := zw.Close(); err != nil {
126+
t.Fatalf("close zip: %v", err)
127+
}
128+
129+
zipData := buf.Bytes()
130+
zipSize := int64(len(zipData))
131+
t.Logf("Test zip size: %d bytes", zipSize)
132+
133+
// Upload the test zip
134+
testKey := "zipserver_range_test.zip"
135+
_, err := storage.PutFile(ctx, config.Bucket, testKey, bytes.NewReader(zipData), PutOptions{
136+
ContentType: "application/zip",
137+
})
138+
if err != nil {
139+
t.Fatalf("upload test zip: %v", err)
140+
}
141+
defer storage.DeleteFile(ctx, config.Bucket, testKey)
142+
143+
// Get a ReaderAt and list the zip contents
144+
readerAt, size, err := storage.GetReaderAt(ctx, config.Bucket, testKey, 0)
145+
if err != nil {
146+
t.Fatalf("GetReaderAt: %v", err)
147+
}
148+
defer readerAt.Close()
149+
150+
if size != zipSize {
151+
t.Fatalf("size mismatch: got %d, expected %d", size, zipSize)
152+
}
153+
154+
// Use zip.NewReader which should only read the central directory
155+
zipReader, err := zip.NewReader(readerAt, size)
156+
if err != nil {
157+
t.Fatalf("zip.NewReader: %v", err)
158+
}
159+
160+
// Verify we got the right files
161+
if len(zipReader.File) != 10 {
162+
t.Fatalf("expected 10 files, got %d", len(zipReader.File))
163+
}
164+
165+
bytesRead := readerAt.BytesRead()
166+
t.Logf("Bytes read: %d / %d (%.2f%%)", bytesRead, zipSize, float64(bytesRead)/float64(zipSize)*100)
167+
168+
// The central directory + EOCD should be much smaller than the full zip
169+
// For a 1MB zip with 10 files, we expect to read only a few KB
170+
// Use 5% as threshold - actual should be < 1%
171+
maxExpectedBytes := uint64(zipSize / 20)
172+
if bytesRead > maxExpectedBytes {
173+
t.Errorf("Read too many bytes: %d > %d (expected < 5%% of file)", bytesRead, maxExpectedBytes)
174+
}
175+
})
176+
}

zipserver/mem_storage.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ func (r *memReaderAt) Close() error {
117117
return nil
118118
}
119119

120+
func (r *memReaderAt) BytesRead() uint64 {
121+
return r.bytesRead
122+
}
123+
120124
func (fs *MemStorage) GetReaderAt(ctx context.Context, bucket, key string, maxBytes uint64) (ReaderAtCloser, int64, error) {
121125
fs.mutex.Lock()
122126
defer fs.mutex.Unlock()

zipserver/s3_storage.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,10 @@ func (r *s3ReaderAt) Close() error {
185185
return nil // No resources to release
186186
}
187187

188+
func (r *s3ReaderAt) BytesRead() uint64 {
189+
return r.bytesRead
190+
}
191+
188192
// GetReaderAt returns a ReaderAt for the file, suitable for random access reads.
189193
// This is more efficient than GetFile for operations that only need partial file access.
190194
// maxBytes limits the total bytes that can be read (0 = unlimited).

zipserver/storage.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ type PutResult struct {
3131
type ReaderAtCloser interface {
	// Random-access reads at explicit offsets.
	io.ReaderAt
	// Close is called by users once they are done with the reader.
	io.Closer

	// BytesRead returns the total number of bytes read so far.
	BytesRead() uint64
}
3536

3637
// Storage is a place we can get files from, put files into, or delete files from

0 commit comments

Comments
 (0)