Skip to content

Commit a0c35da

Browse files
levbclaude
andcommitted
refactor(storage): address PR #2246 review feedback
Addresses dobrac's review comments on the compression primitives PR: Serialization: - Split serialization.go into serialization_v3.go and serialization_v4.go - Each version's wire format is self-contained in one file - Move Metadata types/constants to metadata.go - Remove unused exported Serialize() wrapper V4 wire format: - Replace packed CompressionTypeNumFrames uint64 with separate CompressionType uint32 + NumFrames uint32 (no bit-shifting) - Remove MaxCompressedHeaderSize limit (uint32 prefix + LZ4 frame boundary are sufficient) Naming and reuse: - Rename AddFrames → SetFrames (replaces, not appends) - Use SetFrames in MergeMappings instead of inline Subset calls - Remove unnecessary maps.Clone in ToDiffHeader Formatting: - Use decimal (%d) instead of hex (%#x) in error messages and String() methods for consistency with the rest of the codebase Tests: - Add test for uploadPartSlices (MD5 hashing, body concatenation) Documentation: - Flag BuildFiles incompleteness: only contains builds from current upload session, not upstream dependencies Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6a6c30e commit a0c35da

File tree

14 files changed

+478
-446
lines changed

14 files changed

+478
-446
lines changed

packages/orchestrator/pkg/sandbox/block/chunk.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ func (c *Chunker) fetch(ctx context.Context, off int64, ft *storage.FrameTable)
213213
if ft.IsCompressed() {
214214
frameStarts, frameSize, err := ft.FrameFor(off)
215215
if err != nil {
216-
return fmt.Errorf("failed to get frame for offset %#x: %w", off, err)
216+
return fmt.Errorf("failed to get frame for offset %d: %w", off, err)
217217
}
218218

219219
chunkOff = frameStarts.U
@@ -302,7 +302,7 @@ func (c *Chunker) runFetch(ctx context.Context, session *fetchSession, offsetU i
302302
_, err = file.GetFrame(ctx, offsetU, ft, compressed, mmapSlice[:session.chunkLen], readSize, onRead)
303303
if err != nil {
304304
timer.RecordRaw(ctx, session.chunkLen, attrs.remoteFailure)
305-
session.setError(fmt.Errorf("failed to fetch data at %#x: %w", offsetU, err), false)
305+
session.setError(fmt.Errorf("failed to fetch data at %d: %w", offsetU, err), false)
306306

307307
return
308308
}

packages/orchestrator/pkg/sandbox/build_upload.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,8 @@ func (p *PendingBuildInfo) applyToHeader(h *headers.Header, fileType string) err
390390
continue
391391
}
392392

393-
if err := mapping.AddFrames(info.ft); err != nil {
394-
return fmt.Errorf("apply frames to mapping at offset %#x for build %s: %w",
393+
if err := mapping.SetFrames(info.ft); err != nil {
394+
return fmt.Errorf("apply frames to mapping at offset %d for build %s: %w",
395395
mapping.Offset, mapping.BuildId.String(), err)
396396
}
397397
}

packages/shared/pkg/storage/compress_frame_table.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ type FrameOffset struct {
5353
}
5454

5555
func (o *FrameOffset) String() string {
56-
return fmt.Sprintf("U:%#x/C:%#x", o.U, o.C)
56+
return fmt.Sprintf("U:%d/C:%d", o.U, o.C)
5757
}
5858

5959
func (o *FrameOffset) Add(f FrameSize) {
@@ -67,7 +67,7 @@ type FrameSize struct {
6767
}
6868

6969
func (s FrameSize) String() string {
70-
return fmt.Sprintf("U:%#x/C:%#x", s.U, s.C)
70+
return fmt.Sprintf("U:%d/C:%d", s.U, s.C)
7171
}
7272

7373
type Range struct {
@@ -76,7 +76,7 @@ type Range struct {
7676
}
7777

7878
func (r Range) String() string {
79-
return fmt.Sprintf("%#x/%#x", r.Start, r.Length)
79+
return fmt.Sprintf("%d/%d", r.Start, r.Length)
8080
}
8181

8282
type FrameTable struct {
@@ -196,7 +196,7 @@ func (ft *FrameTable) FrameFor(offset int64) (starts FrameOffset, size FrameSize
196196
currentOffset.Add(frame)
197197
}
198198

199-
return FrameOffset{}, FrameSize{}, fmt.Errorf("offset %#x is beyond the end of the frame table", offset)
199+
return FrameOffset{}, FrameSize{}, fmt.Errorf("offset %d is beyond the end of the frame table", offset)
200200
}
201201

202202
// GetFetchRange translates a U-space range to C-space using the frame table.
@@ -205,12 +205,12 @@ func (ft *FrameTable) GetFetchRange(rangeU Range) (Range, error) {
205205
if ft.IsCompressed() {
206206
start, size, err := ft.FrameFor(rangeU.Start)
207207
if err != nil {
208-
return Range{}, fmt.Errorf("getting frame for offset %#x: %w", rangeU.Start, err)
208+
return Range{}, fmt.Errorf("getting frame for offset %d: %w", rangeU.Start, err)
209209
}
210210
endOffset := rangeU.Start + int64(rangeU.Length)
211211
frameEnd := start.U + int64(size.U)
212212
if endOffset > frameEnd {
213-
return Range{}, fmt.Errorf("range %v spans beyond frame ending at %#x", rangeU, frameEnd)
213+
return Range{}, fmt.Errorf("range %v spans beyond frame ending at %d", rangeU, frameEnd)
214214
}
215215
fetchRange = Range{
216216
Start: start.C,

packages/shared/pkg/storage/compress_upload.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,6 @@ import (
1414
"golang.org/x/sync/errgroup"
1515
)
1616

17-
// MaxCompressedHeaderSize is the maximum allowed decompressed header size (64 MiB).
18-
// Headers are typically a few hundred KiB (e.g., 100 layers × 256 frames × 32 bytes/frame ≈ 800 KB).
19-
// This is a safety bound to prevent unbounded allocation from corrupt data.
20-
const MaxCompressedHeaderSize = 64 << 20
21-
2217
const (
2318
// DefaultCompressFrameSize is the default uncompressed size of each compression
2419
// frame (2 MiB). Overridable via CompressConfig.FrameSizeKB.

packages/shared/pkg/storage/gcp_multipart_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package storage
22

33
import (
4+
"crypto/md5"
5+
"encoding/base64"
46
"encoding/xml"
57
"fmt"
68
"io"
@@ -115,6 +117,42 @@ func TestMultipartUploader_UploadPart_Success(t *testing.T) {
115117
require.Equal(t, expectedETag, etag)
116118
}
117119

120+
func TestMultipartUploader_UploadPartSlices_Success(t *testing.T) {
121+
t.Parallel()
122+
expectedETag := `"slice-etag"`
123+
slices := [][]byte{[]byte("hello "), []byte("world"), []byte("!")}
124+
125+
// Compute expected MD5 over all slices.
126+
h := md5.New()
127+
for _, s := range slices {
128+
h.Write(s)
129+
}
130+
expectedMD5 := base64.StdEncoding.EncodeToString(h.Sum(nil))
131+
132+
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
133+
assert.Equal(t, "PUT", r.Method)
134+
assert.Contains(t, r.URL.RawQuery, "partNumber=3")
135+
assert.Contains(t, r.URL.RawQuery, "uploadId=test-upload-id")
136+
137+
// Verify MD5 matches the expected hash of all slices.
138+
assert.Equal(t, expectedMD5, r.Header.Get("Content-MD5"))
139+
140+
// Verify body is the concatenation of all slices.
141+
body, err := io.ReadAll(r.Body)
142+
assert.NoError(t, err)
143+
assert.Equal(t, []byte("hello world!"), body)
144+
145+
w.Header().Set("ETag", expectedETag)
146+
w.WriteHeader(http.StatusOK)
147+
})
148+
149+
uploader := createTestMultipartUploader(t, handler)
150+
etag, err := uploader.uploadPartSlices(t.Context(), "test-upload-id", 3, slices)
151+
152+
require.NoError(t, err)
153+
require.Equal(t, expectedETag, etag)
154+
}
155+
118156
func TestMultipartUploader_UploadPart_MissingETag(t *testing.T) {
119157
t.Parallel()
120158
handler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {

packages/shared/pkg/storage/header/header.go

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,14 @@ type BuildFileInfo struct {
2525
const NormalizeFixVersion = 3
2626

2727
type Header struct {
28-
Metadata *Metadata
29-
BuildFiles map[uuid.UUID]BuildFileInfo // V4 only: per-build file size + checksum
28+
Metadata *Metadata
29+
// BuildFiles maps build IDs to their file metadata (size + checksum).
30+
// NOTE: This is currently incomplete — it only contains entries for builds
31+
// uploaded within the same layered upload session. Upstream dependency builds
32+
// (from parent templates) are missing, causing a Size() RPC fallback on first
33+
// access. TODO: populate from the orchestrator's template cache at upload time
34+
// so all builds referenced in Mapping have entries here.
35+
BuildFiles map[uuid.UUID]BuildFileInfo
3036
blockStarts *bitset.BitSet
3137
startMap map[int64]*BuildMap
3238

@@ -135,7 +141,7 @@ func (t *Header) Mappings(all bool) string {
135141
if m.FrameTable != nil {
136142
frames = len(m.FrameTable.Frames)
137143
}
138-
result += fmt.Sprintf(" - Offset: %#x, Length: %#x, BuildId: %s, BuildStorageOffset: %#x, numFrames: %d\n",
144+
result += fmt.Sprintf(" - Offset: %d, Length: %d, BuildId: %s, BuildStorageOffset: %d, numFrames: %d\n",
139145
m.Offset,
140146
m.Length,
141147
m.BuildId.String(),
@@ -272,7 +278,7 @@ func ValidateHeader(h *Header) error {
272278

273279
// Check that first mapping starts at 0
274280
if sortedMappings[0].Offset != 0 {
275-
return fmt.Errorf("mappings don't start at 0: first mapping starts at %#x for buildId %s",
281+
return fmt.Errorf("mappings don't start at 0: first mapping starts at %d for buildId %s",
276282
sortedMappings[0].Offset, h.Metadata.BuildId.String())
277283
}
278284

@@ -282,11 +288,11 @@ func ValidateHeader(h *Header) error {
282288
nextStart := sortedMappings[i+1].Offset
283289

284290
if currentEnd < nextStart {
285-
return fmt.Errorf("gap in mappings: mapping[%d] ends at %#x but mapping[%d] starts at %#x (gap=%d bytes) for buildId %s",
291+
return fmt.Errorf("gap in mappings: mapping[%d] ends at %d but mapping[%d] starts at %d (gap=%d bytes) for buildId %s",
286292
i, currentEnd, i+1, nextStart, nextStart-currentEnd, h.Metadata.BuildId.String())
287293
}
288294
if currentEnd > nextStart {
289-
return fmt.Errorf("overlap in mappings: mapping[%d] ends at %#x but mapping[%d] starts at %#x (overlap=%d bytes) for buildId %s",
295+
return fmt.Errorf("overlap in mappings: mapping[%d] ends at %d but mapping[%d] starts at %d (overlap=%d bytes) for buildId %s",
290296
i, currentEnd, i+1, nextStart, currentEnd-nextStart, h.Metadata.BuildId.String())
291297
}
292298
}
@@ -295,42 +301,42 @@ func ValidateHeader(h *Header) error {
295301
lastMapping := sortedMappings[len(sortedMappings)-1]
296302
lastEnd := lastMapping.Offset + lastMapping.Length
297303
if lastEnd < h.Metadata.Size {
298-
return fmt.Errorf("mappings don't cover entire file: last mapping ends at %#x but file size is %#x (missing %d bytes) for buildId %s",
304+
return fmt.Errorf("mappings don't cover entire file: last mapping ends at %d but file size is %d (missing %d bytes) for buildId %s",
299305
lastEnd, h.Metadata.Size, h.Metadata.Size-lastEnd, h.Metadata.BuildId.String())
300306
}
301307

302308
// Allow last mapping to extend up to one block past size (for alignment)
303309
if lastEnd > h.Metadata.Size+h.Metadata.BlockSize {
304-
return fmt.Errorf("last mapping extends too far: ends at %#x but file size is %#x (overhang=%d bytes, max allowed=%d) for buildId %s",
310+
return fmt.Errorf("last mapping extends too far: ends at %d but file size is %d (overhang=%d bytes, max allowed=%d) for buildId %s",
305311
lastEnd, h.Metadata.Size, lastEnd-h.Metadata.Size, h.Metadata.BlockSize, h.Metadata.BuildId.String())
306312
}
307313

308314
// Validate individual mapping bounds
309315
for i, m := range h.Mapping {
310316
if m.Offset > h.Metadata.Size {
311-
return fmt.Errorf("mapping[%d] has Offset %#x beyond header size %#x for buildId %s",
317+
return fmt.Errorf("mapping[%d] has Offset %d beyond header size %d for buildId %s",
312318
i, m.Offset, h.Metadata.Size, m.BuildId.String())
313319
}
314320
if m.Length == 0 {
315-
return fmt.Errorf("mapping[%d] has zero length at offset %#x for buildId %s",
321+
return fmt.Errorf("mapping[%d] has zero length at offset %d for buildId %s",
316322
i, m.Offset, m.BuildId.String())
317323
}
318324
}
319325

320326
return nil
321327
}
322328

323-
// AddFrames associates compression frame information with this header's mappings.
329+
// SetFrames associates compression frame information with this header's mappings.
324330
//
325331
// Only mappings matching this header's BuildId will be updated. Returns nil if frameTable is nil.
326-
func (t *Header) AddFrames(frameTable *storage.FrameTable) error {
332+
func (t *Header) SetFrames(frameTable *storage.FrameTable) error {
327333
if frameTable == nil {
328334
return nil
329335
}
330336

331337
for _, mapping := range t.Mapping {
332338
if mapping.BuildId == t.Metadata.BuildId {
333-
if err := mapping.AddFrames(frameTable); err != nil {
339+
if err := mapping.SetFrames(frameTable); err != nil {
334340
return err
335341
}
336342
}

packages/shared/pkg/storage/header/mapping.go

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ func (mapping *BuildMap) Copy() *BuildMap {
3232
}
3333
}
3434

35-
// AddFrames associates compression frame information with this mapping.
35+
// SetFrames associates compression frame information with this mapping.
3636
//
3737
// When a file is uploaded with compression, the compressor produces a FrameTable
3838
// that describes how the compressed data is organized into frames. This method
@@ -41,7 +41,7 @@ func (mapping *BuildMap) Copy() *BuildMap {
4141
//
4242
// Returns nil if frameTable is nil. Returns an error if the mapping's range
4343
// cannot be found in the frame table.
44-
func (mapping *BuildMap) AddFrames(frameTable *storage.FrameTable) error {
44+
func (mapping *BuildMap) SetFrames(frameTable *storage.FrameTable) error {
4545
if frameTable == nil {
4646
return nil
4747
}
@@ -53,7 +53,7 @@ func (mapping *BuildMap) AddFrames(frameTable *storage.FrameTable) error {
5353

5454
subset, err := frameTable.Subset(mappedRange)
5555
if err != nil {
56-
return fmt.Errorf("mapping at virtual offset %#x (storage offset %#x, length %#x): %w",
56+
return fmt.Errorf("mapping at virtual offset %d (storage offset %d, length %d): %w",
5757
mapping.Offset, mapping.BuildStorageOffset, mapping.Length, err)
5858
}
5959

@@ -131,7 +131,6 @@ func MergeMappings(
131131

132132
mappings := make([]*BuildMap, 0)
133133

134-
var err error
135134
var baseIdx int
136135
var diffIdx int
137136

@@ -195,9 +194,8 @@ func MergeMappings(
195194
// the build storage offset is the same as the base mapping
196195
BuildStorageOffset: base.BuildStorageOffset,
197196
}
198-
leftBase.FrameTable, err = base.FrameTable.Subset(storage.Range{Start: int64(leftBase.BuildStorageOffset), Length: int(leftBase.Length)})
199-
if err != nil {
200-
return nil, fmt.Errorf("subset frame table for left split at offset %#x: %w", leftBase.Offset, err)
197+
if err := leftBase.SetFrames(base.FrameTable); err != nil {
198+
return nil, fmt.Errorf("set frames for left split at offset %d: %w", leftBase.Offset, err)
201199
}
202200

203201
mappings = append(mappings, leftBase)
@@ -217,9 +215,8 @@ func MergeMappings(
217215
BuildId: base.BuildId,
218216
BuildStorageOffset: base.BuildStorageOffset + uint64(rightBaseShift),
219217
}
220-
rightBase.FrameTable, err = base.FrameTable.Subset(storage.Range{Start: int64(rightBase.BuildStorageOffset), Length: int(rightBase.Length)})
221-
if err != nil {
222-
return nil, fmt.Errorf("subset frame table for right split at offset %#x: %w", rightBase.Offset, err)
218+
if err := rightBase.SetFrames(base.FrameTable); err != nil {
219+
return nil, fmt.Errorf("set frames for right split at offset %d: %w", rightBase.Offset, err)
223220
}
224221

225222
baseMapping[baseIdx] = rightBase
@@ -248,9 +245,8 @@ func MergeMappings(
248245
BuildId: base.BuildId,
249246
BuildStorageOffset: base.BuildStorageOffset + uint64(rightBaseShift),
250247
}
251-
rightBase.FrameTable, err = base.FrameTable.Subset(storage.Range{Start: int64(rightBase.BuildStorageOffset), Length: int(rightBase.Length)})
252-
if err != nil {
253-
return nil, fmt.Errorf("subset frame table for right split at offset %#x: %w", rightBase.Offset, err)
248+
if err := rightBase.SetFrames(base.FrameTable); err != nil {
249+
return nil, fmt.Errorf("set frames for right split at offset %d: %w", rightBase.Offset, err)
254250
}
255251

256252
baseMapping[baseIdx] = rightBase
@@ -273,9 +269,8 @@ func MergeMappings(
273269
BuildId: base.BuildId,
274270
BuildStorageOffset: base.BuildStorageOffset,
275271
}
276-
leftBase.FrameTable, err = base.FrameTable.Subset(storage.Range{Start: int64(leftBase.BuildStorageOffset), Length: int(leftBase.Length)})
277-
if err != nil {
278-
return nil, fmt.Errorf("subset frame table for left split at offset %#x: %w", leftBase.Offset, err)
272+
if err := leftBase.SetFrames(base.FrameTable); err != nil {
273+
return nil, fmt.Errorf("set frames for left split at offset %d: %w", leftBase.Offset, err)
279274
}
280275

281276
mappings = append(mappings, leftBase)

0 commit comments

Comments
 (0)