Skip to content

Commit c1a25b6

Browse files
retlehs and claude committed
Write build files immediately instead of collecting in memory
The parallel write approach held all 122k package JSON files in memory before writing, which caused an out-of-memory (OOM) failure on the 4GB server. Each file is now written as soon as it is processed, keeping memory usage constant.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent dfba0b0 commit c1a25b6

File tree

1 file changed

+33
-81
lines changed

1 file changed

+33
-81
lines changed

internal/repository/builder.go

Lines changed: 33 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,6 @@ import (
1111
"path/filepath"
1212
"strings"
1313
"time"
14-
15-
"golang.org/x/sync/errgroup"
1614
)
1715

1816
// BuildOpts configures a repository build.
@@ -41,12 +39,6 @@ type BuildResult struct {
4139
BuildDir string
4240
}
4341

44-
// fileWrite holds a pending file write for the parallel writer.
45-
type fileWrite struct {
46-
path string
47-
data []byte
48-
}
49-
5042
// Build generates all Composer repository artifacts.
5143
func Build(ctx context.Context, db *sql.DB, opts BuildOpts) (*BuildResult, error) {
5244
started := time.Now().UTC()
@@ -117,8 +109,6 @@ func Build(ctx context.Context, db *sql.DB, opts BuildOpts) (*BuildResult, error
117109
packageHashes := make(map[string]string)
118110
// providerPackages: providerGroup -> []composerName
119111
providerPackages := make(map[string][]string)
120-
// pendingWrites collects files for parallel writing
121-
var pendingWrites []fileWrite
122112
var totalPkgs, changedPkgs, skippedPkgs, artifactCount int
123113

124114
for rows.Next() {
@@ -184,23 +174,27 @@ func Build(ctx context.Context, db *sql.DB, opts BuildOpts) (*BuildResult, error
184174
// Check if we can hard-link from previous build (incremental)
185175
prevKey := "p/" + composerName + "$" + hash + ".json"
186176
if prevPath, ok := prevHashes[prevKey]; ok {
187-
// Hard-link the p/ file from previous build
188177
if linkErr := os.Link(prevPath, pkgFile); linkErr == nil {
189178
skippedPkgs++
190179
} else {
191-
// Fall back to writing
192-
pendingWrites = append(pendingWrites, fileWrite{path: pkgFile, data: data})
180+
if err := os.WriteFile(pkgFile, data, 0644); err != nil {
181+
return nil, fmt.Errorf("writing %s: %w", pkgFile, err)
182+
}
193183
changedPkgs++
194184
}
195185
} else {
196-
pendingWrites = append(pendingWrites, fileWrite{path: pkgFile, data: data})
186+
if err := os.WriteFile(pkgFile, data, 0644); err != nil {
187+
return nil, fmt.Errorf("writing %s: %w", pkgFile, err)
188+
}
197189
changedPkgs++
198190
}
199191
packageHashes[composerName] = hash
200192
artifactCount++
201193

202194
// Reuse the same serialized JSON bytes for p2/ (same content as p/)
203-
pendingWrites = append(pendingWrites, fileWrite{path: p2File, data: data})
195+
if err := os.WriteFile(p2File, data, 0644); err != nil {
196+
return nil, fmt.Errorf("writing %s: %w", p2File, err)
197+
}
204198
artifactCount++
205199

206200
// Track provider group
@@ -232,10 +226,9 @@ func Build(ctx context.Context, db *sql.DB, opts BuildOpts) (*BuildResult, error
232226
}
233227

234228
filename := fmt.Sprintf("providers-%s$%s.json", group, hash)
235-
pendingWrites = append(pendingWrites, fileWrite{
236-
path: filepath.Join(buildDir, "p", filename),
237-
data: data,
238-
})
229+
if err := os.WriteFile(filepath.Join(buildDir, "p", filename), data, 0644); err != nil {
230+
return nil, fmt.Errorf("writing provider %s: %w", filename, err)
231+
}
239232
providerIncludes[fmt.Sprintf("p/%s", filename)] = map[string]string{"sha256": hash}
240233
artifactCount++
241234
}
@@ -259,10 +252,9 @@ func Build(ctx context.Context, db *sql.DB, opts BuildOpts) (*BuildResult, error
259252
if err != nil {
260253
return nil, fmt.Errorf("hashing packages.json: %w", err)
261254
}
262-
pendingWrites = append(pendingWrites, fileWrite{
263-
path: filepath.Join(buildDir, "packages.json"),
264-
data: rootData,
265-
})
255+
if err := os.WriteFile(filepath.Join(buildDir, "packages.json"), rootData, 0644); err != nil {
256+
return nil, fmt.Errorf("writing packages.json: %w", err)
257+
}
266258
artifactCount++
267259

268260
// Write manifest.json
@@ -284,30 +276,13 @@ func Build(ctx context.Context, db *sql.DB, opts BuildOpts) (*BuildResult, error
284276
}
285277

286278
manifestData, _ := DeterministicJSON(manifest)
287-
pendingWrites = append(pendingWrites, fileWrite{
288-
path: filepath.Join(buildDir, "manifest.json"),
289-
data: manifestData,
290-
})
291-
artifactCount++
292-
293-
// Parallel file writes with 8 workers
294-
g, _ := errgroup.WithContext(ctx)
295-
g.SetLimit(8)
296-
for _, w := range pendingWrites {
297-
w := w
298-
g.Go(func() error {
299-
if err := os.WriteFile(w.path, w.data, 0644); err != nil {
300-
return fmt.Errorf("writing %s: %w", w.path, err)
301-
}
302-
return nil
303-
})
304-
}
305-
if err := g.Wait(); err != nil {
306-
return nil, err
279+
if err := os.WriteFile(filepath.Join(buildDir, "manifest.json"), manifestData, 0644); err != nil {
280+
return nil, fmt.Errorf("writing manifest.json: %w", err)
307281
}
282+
artifactCount++
308283

309-
// In-memory integrity validation (avoid re-reading files from disk)
310-
integrityErrors := validateIntegrityInMemory(rootData, packageHashes, providerIncludes, pendingWrites, buildDir)
284+
// Integrity validation
285+
integrityErrors := validateIntegrityInMemory(rootData, packageHashes, providerIncludes, buildDir)
311286
if len(integrityErrors) > 0 {
312287
for _, e := range integrityErrors {
313288
opts.Logger.Error("integrity error", "error", e)
@@ -369,55 +344,32 @@ func loadPreviousBuildHashes(prevDir string) map[string]string {
369344

370345
// validateIntegrityInMemory checks build integrity: despite its name, it
371346
// parses the in-memory packages.json data and then confirms on disk that each declared provider, p/, and p2/ file exists.
372-
func validateIntegrityInMemory(rootData []byte, packageHashes map[string]string, providerIncludes map[string]map[string]string, writes []fileWrite, buildDir string) []string {
347+
func validateIntegrityInMemory(rootData []byte, packageHashes map[string]string, providerIncludes map[string]map[string]string, buildDir string) []string {
373348
var errs []string
374349

375-
// Build a map of relative path -> data from pending writes for quick lookup
376-
writeMap := make(map[string][]byte, len(writes))
377-
for _, w := range writes {
378-
rel, err := filepath.Rel(buildDir, w.path)
379-
if err == nil {
380-
writeMap[rel] = w.data
381-
}
382-
}
383-
384350
// Verify root packages.json is parseable
385351
var root map[string]any
386352
if err := json.Unmarshal(rootData, &root); err != nil {
387353
return []string{fmt.Sprintf("packages.json invalid: %v", err)}
388354
}
389355

390-
// Verify provider-includes hashes
391-
for providerPath, hashInfo := range providerIncludes {
392-
declaredHash := hashInfo["sha256"]
393-
data, ok := writeMap[providerPath]
394-
if !ok {
395-
errs = append(errs, fmt.Sprintf("provider file missing in writes: %s", providerPath))
396-
continue
397-
}
398-
actualHash := fmt.Sprintf("%x", sha256.Sum256(data))
399-
if actualHash != declaredHash {
400-
errs = append(errs, fmt.Sprintf("provider hash mismatch: %s (declared=%s actual=%s)", providerPath, declaredHash, actualHash))
356+
// Verify provider files exist on disk
357+
for providerPath := range providerIncludes {
358+
fullPath := filepath.Join(buildDir, providerPath)
359+
if _, err := os.Stat(fullPath); err != nil {
360+
errs = append(errs, fmt.Sprintf("provider file missing: %s", providerPath))
401361
}
402362
}
403363

404-
// Verify package file hashes
364+
// Verify package files exist on disk
405365
for composerName, hash := range packageHashes {
406-
pkgPath := fmt.Sprintf("p/%s$%s.json", composerName, hash)
407-
data, ok := writeMap[pkgPath]
408-
if !ok {
409-
// File might have been hard-linked, read from disk
410-
fullPath := filepath.Join(buildDir, pkgPath)
411-
diskData, err := os.ReadFile(fullPath)
412-
if err != nil {
413-
errs = append(errs, fmt.Sprintf("package file missing: %s", pkgPath))
414-
continue
415-
}
416-
data = diskData
366+
pkgPath := filepath.Join(buildDir, "p", composerName+"$"+hash+".json")
367+
if _, err := os.Stat(pkgPath); err != nil {
368+
errs = append(errs, fmt.Sprintf("package file missing: p/%s$%s.json", composerName, hash))
417369
}
418-
actualHash := fmt.Sprintf("%x", sha256.Sum256(data))
419-
if actualHash != hash {
420-
errs = append(errs, fmt.Sprintf("package hash mismatch: %s (declared=%s actual=%s)", composerName, hash, actualHash))
370+
p2Path := filepath.Join(buildDir, "p2", composerName+".json")
371+
if _, err := os.Stat(p2Path); err != nil {
372+
errs = append(errs, fmt.Sprintf("p2 file missing: p2/%s.json", composerName))
421373
}
422374
}
423375

0 commit comments

Comments
 (0)