Skip to content

Commit b50556c

Browse files
On demand blob downloads for patches (#17)
* Only download afterHash blobs, download beforeHash on-demand during rollback

  This change significantly reduces disk space usage by:
  - Only downloading afterHash blobs during download/apply/repair
  - Downloading beforeHash blobs on-demand during rollback operations
  - Removing beforeHash blobs during cleanup since they're fetched on-demand

  Changes:
  - Add getAfterHashBlobs() and getBeforeHashBlobs() to manifest/operations.ts
  - Update getMissingBlobs() to only check for afterHash blobs
  - Update savePatch() to skip saving beforeBlobContent
  - Update cleanupUnusedBlobs() to use getAfterHashBlobs()
  - Rename gc command to repair (with gc alias for backwards compatibility)
  - Add --offline flag to apply, rollback, and repair commands
  - Add blob-fetcher utility for on-demand blob downloading
  - Add fetchBlob() method to API client for downloading blobs by SHA256 hash
  - Add comprehensive tests for new functionality

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* feat: add authenticated blob download support

  - Add orgSlug option to APIClient for authenticated blob downloads
  - Use authenticated /v0/orgs/{org}/patches/blob/{hash} endpoint when token and orgSlug are available, fall back to public proxy otherwise
  - Add SOCKET_ORG_SLUG environment variable support

  This enables blob downloads during rollback to work in CI environments where the public proxy is not accessible.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

* fix: use fetchBlobsByHash for rollback beforeHash blobs

  The rollback command now correctly uses fetchBlobsByHash to download the specific beforeHash blobs it needs, rather than fetchMissingBlobs which only downloads afterHash blobs.

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude <[email protected]>

---------

Co-authored-by: Claude <[email protected]>
1 parent aafb172 commit b50556c

17 files changed

+1320
-168
lines changed

src/cli.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { downloadCommand } from './commands/download.js'
77
import { listCommand } from './commands/list.js'
88
import { removeCommand } from './commands/remove.js'
99
import { rollbackCommand } from './commands/rollback.js'
10-
import { gcCommand } from './commands/gc.js'
10+
import { repairCommand } from './commands/repair.js'
1111
import { setupCommand } from './commands/setup.js'
1212

1313
async function main(): Promise<void> {
@@ -20,7 +20,7 @@ async function main(): Promise<void> {
2020
.command(downloadCommand)
2121
.command(listCommand)
2222
.command(removeCommand)
23-
.command(gcCommand)
23+
.command(repairCommand)
2424
.demandCommand(1, 'You must specify a command')
2525
.help()
2626
.alias('h', 'help')

src/commands/apply.ts

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,26 @@ import {
1515
cleanupUnusedBlobs,
1616
formatCleanupResult,
1717
} from '../utils/cleanup-blobs.js'
18+
import {
19+
getMissingBlobs,
20+
fetchMissingBlobs,
21+
formatFetchResult,
22+
} from '../utils/blob-fetcher.js'
1823

1924
interface ApplyArgs {
2025
cwd: string
2126
'dry-run': boolean
2227
silent: boolean
2328
'manifest-path': string
29+
offline: boolean
2430
}
2531

2632
async function applyPatches(
2733
cwd: string,
2834
manifestPath: string,
2935
dryRun: boolean,
3036
silent: boolean,
37+
offline: boolean,
3138
): Promise<{ success: boolean; results: ApplyResult[] }> {
3239
// Read and parse manifest
3340
const manifestContent = await fs.readFile(manifestPath, 'utf-8')
@@ -38,11 +45,48 @@ async function applyPatches(
3845
const socketDir = path.dirname(manifestPath)
3946
const blobsPath = path.join(socketDir, 'blobs')
4047

41-
// Verify blobs directory exists
42-
try {
43-
await fs.access(blobsPath)
44-
} catch {
45-
throw new Error(`Blobs directory not found at ${blobsPath}`)
48+
// Ensure blobs directory exists
49+
await fs.mkdir(blobsPath, { recursive: true })
50+
51+
// Check for and download missing blobs (unless offline)
52+
const missingBlobs = await getMissingBlobs(manifest, blobsPath)
53+
if (missingBlobs.size > 0) {
54+
if (offline) {
55+
if (!silent) {
56+
console.error(
57+
`Error: ${missingBlobs.size} blob(s) are missing and --offline mode is enabled.`,
58+
)
59+
console.error('Run "socket-patch repair" to download missing blobs.')
60+
}
61+
return { success: false, results: [] }
62+
}
63+
64+
if (!silent) {
65+
console.log(`Downloading ${missingBlobs.size} missing blob(s)...`)
66+
}
67+
68+
const fetchResult = await fetchMissingBlobs(manifest, blobsPath, undefined, {
69+
onProgress: silent
70+
? undefined
71+
: (hash, index, total) => {
72+
process.stdout.write(
73+
`\r Downloading ${index}/${total}: ${hash.slice(0, 12)}...`.padEnd(60),
74+
)
75+
},
76+
})
77+
78+
if (!silent) {
79+
// Clear progress line
80+
process.stdout.write('\r' + ' '.repeat(60) + '\r')
81+
console.log(formatFetchResult(fetchResult))
82+
}
83+
84+
if (fetchResult.failed > 0) {
85+
if (!silent) {
86+
console.error('Some blobs could not be downloaded. Cannot apply patches.')
87+
}
88+
return { success: false, results: [] }
89+
}
4690
}
4791

4892
// Find all node_modules directories
@@ -138,6 +182,11 @@ export const applyCommand: CommandModule<{}, ApplyArgs> = {
138182
type: 'string',
139183
default: DEFAULT_PATCH_MANIFEST_PATH,
140184
})
185+
.option('offline', {
186+
describe: 'Do not download missing blobs, fail if any are missing',
187+
type: 'boolean',
188+
default: false,
189+
})
141190
},
142191
handler: async argv => {
143192
try {
@@ -160,6 +209,7 @@ export const applyCommand: CommandModule<{}, ApplyArgs> = {
160209
manifestPath,
161210
argv['dry-run'],
162211
argv.silent,
212+
argv.offline,
163213
)
164214

165215
// Print results if not silent

src/commands/download.test.ts

Lines changed: 49 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import type { PatchResponse } from '../utils/api-client.js'
1212
/**
1313
* Simulates the savePatch function behavior to test blob saving logic
1414
* This mirrors the logic in download.ts
15+
* NOTE: Only saves afterHash blobs - beforeHash blobs are downloaded on-demand during rollback
1516
*/
1617
async function simulateSavePatch(
1718
patch: PatchResponse,
@@ -28,18 +29,12 @@ async function simulateSavePatch(
2829
}
2930

3031
// Save after blob content if provided
32+
// Note: beforeHash blobs are NOT saved here - they are downloaded on-demand during rollback
3133
if (fileInfo.blobContent && fileInfo.afterHash) {
3234
const blobPath = path.join(blobsDir, fileInfo.afterHash)
3335
const blobBuffer = Buffer.from(fileInfo.blobContent, 'base64')
3436
await fs.writeFile(blobPath, blobBuffer)
3537
}
36-
37-
// Save before blob content if provided (for rollback support)
38-
if (fileInfo.beforeBlobContent && fileInfo.beforeHash) {
39-
const blobPath = path.join(blobsDir, fileInfo.beforeHash)
40-
const blobBuffer = Buffer.from(fileInfo.beforeBlobContent, 'base64')
41-
await fs.writeFile(blobPath, blobBuffer)
42-
}
4338
}
4439

4540
return files
@@ -57,7 +52,7 @@ describe('download command', () => {
5752
})
5853

5954
describe('savePatch blob storage', () => {
60-
it('should save both before and after blobs when provided', async () => {
55+
it('should only save after blobs (before blobs are downloaded on-demand)', async () => {
6156
const blobsDir = path.join(testDir, 'blobs1')
6257
await fs.mkdir(blobsDir, { recursive: true })
6358

@@ -87,15 +82,18 @@ describe('download command', () => {
8782

8883
await simulateSavePatch(patch, blobsDir)
8984

90-
// Verify both blobs are saved
85+
// Verify only after blob is saved (before blobs are downloaded on-demand during rollback)
9186
const beforeBlobPath = path.join(blobsDir, beforeHash)
9287
const afterBlobPath = path.join(blobsDir, afterHash)
9388

94-
const beforeBlobContent = await fs.readFile(beforeBlobPath, 'utf-8')
9589
const afterBlobContent = await fs.readFile(afterBlobPath, 'utf-8')
96-
97-
assert.equal(beforeBlobContent, beforeContent)
9890
assert.equal(afterBlobContent, afterContent)
91+
92+
// Before blob should NOT exist (downloaded on-demand during rollback)
93+
await assert.rejects(
94+
async () => fs.access(beforeBlobPath),
95+
/ENOENT/,
96+
)
9997
})
10098

10199
it('should only save after blob when before blob content is not provided', async () => {
@@ -141,7 +139,7 @@ describe('download command', () => {
141139
)
142140
})
143141

144-
it('should handle multiple files with blobs', async () => {
142+
it('should handle multiple files with blobs (only after blobs saved)', async () => {
145143
const blobsDir = path.join(testDir, 'blobs3')
146144
await fs.mkdir(blobsDir, { recursive: true })
147145

@@ -178,26 +176,27 @@ describe('download command', () => {
178176

179177
await simulateSavePatch(patch, blobsDir)
180178

181-
// Verify all blobs are saved
179+
// Verify only after blobs are saved (before blobs are downloaded on-demand)
182180
for (const [, { before, after }] of Object.entries(files)) {
183181
const beforeHash = computeTestHash(before)
184182
const afterHash = computeTestHash(after)
185183

186-
const beforeBlobContent = await fs.readFile(
187-
path.join(blobsDir, beforeHash),
188-
'utf-8',
189-
)
184+
// After blob should exist
190185
const afterBlobContent = await fs.readFile(
191186
path.join(blobsDir, afterHash),
192187
'utf-8',
193188
)
194-
195-
assert.equal(beforeBlobContent, before)
196189
assert.equal(afterBlobContent, after)
190+
191+
// Before blob should NOT exist
192+
await assert.rejects(
193+
async () => fs.access(path.join(blobsDir, beforeHash)),
194+
/ENOENT/,
195+
)
197196
}
198197
})
199198

200-
it('should handle binary file content', async () => {
199+
it('should handle binary file content (only after blob saved)', async () => {
201200
const blobsDir = path.join(testDir, 'blobs4')
202201
await fs.mkdir(blobsDir, { recursive: true })
203202

@@ -228,41 +227,44 @@ describe('download command', () => {
228227

229228
await simulateSavePatch(patch, blobsDir)
230229

231-
// Verify binary blobs are saved correctly
232-
const beforeBlobBuffer = await fs.readFile(path.join(blobsDir, beforeHash))
230+
// Verify only after binary blob is saved
233231
const afterBlobBuffer = await fs.readFile(path.join(blobsDir, afterHash))
234-
235-
assert.deepEqual(beforeBlobBuffer, beforeContent)
236232
assert.deepEqual(afterBlobBuffer, afterContent)
233+
234+
// Before blob should NOT exist
235+
await assert.rejects(
236+
async () => fs.access(path.join(blobsDir, beforeHash)),
237+
/ENOENT/,
238+
)
237239
})
238240

239-
it('should deduplicate blobs with same content', async () => {
241+
it('should deduplicate after blobs with same content', async () => {
240242
const blobsDir = path.join(testDir, 'blobs5')
241243
await fs.mkdir(blobsDir, { recursive: true })
242244

243-
// Same before content for two different files
244-
const sharedContent = 'shared content'
245-
const afterContent1 = 'after1'
246-
const afterContent2 = 'after2'
245+
// Same after content for two different files (to test deduplication)
246+
const sharedAfterContent = 'shared after content'
247+
const beforeContent1 = 'before1'
248+
const beforeContent2 = 'before2'
247249

248-
const sharedHash = computeTestHash(sharedContent)
250+
const sharedAfterHash = computeTestHash(sharedAfterContent)
249251

250252
const patch: PatchResponse = {
251253
uuid: 'test-uuid-5',
252254
purl: 'pkg:npm/[email protected]',
253255
publishedAt: new Date().toISOString(),
254256
files: {
255257
'package/file1.js': {
256-
beforeHash: sharedHash,
257-
afterHash: computeTestHash(afterContent1),
258-
blobContent: Buffer.from(afterContent1).toString('base64'),
259-
beforeBlobContent: Buffer.from(sharedContent).toString('base64'),
258+
beforeHash: computeTestHash(beforeContent1),
259+
afterHash: sharedAfterHash, // Same after hash
260+
blobContent: Buffer.from(sharedAfterContent).toString('base64'),
261+
beforeBlobContent: Buffer.from(beforeContent1).toString('base64'),
260262
},
261263
'package/file2.js': {
262-
beforeHash: sharedHash, // Same before hash
263-
afterHash: computeTestHash(afterContent2),
264-
blobContent: Buffer.from(afterContent2).toString('base64'),
265-
beforeBlobContent: Buffer.from(sharedContent).toString('base64'),
264+
beforeHash: computeTestHash(beforeContent2),
265+
afterHash: sharedAfterHash, // Same after hash
266+
blobContent: Buffer.from(sharedAfterContent).toString('base64'),
267+
beforeBlobContent: Buffer.from(beforeContent2).toString('base64'),
266268
},
267269
},
268270
vulnerabilities: {},
@@ -273,14 +275,18 @@ describe('download command', () => {
273275

274276
await simulateSavePatch(patch, blobsDir)
275277

276-
// Shared blob should exist only once (content-addressable)
278+
// Shared after blob should exist only once (content-addressable)
277279
const blobFiles = await fs.readdir(blobsDir)
278-
const sharedBlobCount = blobFiles.filter(f => f === sharedHash).length
280+
const sharedBlobCount = blobFiles.filter(f => f === sharedAfterHash).length
279281
assert.equal(sharedBlobCount, 1)
280282

283+
// Only 1 blob should be saved (the shared after blob)
284+
// Before blobs are NOT saved
285+
assert.equal(blobFiles.length, 1)
286+
281287
// Content should be correct
282-
const blobContent = await fs.readFile(path.join(blobsDir, sharedHash), 'utf-8')
283-
assert.equal(blobContent, sharedContent)
288+
const blobContent = await fs.readFile(path.join(blobsDir, sharedAfterHash), 'utf-8')
289+
assert.equal(blobContent, sharedAfterContent)
284290
})
285291
})
286292
})

src/commands/download.ts

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ function displaySearchResults(
337337

338338
/**
339339
* Save a patch to the manifest and blobs directory
340+
* Only saves afterHash blobs - beforeHash blobs are downloaded on-demand during rollback
340341
*/
341342
async function savePatch(
342343
patch: PatchResponse,
@@ -349,7 +350,7 @@ async function savePatch(
349350
return false
350351
}
351352

352-
// Save blob contents
353+
// Save blob contents (only afterHash blobs to save disk space)
353354
const files: Record<string, { beforeHash?: string; afterHash?: string }> = {}
354355
for (const [filePath, fileInfo] of Object.entries(patch.files)) {
355356
if (fileInfo.afterHash) {
@@ -360,18 +361,12 @@ async function savePatch(
360361
}
361362

362363
// Save after blob content if provided
364+
// Note: beforeHash blobs are NOT saved here - they are downloaded on-demand during rollback
363365
if (fileInfo.blobContent && fileInfo.afterHash) {
364366
const blobPath = path.join(blobsDir, fileInfo.afterHash)
365367
const blobBuffer = Buffer.from(fileInfo.blobContent, 'base64')
366368
await fs.writeFile(blobPath, blobBuffer)
367369
}
368-
369-
// Save before blob content if provided (for rollback support)
370-
if (fileInfo.beforeBlobContent && fileInfo.beforeHash) {
371-
const blobPath = path.join(blobsDir, fileInfo.beforeHash)
372-
const blobBuffer = Buffer.from(fileInfo.beforeBlobContent, 'base64')
373-
await fs.writeFile(blobPath, blobBuffer)
374-
}
375370
}
376371

377372
// Add/update patch in manifest

0 commit comments

Comments
 (0)