-
Notifications
You must be signed in to change notification settings - Fork 176
fix(s3): respect prefix parameter and add pagination for getKeys() #760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -127,12 +127,34 @@ const driver: DriverFactory<S3DriverOptions> = (options) => { | |
|
|
||
| // https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html | ||
| const listObjects = async (prefix?: string) => { | ||
| const res = await awsFetch(baseURL).then((r) => r?.text()); | ||
| if (!res) { | ||
| console.log("no list", prefix ? `${baseURL}?prefix=${prefix}` : baseURL); | ||
| return null; | ||
| } | ||
| return parseList(res); | ||
| const allKeys: string[] = []; | ||
| let continuationToken: string | undefined; | ||
|
|
||
| do { | ||
| const params = new URLSearchParams(); | ||
| params.set("list-type", "2"); | ||
| if (prefix) { | ||
| params.set("prefix", prefix); | ||
| } | ||
| if (continuationToken) { | ||
| params.set("continuation-token", continuationToken); | ||
| } | ||
|
|
||
| const listURL = `${baseURL}?${params.toString()}`; | ||
| const res = await awsFetch(listURL).then((r) => r?.text()); | ||
| if (!res) { | ||
| break; | ||
| } | ||
|
|
||
| const result = parseListResponse(res); | ||
| allKeys.push(...result.keys); | ||
|
|
||
| continuationToken = result.isTruncated | ||
| ? result.nextContinuationToken | ||
| : undefined; | ||
| } while (continuationToken); | ||
|
|
||
| return allKeys.length > 0 ? allKeys : null; | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
| // https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html | ||
|
|
@@ -167,22 +189,35 @@ const driver: DriverFactory<S3DriverOptions> = (options) => { | |
| }; | ||
|
|
||
| // https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html | ||
| // S3 DeleteObjects API supports max 1000 keys per request | ||
| const MAX_BULK_DELETE = 1000; | ||
| // Bounded concurrency for per-object fallback deletes | ||
| const MAX_CONCURRENT_DELETES = 10; | ||
|
|
||
| const deleteObjects = async (base: string) => { | ||
| const keys = await listObjects(base); | ||
| if (!keys?.length) { | ||
| return null; | ||
| } | ||
| if (options.bulkDelete === false) { | ||
| await Promise.all(keys.map((key) => deleteObject(key))); | ||
| // Bounded concurrency: process MAX_CONCURRENT_DELETES at a time | ||
| for (let i = 0; i < keys.length; i += MAX_CONCURRENT_DELETES) { | ||
| const batch = keys.slice(i, i + MAX_CONCURRENT_DELETES); | ||
| await Promise.all(batch.map((key) => deleteObject(key))); | ||
| } | ||
| } else { | ||
| const body = deleteKeysReq(keys); | ||
| await awsFetch(`${baseURL}?delete`, { | ||
| method: "POST", | ||
| headers: { | ||
| "x-amz-checksum-sha256": await sha256Base64(body), | ||
| }, | ||
| body, | ||
| }); | ||
| // Chunk into batches of MAX_BULK_DELETE for S3 API limit | ||
| for (let i = 0; i < keys.length; i += MAX_BULK_DELETE) { | ||
| const chunk = keys.slice(i, i + MAX_BULK_DELETE); | ||
| const body = deleteKeysReq(chunk); | ||
| await awsFetch(`${baseURL}?delete`, { | ||
| method: "POST", | ||
| headers: { | ||
| "x-amz-checksum-sha256": await sha256Base64(body), | ||
| }, | ||
| body, | ||
| }); | ||
|
Comment on lines
+209
to
+219
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's examine the context around the code in question
wc -l src/drivers/s3.tsRepository: unjs/unstorage Length of output: 79 🏁 Script executed: # Read the relevant section and surrounding context
cat -n src/drivers/s3.ts | head -250 | tail -100Repository: unjs/unstorage Length of output: 3861 🏁 Script executed: # Search for the deleteKeysReq function to understand the request format
rg -A 10 "deleteKeysReq" src/drivers/s3.tsRepository: unjs/unstorage Length of output: 653 🏁 Script executed: # Search for awsFetch definition and how it's used
rg -B 5 -A 5 "awsFetch" src/drivers/s3.ts | head -50Repository: unjs/unstorage Length of output: 1525 🏁 Script executed: # Look for any existing response handling patterns in the file
rg "\.text\(\)|\.json\(\)|parseXML|deleteErrors" src/drivers/s3.tsRepository: unjs/unstorage Length of output: 284 🏁 Script executed: # Search for XML parsing functions and response handling patterns
rg "parseListResponse|parseDeleteResponse|parseXML|<Error>|<Deleted>" src/drivers/s3.tsRepository: unjs/unstorage Length of output: 145 🏁 Script executed: # Search for decodeXmlText function mentioned in the fix
rg "decodeXmlText|decodeXml" src/drivers/s3.tsRepository: unjs/unstorage Length of output: 194 🏁 Script executed: # Get more context on parseListResponse to understand the pattern
rg -B 5 -A 15 "parseListResponse" src/drivers/s3.tsRepository: unjs/unstorage Length of output: 1090 🏁 Script executed: # Check if there's any error handling import or utility
rg "createError" src/drivers/s3.ts | head -5Repository: unjs/unstorage Length of output: 164 🏁 Script executed: # Get the full parseListResponse implementation to understand the parsing pattern
rg -A 40 "^function parseListResponse" src/drivers/s3.tsRepository: unjs/unstorage Length of output: 1240 🏁 Script executed: # Look for any existing DeleteObjects response handling
rg -B 5 -A 15 "DeleteObjects|deleteObjects" src/drivers/s3.ts | head -80Repository: unjs/unstorage Length of output: 2229 🏁 Script executed: # Check the full awsFetch implementation to understand error handling
rg -B 5 -A 20 "const awsFetch = " src/drivers/s3.tsRepository: unjs/unstorage Length of output: 856 🏁 Script executed: # Check if there's any response parsing in the bulk delete elsewhere
rg -C 10 "bulkDelete" src/drivers/s3.tsRepository: unjs/unstorage Length of output: 1533 🏁 Script executed: # Look for any tests that might show expected behavior for bulk delete errors
find . -name "*.test.*" -o -name "*.spec.*" | head -10Repository: unjs/unstorage Length of output: 472 🏁 Script executed: # Verify the actual AWS DeleteObjects response format in comments or docs
rg "DeleteResult|<Error>" src/Repository: unjs/unstorage Length of output: 40 🏁 Script executed: # Find S3 test files
find . -name "*s3*test*" -o -name "*s3*spec*"Repository: unjs/unstorage Length of output: 83 🏁 Script executed: # Look for common test patterns for S3 driver
fd "s3" test/Repository: unjs/unstorage Length of output: 81 🏁 Script executed: # Check if there's a test for S3 driver
find test -name "*.ts" | xargs grep -l "s3\|S3Driver" 2>/dev/null | head -5Repository: unjs/unstorage Length of output: 189 🏁 Script executed: # Examine the S3 test file for bulk delete test coverage
cat -n test/drivers/s3.test.ts | head -150Repository: unjs/unstorage Length of output: 3698 🏁 Script executed: # Search for any bulk delete or DeleteObjects tests
rg "clear\|bulk|DeleteObjects" test/drivers/s3.test.tsRepository: unjs/unstorage Length of output: 40 🌐 Web query:
💡 Result: The AWS S3 DeleteObjects API returns an HTTP 200 response with XML in the body containing per-key results. The response format includes: - Deleted array: For each successfully deleted object, contains Key, VersionId (if versioned), DeleteMarker (boolean), and DeleteMarkerVersionId (if applicable). - Errors array: For each failed deletion, contains Key, VersionId (if provided), Code (error code like "NoSuchKey", "AccessDenied"), and Message (error description). Two response modes exist: - Verbose (default): Returns both Deleted and Errors arrays with results for all keys. - Quiet (set Quiet=true in request): Returns only the Errors array; successful deletions are not listed in the response body. Error handling per key: Individual key failures do not fail the entire request. Check the Errors array for specific failures. Always inspect the response, as SDKs may not throw exceptions for partial failures (e.g., Java SDK returns DeleteObjectsResponse with getErrors list; Boto3 returns dict with 'Errors' key). Example response syntax (verbose): example-object QUpfdndhfd8438mnLLU-TXJh0937jS error-object Citations:
Parse S3 The chunking is good, but this code treats any The codebase already has the building blocks: Example fix- await awsFetch(`${baseURL}?delete`, {
+ const res = await awsFetch(`${baseURL}?delete`, {
method: "POST",
headers: {
"x-amz-checksum-sha256": await sha256Base64(body),
},
body,
});
+ const xml = await res!.text();
+ const deleteErrors = [...xml.matchAll(
+ /<Error>[\s\S]*?<Key>([\s\S]*?)<\/Key>[\s\S]*?<Code>([\s\S]*?)<\/Code>/g,
+ )];
+ if (deleteErrors.length > 0) {
+ const [_, key, code] = deleteErrors[0]!;
+ throw createError(
+ DRIVER_NAME,
+ `Failed to delete "${decodeXmlText(key)}" (${code})`,
+ );
+ }🤖 Prompt for AI Agents |
||
| } | ||
| } | ||
| }; | ||
|
|
||
|
|
@@ -239,24 +274,61 @@ async function sha256Base64(str: string) { | |
| return btoa(binaryString); | ||
| } | ||
|
|
||
| function parseList(xml: string) { | ||
| function decodeXmlText(s: string): string { | ||
| return s | ||
| .replace(/</g, "<") | ||
| .replace(/>/g, ">") | ||
| .replace(/"/g, '"') | ||
| .replace(/'/g, "'") | ||
| .replace(/&/g, "&"); | ||
| } | ||
|
|
||
| function parseListResponse(xml: string): { | ||
| keys: string[]; | ||
| isTruncated: boolean; | ||
| nextContinuationToken?: string; | ||
| } { | ||
| if (!xml.startsWith("<?xml")) { | ||
| throw new Error("Invalid XML"); | ||
| } | ||
| const listBucketResult = xml.match(/<ListBucketResult[^>]*>([\s\S]*)<\/ListBucketResult>/)?.[1]; | ||
| const listBucketResult = xml.match( | ||
| /<ListBucketResult[^>]*>([\s\S]*)<\/ListBucketResult>/ | ||
| )?.[1]; | ||
| if (!listBucketResult) { | ||
| throw new Error("Missing <ListBucketResult>"); | ||
| } | ||
| const contents = listBucketResult.match(/<Contents[^>]*>([\s\S]*?)<\/Contents>/g); | ||
| if (!contents?.length) { | ||
| return []; | ||
|
|
||
| const isTruncated = | ||
| listBucketResult.match(/<IsTruncated>([\s\S]*?)<\/IsTruncated>/)?.[1] === | ||
| "true"; | ||
| const nextContinuationToken = listBucketResult.match( | ||
| /<NextContinuationToken>([\s\S]*?)<\/NextContinuationToken>/ | ||
| )?.[1]; | ||
|
|
||
| if (isTruncated && !nextContinuationToken) { | ||
| throw new Error( | ||
| "S3 returned IsTruncated=true but no NextContinuationToken — " + | ||
| "pagination cannot continue. Check bucket/prefix configuration.", | ||
| ); | ||
| } | ||
| return contents | ||
| .map((content) => { | ||
| const key = content.match(/<Key>([\s\S]+?)<\/Key>/)?.[1]; | ||
| return key; | ||
| }) | ||
| .filter(Boolean) as string[]; | ||
|
|
||
| const contents = listBucketResult.match( | ||
| /<Contents[^>]*>([\s\S]*?)<\/Contents>/g | ||
| ); | ||
| const keys = contents | ||
| ? contents | ||
| .map((content) => content.match(/<Key>([\s\S]+?)<\/Key>/)?.[1]) | ||
| .filter(Boolean) | ||
| .map((k) => decodeXmlText(k as string)) | ||
| : []; | ||
|
|
||
| return { | ||
| keys: keys as string[], | ||
| isTruncated, | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| nextContinuationToken: nextContinuationToken | ||
| ? decodeXmlText(nextContinuationToken) | ||
| : undefined, | ||
| }; | ||
| } | ||
|
|
||
| export default driver; | ||
Uh oh!
There was an error while loading. Please reload this page.