From c2ea3dad53f63264f638378c884ba435f20d8ab2 Mon Sep 17 00:00:00 2001 From: guoyangzhen Date: Fri, 20 Mar 2026 14:49:11 +0800 Subject: [PATCH 1/3] fix(s3): respect prefix parameter and add pagination for getKeys() (#728) Two bugs fixed: 1. listObjects() accepted a prefix parameter but never used it in the S3 ListObjectsV2 API call. Now appends ?list-type=2&prefix=... to URL. 2. S3 returns max 1000 objects per response. Added pagination loop using NextContinuationToken to fetch all matching keys. --- src/drivers/s3.ts | 76 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/src/drivers/s3.ts b/src/drivers/s3.ts index 170f7aac..a9803435 100644 --- a/src/drivers/s3.ts +++ b/src/drivers/s3.ts @@ -127,12 +127,34 @@ const driver: DriverFactory = (options) => { // https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html const listObjects = async (prefix?: string) => { - const res = await awsFetch(baseURL).then((r) => r?.text()); - if (!res) { - console.log("no list", prefix ? `${baseURL}?prefix=${prefix}` : baseURL); - return null; - } - return parseList(res); + const allKeys: string[] = []; + let continuationToken: string | undefined; + + do { + const params = new URLSearchParams(); + params.set("list-type", "2"); + if (prefix) { + params.set("prefix", prefix); + } + if (continuationToken) { + params.set("continuation-token", continuationToken); + } + + const listURL = `${baseURL}?${params.toString()}`; + const res = await awsFetch(listURL).then((r) => r?.text()); + if (!res) { + break; + } + + const result = parseListResponse(res); + allKeys.push(...result.keys); + + continuationToken = result.isTruncated + ? result.nextContinuationToken + : undefined; + } while (continuationToken); + + return allKeys.length > 0 ? 
allKeys : null; }; // https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html @@ -239,24 +261,42 @@ async function sha256Base64(str: string) { return btoa(binaryString); } -function parseList(xml: string) { +function parseListResponse(xml: string): { + keys: string[]; + isTruncated: boolean; + nextContinuationToken?: string; +} { if (!xml.startsWith("<?xml")) { throw new Error("Invalid XML"); } - const listBucketResult = xml.match(/<ListBucketResult[^>]*>([\s\S]*)<\/ListBucketResult>/)?.[1]; + const listBucketResult = xml.match( + /<ListBucketResult[^>]*>([\s\S]*)<\/ListBucketResult>/ + )?.[1]; if (!listBucketResult) { throw new Error("Missing <ListBucketResult>"); } - const contents = listBucketResult.match(/<Contents[^>]*>([\s\S]*?)<\/Contents>/g); - if (!contents?.length) { - return []; - } - return contents - .map((content) => { - const key = content.match(/<Key>([\s\S]+?)<\/Key>/)?.[1]; - return key; - }) - .filter(Boolean) as string[]; + + const isTruncated = + listBucketResult.match(/<IsTruncated>([\s\S]*?)<\/IsTruncated>/)?.[1] === + "true"; + const nextContinuationToken = listBucketResult.match( + /<NextContinuationToken>([\s\S]*?)<\/NextContinuationToken>/ + )?.[1]; + + const contents = listBucketResult.match( + /<Contents[^>]*>([\s\S]*?)<\/Contents>/g + ); + const keys = contents + ? contents + .map((content) => content.match(/<Key>([\s\S]+?)<\/Key>/)?.[1]) + .filter(Boolean) + : []; + + return { + keys: keys as string[], + isTruncated, + nextContinuationToken, + }; } export default driver; From e29b17ff26037c9d345a9d183e4a0612c253f7c3 Mon Sep 17 00:00:00 2001 From: guoyangzhen Date: Fri, 20 Mar 2026 20:20:18 +0800 Subject: [PATCH 2/3] fix: add fail-fast for truncated pagination + decode XML-escaped S3 response values - Throw error when IsTruncated=true but NextContinuationToken is missing - Decode XML entities (&lt;, &gt;, &quot;, &apos;, &amp;) in keys and continuation tokens Addresses CodeRabbit review feedback. 
--- src/drivers/s3.ts | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/drivers/s3.ts b/src/drivers/s3.ts index a9803435..893dc252 100644 --- a/src/drivers/s3.ts +++ b/src/drivers/s3.ts @@ -261,6 +261,15 @@ async function sha256Base64(str: string) { return btoa(binaryString); } +function decodeXmlText(s: string): string { + return s + .replace(/&lt;/g, "<") + .replace(/&gt;/g, ">") + .replace(/&quot;/g, '"') + .replace(/&apos;/g, "'") + .replace(/&amp;/g, "&"); +} + function parseListResponse(xml: string): { keys: string[]; isTruncated: boolean; @@ -283,6 +292,13 @@ function parseListResponse(xml: string): { /<NextContinuationToken>([\s\S]*?)<\/NextContinuationToken>/ )?.[1]; + if (isTruncated && !nextContinuationToken) { + throw new Error( + "S3 returned IsTruncated=true but no NextContinuationToken — " + + "pagination cannot continue. Check bucket/prefix configuration.", + ); + } + const contents = listBucketResult.match( /<Contents[^>]*>([\s\S]*?)<\/Contents>/g ); @@ -290,12 +306,15 @@ function parseListResponse(xml: string): { ? contents .map((content) => content.match(/<Key>([\s\S]+?)<\/Key>/)?.[1]) .filter(Boolean) + .map((k) => decodeXmlText(k as string)) : []; return { keys: keys as string[], isTruncated, - nextContinuationToken, + nextContinuationToken: nextContinuationToken + ? 
decodeXmlText(nextContinuationToken) + : undefined, }; } From db8e8979c56b1715deb5bc96bfbf859b3fd7ad3b Mon Sep 17 00:00:00 2001 From: guoyangzhen Date: Sat, 21 Mar 2026 12:12:13 +0800 Subject: [PATCH 3/3] fix(s3): batch DeleteObjects to respect 1000-key API limit --- src/drivers/s3.ts | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/drivers/s3.ts b/src/drivers/s3.ts index 893dc252..2ce6d701 100644 --- a/src/drivers/s3.ts +++ b/src/drivers/s3.ts @@ -189,22 +189,35 @@ const driver: DriverFactory = (options) => { }; // https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html + // S3 DeleteObjects API supports max 1000 keys per request + const MAX_BULK_DELETE = 1000; + // Bounded concurrency for per-object fallback deletes + const MAX_CONCURRENT_DELETES = 10; + const deleteObjects = async (base: string) => { const keys = await listObjects(base); if (!keys?.length) { return null; } if (options.bulkDelete === false) { - await Promise.all(keys.map((key) => deleteObject(key))); + // Bounded concurrency: process MAX_CONCURRENT_DELETES at a time + for (let i = 0; i < keys.length; i += MAX_CONCURRENT_DELETES) { + const batch = keys.slice(i, i + MAX_CONCURRENT_DELETES); + await Promise.all(batch.map((key) => deleteObject(key))); + } } else { - const body = deleteKeysReq(keys); - await awsFetch(`${baseURL}?delete`, { - method: "POST", - headers: { - "x-amz-checksum-sha256": await sha256Base64(body), - }, - body, - }); + // Chunk into batches of MAX_BULK_DELETE for S3 API limit + for (let i = 0; i < keys.length; i += MAX_BULK_DELETE) { + const chunk = keys.slice(i, i + MAX_BULK_DELETE); + const body = deleteKeysReq(chunk); + await awsFetch(`${baseURL}?delete`, { + method: "POST", + headers: { + "x-amz-checksum-sha256": await sha256Base64(body), + }, + body, + }); + } } };