Skip to content

Commit 9842b47

Browse files
committed
feat[r2]: add gc routine
1 parent 402f182 commit 9842b47

File tree

4 files changed

+116
-7
lines changed

4 files changed

+116
-7
lines changed

src/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,17 @@ import { handleOptions, corsWrapResponse } from "./handlers/handleCors.js"
44
import { handlePostOrPut } from "./handlers/handleWrite.js"
55
import { handleGet } from "./handlers/handleRead.js"
66
import { handleDelete } from "./handlers/handleDelete.js"
7+
import { cleanExpiredInR2 } from "./storage/storage.js"
78

89
export default {
910
async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
1011
return await handleRequest(request, env, ctx)
1112
},
13+
14+
// eslint-disable-next-line @typescript-eslint/require-await
15+
async scheduled(controller: ScheduledController, env, ctx) {
16+
ctx.waitUntil(cleanExpiredInR2(env, controller))
17+
},
1218
} satisfies ExportedHandler<Env>
1319

1420
async function handleRequest(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {

src/storage/storage.ts

Lines changed: 64 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@ import { parseSize } from "../shared.js"
33

44
export type PasteLocation = "KV" | "R2"
55

6+
// since CF does not allow expiration shorter than 60s, extend the expiration to 70s
7+
const PASTE_EXPIRE_SPECIFIED_MIN = 70
8+
9+
/* Since we need the metadata stored in KV to perform R2 cleanup,
10+
the paste in KV should not be deleted until it is cleaned in R2.
11+
We extend the lifetime by 2 days to avoid it being cleaned in VK too early
12+
*/
13+
const PASTE_EXPIRE_EXTENSION_FOR_R2 = 2 * 24 * 60 * 60
14+
615
// TODO: allow admin to upload permanent paste
716
// TODO: add filename length check
817
export type PasteMetadata = {
@@ -102,7 +111,7 @@ export async function getPaste(env: Env, short: string, ctx: ExecutionContext):
102111
if (metadata.location === "R2") {
103112
const object = await env.R2.get(short)
104113
if (object === null) {
105-
throw new WorkerError(404, `cannot find R2 bucket of name '${short}'`)
114+
return null
106115
}
107116
return { paste: object.body, metadata }
108117
} else {
@@ -143,10 +152,12 @@ export async function updatePaste(
143152
},
144153
) {
145154
const expirationUnix = dateToUnix(options.now) + options.expirationSeconds
146-
// since CF does not allow expiration shorter than 60s, extend the expiration to 70s
147-
const expirationUnixSpecified = dateToUnix(options.now) + Math.max(options.expirationSeconds, 70)
155+
let expirationUnixSpecified =
156+
dateToUnix(options.now) + Math.max(options.expirationSeconds, PASTE_EXPIRE_SPECIFIED_MIN)
148157

149158
if (originalMetadata.location === "R2") {
159+
expirationUnixSpecified = expirationUnixSpecified + PASTE_EXPIRE_EXTENSION_FOR_R2
160+
150161
await env.R2.put(pasteName, content)
151162
}
152163
const metadata: PasteMetadata = {
@@ -182,11 +193,13 @@ export async function createPaste(
182193
) {
183194
const expirationUnix = dateToUnix(options.now) + options.expirationSeconds
184195

185-
// since CF does not allow expiration shorter than 60s, extend the expiration to 70s
186-
const expirationUnixSpecified = dateToUnix(options.now) + Math.max(options.expirationSeconds, 70)
196+
let expirationUnixSpecified =
197+
dateToUnix(options.now) + Math.max(options.expirationSeconds, PASTE_EXPIRE_SPECIFIED_MIN)
187198

188199
const location = options.contentLength > parseSize(env.R2_THRESHOLD)! ? "R2" : "KV"
189200
if (location === "R2") {
201+
expirationUnixSpecified = expirationUnixSpecified + PASTE_EXPIRE_EXTENSION_FOR_R2
202+
190203
await env.R2.put(pasteName, content)
191204
}
192205

@@ -226,3 +239,49 @@ export async function deletePaste(env: Env, pasteName: string, originalMetadata:
226239
await env.R2.delete(pasteName)
227240
}
228241
}
242+
243+
export async function cleanExpiredInR2(env: Env, controller: ScheduledController) {
244+
// types generated by wrangler somehow not working, so cast manually
245+
type Listed = {
246+
list_complete: false
247+
keys: KVNamespaceListKey<PasteMetadataInStorage, string>[]
248+
cursor: string
249+
cacheStatus: string | null
250+
}
251+
252+
const nowUnix = controller.scheduledTime / 1000
253+
254+
let numCleaned = 0
255+
const r2NamesToClean: string[] = []
256+
257+
async function clean() {
258+
await env.R2.delete(r2NamesToClean)
259+
numCleaned += r2NamesToClean.length
260+
r2NamesToClean.length = 0
261+
}
262+
263+
let cursor: string | null = null
264+
while (true) {
265+
const listed = (await env.PB.list<PasteMetadataInStorage>({ cursor })) as Listed
266+
267+
cursor = listed.cursor
268+
269+
for (const key of listed.keys) {
270+
if (key.metadata !== undefined) {
271+
const metadata = migratePasteMetadata(key.metadata)
272+
if (metadata.location === "R2" && metadata.willExpireAtUnix < nowUnix) {
273+
r2NamesToClean.push(key.name)
274+
275+
if (r2NamesToClean.length === 1000) {
276+
await clean()
277+
}
278+
}
279+
}
280+
}
281+
282+
if (listed.list_complete) break
283+
}
284+
await clean()
285+
286+
console.log(`${numCleaned} buckets cleaned`)
287+
}

test/r2.spec.ts

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
1-
import { expect, test } from "vitest"
1+
import { expect, test, vi, beforeEach, afterEach } from "vitest"
22
import { areBlobsEqual, genRandomBlob, upload, workerFetch } from "./testUtils"
33
import { parseSize } from "../src/shared"
4-
import { createExecutionContext, env } from "cloudflare:test"
4+
import { createExecutionContext, createScheduledController, env, waitOnExecutionContext } from "cloudflare:test"
5+
import worker from "../src/index.js"
6+
7+
beforeEach(() => {
8+
vi.useFakeTimers()
9+
})
10+
11+
afterEach(() => {
12+
vi.useRealTimers()
13+
})
514

615
test("basic", async () => {
716
const blob1 = genRandomBlob(parseSize(env.R2_THRESHOLD)! * 2)
@@ -26,3 +35,34 @@ test("basic", async () => {
2635
expect(revisitResp.status).toStrictEqual(200)
2736
expect(areBlobsEqual(await revisitResp.blob(), blob2)).toBeTruthy()
2837
})
38+
39+
test("schedule", async () => {
40+
const ctx = createExecutionContext()
41+
42+
// upload
43+
vi.setSystemTime(new Date(2035, 0, 0))
44+
const blob1 = genRandomBlob(parseSize(env.R2_THRESHOLD)! * 2)
45+
const uploadResponse = await upload(ctx, { c: blob1, e: "7d" })
46+
const url = uploadResponse.url
47+
48+
// test get
49+
const getResp = await workerFetch(ctx, url)
50+
expect(getResp.status).toStrictEqual(200)
51+
await getResp.blob() // we must consume body to prevent breaking isolated storage
52+
53+
// go to past, nothing will be cleaned
54+
await worker.scheduled(createScheduledController({ scheduledTime: new Date(2000, 0, 0) }), env, ctx)
55+
await waitOnExecutionContext(ctx)
56+
57+
// test get after cleanup
58+
const getResp1 = await workerFetch(ctx, url)
59+
expect(getResp.status).toStrictEqual(200)
60+
await getResp1.blob()
61+
62+
// jump to 1 year later, now all pastes are expired
63+
await worker.scheduled(createScheduledController({ scheduledTime: new Date(2040, 0, 0) }), env, ctx)
64+
await waitOnExecutionContext(ctx)
65+
66+
// test get after cleanup
67+
expect((await workerFetch(ctx, url)).status).toStrictEqual(404)
68+
})

wrangler.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ directory = "dist/frontend"
1414
run_worker_first = true
1515
binding = "ASSETS"
1616

17+
[triggers]
18+
# clean r2 garbage every day
19+
crons = ["0 0 * * *"]
20+
1721
#----------------------------------------
1822
# lines below are what you should modify
1923
#----------------------------------------

0 commit comments

Comments
 (0)