Skip to content

Commit 7cdf228

Browse files
authored
Encode binary cache keys as ucs-2 strings (#71224)
Stacked on top of #71221 Originally I had intended for CacheHandlers to be able to pick their ideal encoding for performance and size. When binary cache keys are involved. However, we sometimes have to add our own caching on top of the handler anyway. Since there's no built-in binary hash map implementation in JS itself, it's easier to use strings for this. It's also a simpler protocol if it's always a string type. It's possible to use UCS-2 encoding in JS which allows for arbitrary bytes. So that's what I'm doing in this PR. In the common case, it just looks the same as ~UTF-8~ ASCII strings so it's more compact than Base64. As long as this doesn't pass a UTF-8 encoded boundary which would then error. That's easy to mess up because they often default to serializing invalid characters with the same character - which would lead to potential cache poisoning because different arguments would look the same. However, it is possible to get UCS-2 sequences into JS in other ways, so maybe this issue already exists anyway? With this PR a CacheHandler just have to handle strings - it just have to be aware that they might not be valid Unicode strings.
1 parent 7e232c5 commit 7cdf228

File tree

1 file changed

+21
-32
lines changed

1 file changed

+21
-32
lines changed

packages/next/src/server/use-cache/use-cache-wrapper.ts

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -47,25 +47,18 @@ type CacheEntry = {
4747
}
4848

4949
interface CacheHandler {
50-
get(
51-
cacheKey: string | ArrayBuffer,
52-
implicitTags: string[]
53-
): Promise<undefined | CacheEntry>
54-
set(cacheKey: string | ArrayBuffer, value: Promise<CacheEntry>): Promise<void>
50+
get(cacheKey: string, implicitTags: string[]): Promise<undefined | CacheEntry>
51+
set(cacheKey: string, value: Promise<CacheEntry>): Promise<void>
5552
}
5653

5754
const cacheHandlerMap: Map<string, CacheHandler> = new Map()
5855

5956
// TODO: Move default implementation to be injectable.
6057
const defaultCacheStorage: Map<string, Promise<CacheEntry>> = new Map()
6158
cacheHandlerMap.set('default', {
62-
async get(cacheKey: string | ArrayBuffer): Promise<undefined | CacheEntry> {
59+
async get(cacheKey: string): Promise<undefined | CacheEntry> {
6360
// TODO: Implement proper caching.
64-
const stringCacheKey =
65-
typeof cacheKey === 'string'
66-
? cacheKey
67-
: Buffer.from(cacheKey).toString('base64')
68-
const entry = await defaultCacheStorage.get(stringCacheKey)
61+
const entry = await defaultCacheStorage.get(cacheKey)
6962
if (entry !== undefined) {
7063
const [returnStream, newSaved] = entry.value.tee()
7164
entry.value = newSaved
@@ -78,13 +71,9 @@ cacheHandlerMap.set('default', {
7871
}
7972
return undefined
8073
},
81-
async set(cacheKey: string | ArrayBuffer, promise: Promise<CacheEntry>) {
82-
const stringCacheKey =
83-
typeof cacheKey === 'string'
84-
? cacheKey
85-
: Buffer.from(cacheKey).toString('base64')
74+
async set(cacheKey: string, promise: Promise<CacheEntry>) {
8675
// TODO: Implement proper caching.
87-
defaultCacheStorage.set(stringCacheKey, promise)
76+
defaultCacheStorage.set(cacheKey, promise)
8877
await promise
8978
},
9079
})
@@ -95,7 +84,7 @@ function generateCacheEntry(
9584
cacheScope: undefined | CacheScopeStore,
9685
clientReferenceManifest: DeepReadonly<ClientReferenceManifest>,
9786
cacheHandler: CacheHandler,
98-
serializedCacheKey: string | ArrayBuffer,
87+
serializedCacheKey: string,
9988
encodedArguments: FormData | string,
10089
fn: any
10190
): Promise<ReadableStream> {
@@ -123,7 +112,7 @@ function generateCacheEntryWithRestoredWorkStore(
123112
cacheScope: undefined | CacheScopeStore,
124113
clientReferenceManifest: DeepReadonly<ClientReferenceManifest>,
125114
cacheHandler: CacheHandler,
126-
serializedCacheKey: string | ArrayBuffer,
115+
serializedCacheKey: string,
127116
encodedArguments: FormData | string,
128117
fn: any
129118
) {
@@ -167,7 +156,7 @@ function generateCacheEntryWithCacheContext(
167156
outerWorkUnitStore: WorkUnitStore | undefined,
168157
clientReferenceManifest: DeepReadonly<ClientReferenceManifest>,
169158
cacheHandler: CacheHandler,
170-
serializedCacheKey: string | ArrayBuffer,
159+
serializedCacheKey: string,
171160
encodedArguments: FormData | string,
172161
fn: any
173162
) {
@@ -298,7 +287,7 @@ async function generateCacheEntryImpl(
298287
innerCacheStore: UseCacheStore,
299288
clientReferenceManifest: DeepReadonly<ClientReferenceManifest>,
300289
cacheHandler: CacheHandler,
301-
serializedCacheKey: string | ArrayBuffer,
290+
serializedCacheKey: string,
302291
encodedArguments: FormData | string,
303292
fn: any
304293
): Promise<ReadableStream> {
@@ -360,7 +349,7 @@ async function loadCacheEntry(
360349
cacheScope: undefined | CacheScopeStore,
361350
clientReferenceManifest: DeepReadonly<ClientReferenceManifest>,
362351
cacheHandler: CacheHandler,
363-
serializedCacheKey: string | ArrayBuffer,
352+
serializedCacheKey: string,
364353
encodedArguments: FormData | string,
365354
fn: any
366355
): Promise<ReadableStream> {
@@ -494,10 +483,14 @@ export function cache(kind: string, id: string, fn: any) {
494483
? // Fast path for the simple case for simple inputs. We let the CacheHandler
495484
// Convert it to an ArrayBuffer if it wants to.
496485
encodedArguments
497-
: // The FormData might contain binary data that is not valid UTF-8 so this
498-
// cannot be a string in this case. I.e. .text() is not valid here and it
499-
// is not valid to use TextDecoder on this result.
500-
await new Response(encodedArguments).arrayBuffer()
486+
: // The FormData might contain binary data that is not valid UTF-8 so this cache
487+
// key may generate a UCS-2 string. Passing this to another service needs to be
488+
// aware that the key might not be compatible.
489+
String.fromCodePoint(
490+
...new Uint8Array(
491+
await new Response(encodedArguments).arrayBuffer()
492+
)
493+
)
501494

502495
let stream: ReadableStream
503496

@@ -507,15 +500,11 @@ export function cache(kind: string, id: string, fn: any) {
507500
// String cache key for easier hash mapping.
508501
// Note that we're not worried about collisions between string and base64
509502
// since the string form will always be JSON which doesn't overlap with base64.
510-
const stringCacheKey =
511-
typeof serializedCacheKey === 'string'
512-
? serializedCacheKey
513-
: Buffer.from(serializedCacheKey).toString('base64')
514503
const cachedStream:
515504
| undefined
516505
| Promise<{
517506
value: ReadableStream
518-
}> = cacheScope.cache.get(stringCacheKey)
507+
}> = cacheScope.cache.get(serializedCacheKey)
519508
if (cachedStream !== undefined) {
520509
const entry = await cachedStream
521510
// Get a clone out of the cache.
@@ -538,7 +527,7 @@ export function cache(kind: string, id: string, fn: any) {
538527
)
539528
const streams = teePromiseOfStream(loadedStream)
540529
cacheScope.cache.set(
541-
stringCacheKey,
530+
serializedCacheKey,
542531
cacheScopeEntryFromSecondStream(streams)
543532
)
544533
stream = (await streams)[0]

0 commit comments

Comments
 (0)