Skip to content

Commit 1f3050b

Browse files
authored
perf: experiment with an alternate binary search implementation (#9852)
* Optimize the binary search for assets Optimizations: - remove recursion to use a while loop instead - do not allocate unnecessary array (to reduce GC) - convert the manifest to a Uint8Array once upfront - added a faster vesrion of compare (compareSearchValueWithEntry). The compare implementatiopn was moved to the tests where it is used Other changes - BS: use indexes instead of offest (IMO easier to read/debug) - BS: return the content hash instead of the whole entry * Add an experiment for the update search algo * fixup! fallback to the current implementation on error
1 parent 3bdec6b commit 1f3050b

File tree

4 files changed

+397
-3
lines changed

4 files changed

+397
-3
lines changed
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import {
2+
CONTENT_HASH_OFFSET,
3+
CONTENT_HASH_SIZE,
4+
ENTRY_SIZE,
5+
HEADER_SIZE,
6+
PATH_HASH_SIZE,
7+
} from "../../utils/constants";
8+
9+
export class AssetsManifest {
10+
private data: Uint8Array;
11+
12+
constructor(data: ArrayBuffer) {
13+
this.data = new Uint8Array(data);
14+
}
15+
16+
async get(pathname: string) {
17+
const pathHash = await hashPath(pathname);
18+
const entry = binarySearch(this.data, pathHash);
19+
return entry ? Uint8ToHexString(entry) : null;
20+
}
21+
}
22+
23+
export const hashPath = async (path: string) => {
24+
const encoder = new TextEncoder();
25+
const data = encoder.encode(path);
26+
const hashBuffer = await crypto.subtle.digest(
27+
"SHA-256",
28+
data.buffer as ArrayBuffer
29+
);
30+
return new Uint8Array(hashBuffer, 0, PATH_HASH_SIZE);
31+
};
32+
33+
/**
34+
* Search for an entry with the given hash path.
35+
*
36+
* @param manifest the manifest bytes
37+
* @param pathHash the path hash to find in the manifest
38+
* @returns The content hash when the entry is found and `false` otherwise
39+
*/
40+
export const binarySearch = (
41+
manifest: Uint8Array,
42+
pathHash: Uint8Array
43+
): Uint8Array | false => {
44+
if (pathHash.byteLength !== PATH_HASH_SIZE) {
45+
throw new TypeError(
46+
`Search value should have a length of ${PATH_HASH_SIZE}`
47+
);
48+
}
49+
50+
const numberOfEntries = (manifest.byteLength - HEADER_SIZE) / ENTRY_SIZE;
51+
52+
if (numberOfEntries === 0) {
53+
return false;
54+
}
55+
56+
let lowIndex = 0;
57+
let highIndex = numberOfEntries - 1;
58+
59+
while (lowIndex <= highIndex) {
60+
const middleIndex = (lowIndex + highIndex) >> 1;
61+
62+
const cmp = comparePathHashWithEntry(pathHash, manifest, middleIndex);
63+
64+
if (cmp < 0) {
65+
highIndex = middleIndex - 1;
66+
continue;
67+
}
68+
69+
if (cmp > 0) {
70+
lowIndex = middleIndex + 1;
71+
continue;
72+
}
73+
74+
return new Uint8Array(
75+
manifest.buffer,
76+
HEADER_SIZE + middleIndex * ENTRY_SIZE + CONTENT_HASH_OFFSET,
77+
CONTENT_HASH_SIZE
78+
);
79+
}
80+
81+
return false;
82+
};
83+
84+
/**
85+
* Compares a search value with an entry in the manifest
86+
*
87+
* @param searchValue a `Uint8Array` of size `PATH_HASH_SIZE`
88+
* @param manifest the manifest bytes
89+
* @param entryIndex the index in the manifest of the entry to compare
90+
*/
91+
function comparePathHashWithEntry(
92+
searchValue: Uint8Array,
93+
manifest: Uint8Array,
94+
entryIndex: number
95+
) {
96+
let entryOffset = HEADER_SIZE + entryIndex * ENTRY_SIZE;
97+
for (let offset = 0; offset < PATH_HASH_SIZE; offset++, entryOffset++) {
98+
// We know that both values could not be undefined
99+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
100+
const s = searchValue[offset]!;
101+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
102+
const e = manifest[entryOffset]!;
103+
if (s < e) {
104+
return -1;
105+
}
106+
if (s > e) {
107+
return 1;
108+
}
109+
}
110+
111+
return 0;
112+
}
113+
114+
/**
115+
* Converts an Uint8Array to an hex string
116+
*
117+
* @param array The content hash
118+
* @returns padded hex string
119+
*/
120+
const Uint8ToHexString = (array: Uint8Array) => {
121+
return [...array].map((b) => b.toString(16).padStart(2, "0")).join("");
122+
};

packages/workers-shared/asset-worker/src/experiment-analytics.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ type Data = {
1414
manifestReadTime?: number;
1515

1616
// -- Blobs --
17+
// blob1 current or perfTest version of binary search
18+
binarySearchVersion?: "current" | "perfTest" | "current-fallback";
1719
};
1820

1921
export class ExperimentAnalytics {
@@ -44,7 +46,9 @@ export class ExperimentAnalytics {
4446
doubles: [
4547
this.data.manifestReadTime ?? -1, // double1
4648
],
47-
blobs: [],
49+
blobs: [
50+
this.data.binarySearchVersion, // blob1
51+
],
4852
});
4953
}
5054
}

packages/workers-shared/asset-worker/src/worker.ts

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { setupSentry } from "../../utils/sentry";
44
import { mockJaegerBinding } from "../../utils/tracing";
55
import { Analytics } from "./analytics";
66
import { AssetsManifest } from "./assets-manifest";
7+
import { AssetsManifest as AssetsManifest2 } from "./assets-manifest.2";
78
import { normalizeConfiguration } from "./configuration";
89
import { ExperimentAnalytics } from "./experiment-analytics";
910
import { canFetch, handleRequest } from "./handler";
@@ -231,6 +232,12 @@ export default class<TEnv extends Env = Env> extends WorkerEntrypoint<TEnv> {
231232
pathname: string,
232233
_request?: Request
233234
): Promise<string | null> {
235+
const BINARY_SEARCH_EXPERIMENT_SAMPLE_RATE = 0.05;
236+
const binarySearchVersion =
237+
Math.random() < BINARY_SEARCH_EXPERIMENT_SAMPLE_RATE
238+
? "current"
239+
: "perfTest";
240+
234241
const analytics = new ExperimentAnalytics(this.env.EXPERIMENT_ANALYTICS);
235242
const performance = new PerformanceTimer(this.env.UNSAFE_PERFORMANCE);
236243
const jaeger = this.env.JAEGER ?? mockJaegerBinding();
@@ -243,13 +250,31 @@ export default class<TEnv extends Env = Env> extends WorkerEntrypoint<TEnv> {
243250
analytics.setData({
244251
accountId: this.env.CONFIG.account_id,
245252
experimentName: "manifest-read-timing",
253+
binarySearchVersion,
246254
});
247255
}
248256

249257
const startTimeMs = performance.now();
250258
try {
251-
const assetsManifest = new AssetsManifest(this.env.ASSETS_MANIFEST);
252-
const eTag = await assetsManifest.get(pathname);
259+
let eTag: string | null;
260+
261+
if (binarySearchVersion === "perfTest") {
262+
try {
263+
const assetsManifest = new AssetsManifest2(
264+
this.env.ASSETS_MANIFEST
265+
);
266+
eTag = await assetsManifest.get(pathname);
267+
} catch {
268+
// Fallback to the "current" impl if the new one throws.
269+
// We use "current-fallback" to surface errors in the analytics data
270+
analytics.setData({ binarySearchVersion: "current-fallback" });
271+
const assetsManifest = new AssetsManifest(this.env.ASSETS_MANIFEST);
272+
eTag = await assetsManifest.get(pathname);
273+
}
274+
} else {
275+
const assetsManifest = new AssetsManifest(this.env.ASSETS_MANIFEST);
276+
eTag = await assetsManifest.get(pathname);
277+
}
253278

254279
span.setTags({
255280
path: pathname,

0 commit comments

Comments
 (0)