Skip to content

Commit 5e1e217

Browse files
committed
conat: cache subject splitting and consistent hashing for sticky routing
1 parent b4f77d5 commit 5e1e217

File tree

19 files changed

+680
-814
lines changed

19 files changed

+680
-814
lines changed

src/packages/conat/benchmark.ts

Lines changed: 355 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
#!/usr/bin/env node
2+
3+
import { AsciiTable3 } from "ascii-table3";
4+
5+
import { Patterns } from "./core/patterns";
6+
import { getSplitCacheStats, setSplitCacheEnabled } from "./core/split-cache";
7+
import {
8+
clearConsistentHashCache,
9+
consistentHashingChoice,
10+
getConsistentHashCacheStats,
11+
setConsistentHashCacheEnabled,
12+
} from "./core/sticky";
13+
14+
// Number of times the complete set of cache variants is benchmarked.
const ITERATIONS = 10;
// Number of message subjects generated and routed in each iteration.
const MESSAGE_COUNT = 100_000;
// Size parameter passed to generateRealisticPatterns; the number of generated
// patterns is proportional to this value, not equal to it.
const PATTERN_COUNT = 1_000;
// Nanoseconds per millisecond, used to convert process.hrtime.bigint() deltas.
const NS_TO_MS = 1_000_000;
18+
19+
// Helper functions for statistics
20+
/**
 * Arithmetic mean of a list of samples.
 *
 * @param values - samples to average
 * @returns the sum of the samples divided by their count; `NaN` when
 *   `values` is empty (0 / 0)
 */
function calculateMean(values: number[]): number {
  return values.reduce((sum, val) => sum + val, 0) / values.length;
}
23+
24+
/**
 * Population standard deviation of a list of samples (the squared deviations
 * are divided by N, not N - 1).
 *
 * @param values - samples
 * @param mean - precomputed mean of `values`
 * @returns the square root of the mean squared deviation from `mean`; `NaN`
 *   when `values` is empty
 */
function calculateStdDev(values: number[], mean: number): number {
  const variance =
    values.reduce((sum, val) => sum + (val - mean) ** 2, 0) / values.length;
  return Math.sqrt(variance);
}
30+
31+
/**
 * Formats a mean and its standard deviation as `"<mean>±<relative std>%"`.
 *
 * The mean is rounded and left-padded to 6 characters; the standard deviation
 * is expressed as a rounded percentage of the mean, left-padded to 4
 * characters.
 *
 * @param avg - mean value
 * @param std - standard deviation of the same samples
 * @returns the formatted cell text
 */
function formatStat(avg: number, std: number): string {
  // A zero mean would make the ratio NaN (0 / 0) or Infinity; report 0%
  // instead of printing "NaN%" in the results table.
  const relativeStd = avg === 0 ? 0 : (std / avg) * 100;
  const n = Math.round(avg).toString().padStart(6);
  const s = Math.round(relativeStd).toString().padStart(4);
  return `${n}±${s}%`;
}
37+
38+
/**
 * Generates the subscription patterns used to populate the router for the
 * benchmark.
 *
 * The result contains, in this order:
 *  1. `num` accounts with 10 `hub.account.<id>.<service>` patterns each,
 *  2. `num` projects with 2 `hub.project.<id>.<service>` patterns followed by
 *     1 `project.<id>.1.terminal.-` pattern each,
 *  3. `floor(num / 100)` rounds of 5 additional templates whose first `*` is
 *     replaced by a zero-padded counter.
 *
 * @param num - number of accounts and of projects to generate
 * @returns the generated patterns
 */
function generateRealisticPatterns(num: number): string[] {
  const accountServices = [
    "api",
    "projects",
    "db",
    "purchases",
    "jupyter",
    "sync",
    "org",
    "messages",
    "llm",
    "billing",
  ];
  const hubProjectServices = ["api", "sync"];
  const computeServices = ["terminal"];
  const additionalPatterns = [
    "time.account-*.api",
    "llm.project-*.api",
    "system.stats.>",
    "browser.session.*.sync",
    "notifications.account.*.alerts",
  ];

  const patterns: string[] = [];

  // Account interests.
  for (let i = 0; i < num; i++) {
    const accountId = `${i
      .toString()
      .padStart(8, "0")}-e89b-12d3-a456-426614174000`;
    for (const service of accountServices) {
      patterns.push(`hub.account.${accountId}.${service}`);
    }
  }

  // Project interests: hub-facing subjects first, then compute subjects.
  for (let i = 0; i < num; i++) {
    const projectId = `${i
      .toString()
      .padStart(8, "0")}-proj-12d3-a456-426614174001`;
    for (const service of hubProjectServices) {
      patterns.push(`hub.project.${projectId}.${service}`);
    }
    for (const service of computeServices) {
      patterns.push(`project.${projectId}.1.${service}.-`);
    }
  }

  // Additional patterns. "system.stats.>" contains no "*", so it is pushed
  // unchanged (i.e. repeated) once per round.
  const rounds = Math.floor(num / 100);
  for (let i = 0; i < rounds; i++) {
    const id = i.toString().padStart(6, "0");
    for (const pattern of additionalPatterns) {
      patterns.push(pattern.replace("*", id));
    }
  }

  return patterns;
}
101+
102+
/**
 * Generates random message subjects to route through the benchmark.
 *
 * Approximate mix, driven by `Math.random()`:
 *  - 70%: account (60% of these) or project (40%) subjects with ids drawn
 *    from `[0, PATTERN_COUNT)`,
 *  - 20%: "stream" subjects of the form `<service>.account-<id>.api`,
 *  - 10%: random subjects made of 2 to 6 `seg<N>` segments.
 *
 * NOTE(review): of the stream subjects, only the `time.` ones have the same
 * shape as a pattern produced by generateRealisticPatterns
 * (`time.account-<id>.api`); the other four services use different subject
 * layouts there, so they look unlikely to match. Confirm whether that is the
 * intended workload.
 *
 * @param count - number of subjects to generate
 * @returns the generated subjects
 */
function generateRealisticMessages(count: number): string[] {
  const accountServices = [
    "api",
    "projects",
    "db",
    "purchases",
    "jupyter",
    "sync",
    "org",
    "messages",
  ];
  const projectServices = ["api", "sync", "terminal"];
  const streamServices = ["time", "llm", "notifications", "browser", "system"];

  // Uniformly random element of a non-empty list (one Math.random() call).
  const pick = (items: string[]): string =>
    items[Math.floor(Math.random() * items.length)];

  const messages: string[] = [];

  for (let i = 0; i < count; i++) {
    const rand = Math.random();

    if (rand < 0.7) {
      // Exact account/project subjects.
      if (Math.random() < 0.6) {
        const accountId = `${Math.floor(Math.random() * PATTERN_COUNT)
          .toString()
          .padStart(8, "0")}-e89b-12d3-a456-426614174000`;
        const service = pick(accountServices);
        messages.push(`hub.account.${accountId}.${service}`);
      } else {
        const projectId = `${Math.floor(Math.random() * PATTERN_COUNT)
          .toString()
          .padStart(8, "0")}-proj-12d3-a456-426614174001`;
        const service = pick(projectServices);
        if (service === "terminal") {
          messages.push(`project.${projectId}.1.${service}.-`);
        } else {
          messages.push(`hub.project.${projectId}.${service}`);
        }
      }
    } else if (rand < 0.9) {
      // Stream subjects.
      const streamId = Math.floor(
        Math.random() * Math.floor(PATTERN_COUNT / 100),
      )
        .toString()
        .padStart(6, "0");
      const service = pick(streamServices);
      messages.push(`${service}.account-${streamId}.api`);
    } else {
      // Completely random subjects with 2..6 segments.
      const segments = Math.floor(Math.random() * 5) + 2;
      const parts: string[] = [];
      for (let j = 0; j < segments; j++) {
        parts.push(`seg${Math.floor(Math.random() * 1000)}`);
      }
      messages.push(parts.join("."));
    }
  }

  return messages;
}
160+
161+
/**
 * Runs the routing benchmark and prints a results table to stdout.
 *
 * For each of ITERATIONS iterations a fresh set of patterns and messages is
 * generated, and every cache variant (no caching, split cache only, hash
 * cache only, both) is run on that same data: the patterns are registered in
 * a new `Patterns` instance (setup time), then every message is matched and,
 * when at least one pattern matches, a target is chosen with
 * `consistentHashingChoice` (match time). Means and relative standard
 * deviations over all iterations are reported, with speedup relative to the
 * "No Caching" variant.
 */
function benchmark() {
  console.log("CoCalc Conat Routing Benchmark");
  console.log("===============================");

  console.log(
    `Running ${ITERATIONS} iterations with ${MESSAGE_COUNT.toLocaleString()} messages each...`,
  );
  console.log();

  // One record per variant: its cache configuration plus the samples
  // collected across iterations.
  type VariantRun = {
    name: string;
    splitCacheEnabled: boolean;
    hashCacheEnabled: boolean;
    setupTimes: number[];
    matchTimes: number[];
    throughputs: number[];
    splitCacheHitRates: number[];
    hashCacheHitRates: number[];
  };

  const newRun = (
    name: string,
    splitCacheEnabled: boolean,
    hashCacheEnabled: boolean,
  ): VariantRun => ({
    name,
    splitCacheEnabled,
    hashCacheEnabled,
    setupTimes: [],
    matchTimes: [],
    throughputs: [],
    splitCacheHitRates: [],
    hashCacheHitRates: [],
  });

  // The baseline is kept in its own binding so the speedup computation does
  // not depend on array position.
  const baseline = newRun("No Caching", false, false);
  const results: VariantRun[] = [
    baseline,
    newRun("Split Cache", true, false),
    newRun("Hash Cache", false, true),
    newRun("Both Caches", true, true),
  ];

  // Run iterations
  for (let iter = 0; iter < ITERATIONS; iter++) {
    console.log(`Iteration ${iter + 1}/${ITERATIONS}...`);

    // Generate fresh patterns and messages for each iteration
    const patterns = generateRealisticPatterns(PATTERN_COUNT);
    const messages = generateRealisticMessages(MESSAGE_COUNT);

    // Run all variants on the same data
    for (const result of results) {
      const { splitCacheEnabled, hashCacheEnabled } = result;

      // Configure caches
      setSplitCacheEnabled(splitCacheEnabled);
      setConsistentHashCacheEnabled(hashCacheEnabled);
      // NOTE(review): only the consistent-hash cache is cleared here; nothing
      // visible in this file resets the split cache or its statistics between
      // variants — confirm the reported split hit rate is per-variant.
      clearConsistentHashCache();

      const p = new Patterns<string>();

      // Setup timing
      const startSetup = process.hrtime.bigint();
      for (let i = 0; i < patterns.length; i++) {
        p.set(patterns[i], `handler-${i}`);
      }
      const endSetup = process.hrtime.bigint();
      const setupTime = Number(endSetup - startSetup) / NS_TO_MS;
      result.setupTimes.push(setupTime);

      // Create a set of fake targets for consistent hashing simulation
      const targets = new Set([
        "target1",
        "target2",
        "target3",
        "target4",
        "target5",
      ]);

      // Realistic benchmark: pattern matching + target selection (when matches found)
      const startMatch = process.hrtime.bigint();
      let totalTargetSelections = 0;
      let messagesWithMatches = 0;

      for (const message of messages) {
        // Step 1: pattern matching
        const matches = p.matches(message);

        // Step 2: target selection, only for messages that matched something
        if (matches.length > 0) {
          messagesWithMatches++;
          const selectedTarget = consistentHashingChoice(targets, message);
          totalTargetSelections++;
          // Use the result so the call cannot be optimized away
          if (selectedTarget.length === 0) totalTargetSelections--;
        }
      }
      const endMatch = process.hrtime.bigint();

      // Consistency check: totalTargetSelections should equal messagesWithMatches
      if (totalTargetSelections !== messagesWithMatches) {
        console.error(
          `Consistency error in ${result.name}: totalTargetSelections=${totalTargetSelections}, messagesWithMatches=${messagesWithMatches}`,
        );
      }

      const matchTime = Number(endMatch - startMatch) / NS_TO_MS;
      // Messages per second (matchTime is in milliseconds).
      const throughput = messages.length / (matchTime / 1000);

      result.matchTimes.push(matchTime);
      result.throughputs.push(throughput);

      // Get cache hit rates; a disabled cache is recorded as 0
      const splitStats = getSplitCacheStats();
      const splitCacheHitRate =
        splitCacheEnabled && splitStats.enabled ? splitStats.hitRate || 0 : 0;
      const hashStats = getConsistentHashCacheStats();
      const hashCacheHitRate =
        hashCacheEnabled && hashStats.enabled ? hashStats.hitRate || 0 : 0;

      result.splitCacheHitRates.push(splitCacheHitRate);
      result.hashCacheHitRates.push(hashCacheHitRate);
    }
  }

  console.log();

  // Speedup is computed from average match times, relative to "No Caching"
  const baselineAvgTime = calculateMean(baseline.matchTimes);

  // Create results table using AsciiTable3
  const table = new AsciiTable3("Benchmark Results").setHeading(
    "Variant",
    "Setup (ms)",
    "Match (ms)",
    "Throughput",
    "Split Hit %",
    "Hash Hit %",
    "Speedup",
  );

  for (const result of results) {
    // Calculate averages and standard deviations
    const setupMean = calculateMean(result.setupTimes);
    const setupStd = calculateStdDev(result.setupTimes, setupMean);
    const matchMean = calculateMean(result.matchTimes);
    const matchStd = calculateStdDev(result.matchTimes, matchMean);
    const throughputMean = calculateMean(result.throughputs);
    const throughputStd = calculateStdDev(result.throughputs, throughputMean);
    const splitCacheMean = calculateMean(result.splitCacheHitRates);
    const hashCacheMean = calculateMean(result.hashCacheHitRates);

    const speedup = baselineAvgTime / matchMean;

    table.addRow(
      result.name,
      formatStat(setupMean, setupStd),
      formatStat(matchMean, matchStd),
      formatStat(throughputMean, throughputStd),
      `${splitCacheMean.toFixed(1)}%`.padEnd(12),
      `${hashCacheMean.toFixed(1)}%`.padEnd(12),
      speedup.toFixed(2),
    );
  }

  table.setStyle("unicode-round");
  console.log(table.toString());
  console.log();

  console.log(
    `✅ Completed ${ITERATIONS} iterations with ${
      ITERATIONS * results.length
    } total benchmark runs`,
  );
  console.log(" All variants ran on identical data for fair comparison");
}
352+
353+
// Run the benchmark only when this file is executed directly as a script,
// not when it is loaded by another module.
if (require.main === module) {
  benchmark();
}

0 commit comments

Comments
 (0)