Skip to content

Commit 71cf652

Browse files
committed
conat: cache subject splitting and consistent hashing for sticky routing
1 parent b4f77d5 commit 71cf652

File tree

19 files changed

+685
-815
lines changed

19 files changed

+685
-815
lines changed

src/packages/conat/benchmark.ts

Lines changed: 360 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,360 @@
1+
#!/usr/bin/env node
2+
3+
import { AsciiTable3 } from "ascii-table3";
4+
5+
import { Patterns } from "./core/patterns";
6+
import { getSplitCacheStats, setSplitCacheEnabled } from "./core/split-cache";
7+
import {
8+
clearConsistentHashCache,
9+
consistentHashingChoice,
10+
getConsistentHashCacheStats,
11+
setConsistentHashCacheEnabled,
12+
} from "./core/sticky";
13+
14+
/** How many times the complete set of benchmark variants is repeated. */
const ITERATIONS = 10;
/** Number of message subjects generated and routed in each iteration. */
const MESSAGE_COUNT = 100_000;
/**
 * Number of synthetic accounts and projects used when generating patterns.
 * The total number of generated patterns scales with this value rather than
 * being equal to it.
 */
const PATTERN_COUNT = 1_000;
/** Nanoseconds per millisecond; divides `process.hrtime.bigint()` deltas. */
const NS_TO_MS = 1_000_000;
18+
19+
// Helper functions for statistics
20+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values samples to average
 * @returns the sum divided by the sample count; `NaN` for an empty list
 *   (0 / 0), which callers format rather than treat as an error
 */
function calculateMean(values: number[]): number {
  let total = 0;
  for (const value of values) {
    total += value;
  }
  return total / values.length;
}
23+
24+
/**
 * Population standard deviation (divides by N, not N - 1) of `values`
 * around a mean supplied by the caller.
 *
 * @param values samples
 * @param mean the mean to measure deviations from
 * @returns square root of the average squared deviation; `NaN` for an
 *   empty list
 */
function calculateStdDev(values: number[], mean: number): number {
  let squaredDeviationSum = 0;
  for (const value of values) {
    squaredDeviationSum += (value - mean) ** 2;
  }
  return Math.sqrt(squaredDeviationSum / values.length);
}
30+
31+
/**
 * Formats a mean and its standard deviation as a fixed-width table cell of
 * the form `"<mean> ±<relative std>%"`.
 *
 * The mean is rounded and left-padded to 6 characters. The relative standard
 * deviation (std / avg, as a percentage) is rounded and left-padded to 4
 * characters. When the relative deviation is not a finite number (e.g. the
 * mean is 0), only the mean is shown, right-padded to 10 characters.
 *
 * @param avg mean value
 * @param std standard deviation in the same unit as `avg`
 * @returns the formatted cell text
 */
function formatStat(avg: number, std: number): string {
  const n = Math.round(avg).toString().padStart(6);
  const relativeStd = (std / avg) * 100;
  // Number.isFinite rejects NaN as well as ±Infinity in one check.
  if (!Number.isFinite(relativeStd)) {
    return n.padEnd(10);
  }
  const s = Math.round(relativeStd).toString().padStart(4);
  return `${n} ±${s}%`;
}
40+
41+
/**
 * Generates the subject patterns registered during the benchmark. The
 * original author notes the subject shapes are based on CLAUDE.md.
 *
 * Output order:
 *  1. for each of `num` accounts, ten `hub.account.<id>.<service>` patterns;
 *  2. for each of `num` projects, two `hub.project.<id>.<service>` patterns
 *     followed by one `project.<id>.1.terminal.-` pattern;
 *  3. `floor(num / 100)` rounds of five extra templates, where the first `*`
 *     of each template is replaced by the zero-padded round index.
 *
 * Templates that contain no `*` (currently `system.stats.>`) are pushed
 * unchanged in every round, so the result can contain duplicates.
 *
 * @param num number of synthetic accounts and of synthetic projects
 * @returns `13 * num + 5 * floor(num / 100)` pattern strings
 */
function generateRealisticPatterns(num: number): string[] {
  // These lists do not depend on the loop index, so build them once.
  const accountServices = [
    "api",
    "projects",
    "db",
    "purchases",
    "jupyter",
    "sync",
    "org",
    "messages",
    "llm",
    "billing",
  ];
  const hubProjectServices = ["api", "sync"];
  const computeServices = ["terminal"];
  const additionalPatterns = [
    "time.account-*.api",
    "llm.project-*.api",
    "system.stats.>",
    "browser.session.*.sync",
    "notifications.account.*.alerts",
  ];

  const patterns: string[] = [];

  // `num` accounts, one pattern per account service.
  for (let i = 0; i < num; i++) {
    const accountId = `${i
      .toString()
      .padStart(8, "0")}-e89b-12d3-a456-426614174000`;
    for (const service of accountServices) {
      patterns.push(`hub.account.${accountId}.${service}`);
    }
  }

  // `num` projects, three patterns each (two hub, one compute).
  for (let i = 0; i < num; i++) {
    const projectId = `${i
      .toString()
      .padStart(8, "0")}-proj-12d3-a456-426614174001`;

    // Hub project patterns
    for (const service of hubProjectServices) {
      patterns.push(`hub.project.${projectId}.${service}`);
    }

    // Project compute patterns
    for (const service of computeServices) {
      patterns.push(`project.${projectId}.1.${service}.-`);
    }
  }

  // Extra patterns: one round of the five templates per 100 units of `num`.
  const rounds = Math.floor(num / 100);
  for (let i = 0; i < rounds; i++) {
    const roundId = i.toString().padStart(6, "0");
    for (const pattern of additionalPatterns) {
      // String-pattern replace only touches the first "*".
      patterns.push(pattern.replace("*", roundId));
    }
  }

  return patterns;
}
104+
105+
/**
 * Generates random message subjects to route during the benchmark.
 *
 * Each subject is drawn independently with `Math.random()`:
 *  - ~70%: an account or project subject (about 60/40 split) whose numeric id
 *    is below `patternCount`, using the `hub.account.*`, `hub.project.*` and
 *    `project.*.1.terminal.-` shapes;
 *  - ~20%: a `<service>.account-<streamId>.api` subject with a stream id
 *    below `floor(patternCount / 100)`;
 *  - ~10%: 2 to 6 random `seg<N>` segments joined by dots.
 *
 * @param count number of subjects to generate
 * @param patternCount upper bound (exclusive) for generated account/project
 *   ids; defaults to the module-level `PATTERN_COUNT` so existing callers are
 *   unaffected. Pass the same value that was used to generate the patterns.
 * @returns `count` subject strings
 */
function generateRealisticMessages(
  count: number,
  patternCount: number = PATTERN_COUNT,
): string[] {
  // Loop-invariant lookup tables.
  const accountServices = [
    "api",
    "projects",
    "db",
    "purchases",
    "jupyter",
    "sync",
    "org",
    "messages",
  ];
  const projectServices = ["api", "sync", "terminal"];
  const streamServices = ["time", "llm", "notifications", "browser", "system"];
  const streamIdLimit = Math.floor(patternCount / 100);

  const messages: string[] = [];

  for (let i = 0; i < count; i++) {
    const rand = Math.random();

    if (rand < 0.7) {
      // 70% exact account/project matches
      if (Math.random() < 0.6) {
        const accountId = `${Math.floor(Math.random() * patternCount)
          .toString()
          .padStart(8, "0")}-e89b-12d3-a456-426614174000`;
        const service =
          accountServices[Math.floor(Math.random() * accountServices.length)];
        messages.push(`hub.account.${accountId}.${service}`);
      } else {
        const projectId = `${Math.floor(Math.random() * patternCount)
          .toString()
          .padStart(8, "0")}-proj-12d3-a456-426614174001`;
        const service =
          projectServices[Math.floor(Math.random() * projectServices.length)];
        if (service === "terminal") {
          messages.push(`project.${projectId}.1.${service}.-`);
        } else {
          messages.push(`hub.project.${projectId}.${service}`);
        }
      }
    } else if (rand < 0.9) {
      // 20% stream subjects
      const streamId = Math.floor(Math.random() * streamIdLimit)
        .toString()
        .padStart(6, "0");
      const service =
        streamServices[Math.floor(Math.random() * streamServices.length)];
      messages.push(`${service}.account-${streamId}.api`);
    } else {
      // 10% completely random subjects
      const segments = Math.floor(Math.random() * 5) + 2;
      const parts: string[] = [];
      for (let j = 0; j < segments; j++) {
        parts.push(`seg${Math.floor(Math.random() * 1000)}`);
      }
      messages.push(parts.join("."));
    }
  }

  return messages;
}
163+
164+
/**
 * Runs the routing benchmark and prints a results table to stdout.
 *
 * For each of `ITERATIONS` iterations it generates one set of patterns and
 * messages, then runs every cache variant (no caching, split cache only,
 * hash cache only, both) against that same data. Each run measures:
 *  - setup time: registering all patterns in a fresh `Patterns` instance;
 *  - match time: `matches()` for every message plus one
 *    `consistentHashingChoice()` call per message that had a match.
 *
 * Means and relative standard deviations across iterations are reported,
 * together with the speedup of each variant relative to "No Caching".
 */
function benchmark(): void {
  console.log("CoCalc Conat Routing Benchmark");
  console.log("===============================");

  console.log(
    `Running ${ITERATIONS} iterations with ${MESSAGE_COUNT.toLocaleString()} messages each...`,
  );
  console.log();

  // One entry per variant keeps each name next to its cache configuration.
  // The first entry is the baseline used for the speedup column.
  const variants: {
    name: string;
    splitCacheEnabled: boolean;
    hashCacheEnabled: boolean;
  }[] = [
    { name: "No Caching", splitCacheEnabled: false, hashCacheEnabled: false },
    { name: "Split Cache", splitCacheEnabled: true, hashCacheEnabled: false },
    { name: "Hash Cache", splitCacheEnabled: false, hashCacheEnabled: true },
    { name: "Both Caches", splitCacheEnabled: true, hashCacheEnabled: true },
  ];

  // Per-variant samples collected across iterations.
  const results: {
    name: string;
    setupTimes: number[];
    matchTimes: number[];
    throughputs: number[];
    splitCacheHitRates: number[];
    hashCacheHitRates: number[];
  }[] = variants.map(({ name }) => ({
    name,
    setupTimes: [],
    matchTimes: [],
    throughputs: [],
    splitCacheHitRates: [],
    hashCacheHitRates: [],
  }));

  for (let iter = 0; iter < ITERATIONS; iter++) {
    console.log(`Iteration ${iter + 1}/${ITERATIONS}...`);

    // Fresh data per iteration, shared by all variants within it.
    const patterns = generateRealisticPatterns(PATTERN_COUNT);
    const messages = generateRealisticMessages(MESSAGE_COUNT);

    for (let variantIndex = 0; variantIndex < variants.length; variantIndex++) {
      const { splitCacheEnabled, hashCacheEnabled } = variants[variantIndex];
      const result = results[variantIndex];

      // Configure caches
      setSplitCacheEnabled(splitCacheEnabled);
      setConsistentHashCacheEnabled(hashCacheEnabled);
      clearConsistentHashCache(); // Reset cache stats for accurate measurement
      // NOTE(review): only the consistent-hash cache is cleared here; this
      // file imports no reset for the split cache, so its reported hit rate
      // may carry over between runs — confirm against ./core/split-cache.

      const p = new Patterns<string>();

      // Setup timing
      const startSetup = process.hrtime.bigint();
      for (let i = 0; i < patterns.length; i++) {
        p.set(patterns[i], `handler-${i}`);
      }
      const endSetup = process.hrtime.bigint();
      result.setupTimes.push(Number(endSetup - startSetup) / NS_TO_MS);

      // Fake targets for the consistent hashing simulation. Created per run
      // (as in the original) in case the hash cache is keyed on the Set.
      const targets = new Set([
        "target1",
        "target2",
        "target3",
        "target4",
        "target5",
      ]);

      // Realistic benchmark: pattern matching + target selection (when matches found)
      const startMatch = process.hrtime.bigint();
      let totalMatches = 0; // accumulated so the match result is observably used
      let totalTargetSelections = 0;
      let messagesWithMatches = 0;

      for (const message of messages) {
        // Step 1: pattern matching (uses split cache)
        const matches = p.matches(message);
        totalMatches += matches.length;

        // Step 2: target selection, once per message that matched anything
        if (matches.length > 0) {
          messagesWithMatches++;
          // Always use consistent hashing - caching is controlled internally
          const selectedTarget = consistentHashingChoice(targets, message);
          totalTargetSelections++;
          // Use the result to avoid optimization
          if (selectedTarget.length === 0) totalTargetSelections--;
        }
      }
      const endMatch = process.hrtime.bigint();

      // Consistency check: totalTargetSelections should equal messagesWithMatches
      if (totalTargetSelections !== messagesWithMatches) {
        console.error(
          `Consistency error in ${result.name}: totalTargetSelections=${totalTargetSelections}, messagesWithMatches=${messagesWithMatches}`,
        );
      }

      const matchTime = Number(endMatch - startMatch) / NS_TO_MS;
      // Messages per second: matchTime is in milliseconds.
      const throughput = messages.length / (matchTime / 1000);

      result.matchTimes.push(matchTime);
      result.throughputs.push(throughput);

      // Cache hit rates; recorded as 0 whenever the cache is off for this run.
      // `||` (not `??`) is deliberate: it also maps a NaN hit rate to 0.
      const splitStats = getSplitCacheStats();
      const splitCacheHitRate =
        splitCacheEnabled && splitStats.enabled ? splitStats.hitRate || 0 : 0;
      const hashStats = getConsistentHashCacheStats();
      const hashCacheHitRate =
        hashCacheEnabled && hashStats.enabled ? hashStats.hitRate || 0 : 0;

      result.splitCacheHitRates.push(splitCacheHitRate);
      result.hashCacheHitRates.push(hashCacheHitRate);
    }
  }

  console.log();

  // Baseline for the speedup column: mean match time of the first variant.
  const baselineAvgTime = calculateMean(results[0].matchTimes);

  // Create results table using AsciiTable3
  const table = new AsciiTable3("Benchmark Results").setHeading(
    "Variant",
    "Setup (ms)",
    "Match (ms)",
    "Throughput",
    "Split Hit %",
    "Hash Hit %",
    "Speedup",
  );

  for (const result of results) {
    // Means and standard deviations across iterations
    const setupMean = calculateMean(result.setupTimes);
    const setupStd = calculateStdDev(result.setupTimes, setupMean);
    const matchMean = calculateMean(result.matchTimes);
    const matchStd = calculateStdDev(result.matchTimes, matchMean);
    const throughputMean = calculateMean(result.throughputs);
    const throughputStd = calculateStdDev(result.throughputs, throughputMean);
    // NOTE(review): the columns are labelled "%"; this assumes hitRate is
    // already expressed as a percentage — confirm against the stats helpers.
    const splitCacheMean = calculateMean(result.splitCacheHitRates);
    const splitCacheStd = calculateStdDev(
      result.splitCacheHitRates,
      splitCacheMean,
    );
    const hashCacheMean = calculateMean(result.hashCacheHitRates);
    const hashCacheStd = calculateStdDev(
      result.hashCacheHitRates,
      hashCacheMean,
    );

    // Speedup is based on mean match time relative to the baseline variant.
    const speedup = baselineAvgTime / matchMean;

    table.addRow(
      result.name,
      formatStat(setupMean, setupStd),
      formatStat(matchMean, matchStd),
      formatStat(throughputMean, throughputStd),
      formatStat(splitCacheMean, splitCacheStd),
      formatStat(hashCacheMean, hashCacheStd),
      speedup.toFixed(2),
    );
  }

  table.setStyle("unicode-round");
  console.log(table.toString());
  console.log();

  console.log(
    `✅ Completed ${ITERATIONS} iterations with ${
      ITERATIONS * variants.length
    } total benchmark runs`,
  );
  console.log(" All variants ran on identical data for fair comparison");
}
357+
358+
// Run the benchmark only when this file is executed directly,
// not when it is loaded as a module by other code.
if (require.main === module) {
  benchmark();
}

0 commit comments

Comments
 (0)