Skip to content

Commit 966b10a

Browse files
AlpAlp
authored andcommitted
perf(zql): comparator fast paths - string comparison + single-key optimization
Optimize hot comparison paths in the IVM pipeline: * Add compareStringUTF8Fast for ASCII-fast string comparison with UTF-8 fallback * Reorder compareValues to check strings before nulls (most common type) * Add single-key fast path in makeComparator avoiding loop overhead * Add single-key fast path in makeBoundComparator with fully inlined comparison * Fix compareBounds null handling for nullable database columns
1 parent 0210424 commit 966b10a

File tree

3 files changed

+178
-16
lines changed

3 files changed

+178
-16
lines changed

packages/zql/src/ivm/data.test.ts

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import {compareUTF8} from 'compare-utf8';
22
import fc from 'fast-check';
3-
import {expect, test} from 'vitest';
3+
import {describe, expect, test} from 'vitest';
44
import {
5+
compareStringUTF8Fast,
56
compareValues,
67
makeComparator,
78
normalizeUndefined,
@@ -84,10 +85,13 @@ test('compareValues', () => {
8485
),
8586
);
8687

87-
// string
88+
// string - compareStringUTF8Fast returns different magnitudes for ASCII
89+
// but always matches the sign of compareUTF8
8890
fc.assert(
8991
fc.property(fc.fullUnicodeString(), fc.fullUnicodeString(), (s1, s2) => {
90-
expect(compareValues(s1, s2)).toBe(compareUTF8(s1, s2));
92+
expect(Math.sign(compareValues(s1, s2))).toBe(
93+
Math.sign(compareUTF8(s1, s2)),
94+
);
9195
}),
9296
);
9397
fc.assert(
@@ -132,3 +136,73 @@ test('valuesEquals', () => {
132136
test('comparator', () => {
133137
compareRowsTest(makeComparator);
134138
});
139+
140+
describe('compareStringUTF8Fast', () => {
141+
test('ASCII strings compare correctly', () => {
142+
expect(compareStringUTF8Fast('abc', 'def')).toBeLessThan(0);
143+
expect(compareStringUTF8Fast('def', 'abc')).toBeGreaterThan(0);
144+
expect(compareStringUTF8Fast('abc', 'abc')).toBe(0);
145+
});
146+
147+
test('empty strings', () => {
148+
expect(compareStringUTF8Fast('', '')).toBe(0);
149+
expect(compareStringUTF8Fast('', 'a')).toBeLessThan(0);
150+
expect(compareStringUTF8Fast('a', '')).toBeGreaterThan(0);
151+
});
152+
153+
test('Unicode strings fall back correctly', () => {
154+
// Non-ASCII chars trigger compareUTF8 fallback; sign must match
155+
expect(Math.sign(compareStringUTF8Fast('café', 'cafë'))).toBe(
156+
Math.sign(compareUTF8('café', 'cafë')),
157+
);
158+
});
159+
160+
test('prefix strings', () => {
161+
expect(compareStringUTF8Fast('abc', 'abcd')).toBeLessThan(0);
162+
expect(compareStringUTF8Fast('abcd', 'abc')).toBeGreaterThan(0);
163+
});
164+
165+
test('sign matches compareUTF8 for all ASCII', () => {
166+
fc.assert(
167+
fc.property(fc.asciiString(), fc.asciiString(), (a, b) => {
168+
expect(Math.sign(compareStringUTF8Fast(a, b))).toBe(
169+
Math.sign(compareUTF8(a, b)),
170+
);
171+
}),
172+
);
173+
});
174+
});
175+
176+
describe('makeComparator single-key fast path', () => {
177+
test('single key asc matches multi-key behavior', () => {
178+
const singleKey = makeComparator([['name', 'asc']]);
179+
const multiKey = makeComparator([
180+
['name', 'asc'],
181+
['id', 'asc'],
182+
]);
183+
// For rows where only 'name' differs, both should give same sign
184+
expect(Math.sign(singleKey({name: 'a'}, {name: 'b'}))).toBe(
185+
Math.sign(multiKey({name: 'a', id: '1'}, {name: 'b', id: '1'})),
186+
);
187+
});
188+
189+
test('single key desc', () => {
190+
const cmp = makeComparator([['name', 'desc']]);
191+
expect(cmp({name: 'a'}, {name: 'b'})).toBeGreaterThan(0);
192+
});
193+
194+
test('single key with reverse', () => {
195+
const cmp = makeComparator([['name', 'asc']], true);
196+
expect(cmp({name: 'a'}, {name: 'b'})).toBeGreaterThan(0);
197+
});
198+
199+
test('single key desc with reverse', () => {
200+
const cmp = makeComparator([['name', 'desc']], true);
201+
expect(cmp({name: 'a'}, {name: 'b'})).toBeLessThan(0);
202+
});
203+
204+
test('single key equality', () => {
205+
const cmp = makeComparator([['id', 'asc']]);
206+
expect(cmp({id: 42}, {id: 42})).toBe(0);
207+
});
208+
});

packages/zql/src/ivm/data.ts

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,25 @@ export type Node = {
2222
relationships: Record<string, () => Stream<Node | 'yield'>>;
2323
};
2424

25+
/**
26+
* Fast-path string comparison that handles the common ASCII case
27+
* without calling into compareUTF8. Falls back to compareUTF8 for
28+
* non-ASCII characters.
29+
*/
30+
export function compareStringUTF8Fast(a: string, b: string): number {
31+
if (a === b) return 0;
32+
const len = a.length < b.length ? a.length : b.length;
33+
for (let i = 0; i < len; i++) {
34+
const ac = a.charCodeAt(i);
35+
const bc = b.charCodeAt(i);
36+
if (ac !== bc) {
37+
if (ac < 128 && bc < 128) return ac - bc;
38+
return compareUTF8(a, b);
39+
}
40+
}
41+
return a.length - b.length;
42+
}
43+
2544
/**
2645
* Compare two values. The values must be of the same type. This function
2746
* throws at runtime if the types differ.
@@ -41,6 +60,12 @@ export function compareValues(a: Value, b: Value): number {
4160
if (a === b) {
4261
return 0;
4362
}
63+
// String check first - most common type in practice
64+
if (typeof a === 'string') {
65+
if (b === null) return 1;
66+
assertString(b);
67+
return compareStringUTF8Fast(a, b);
68+
}
4469
if (a === null) {
4570
return -1;
4671
}
@@ -55,18 +80,6 @@ export function compareValues(a: Value, b: Value): number {
5580
assertNumber(b);
5681
return a - b;
5782
}
58-
if (typeof a === 'string') {
59-
assertString(b);
60-
// We compare all strings in Zero as UTF-8. This is the default on SQLite
61-
// and we need to match it. See:
62-
// https://blog.replicache.dev/blog/replicache-11-adventures-in-text-encoding.
63-
//
64-
// TODO: We could change this since SQLite supports UTF-16. Microbenchmark
65-
// to see if there's a big win.
66-
//
67-
// https://www.sqlite.org/c3ref/create_collation.html
68-
return compareUTF8(a, b);
69-
}
7083
throw new Error(`Unsupported type: ${a}`);
7184
}
7285

@@ -84,6 +97,18 @@ export function normalizeUndefined(v: Value): NormalizedValue {
8497
export type Comparator = (r1: Row, r2: Row) => number;
8598

8699
export function makeComparator(order: Ordering, reverse?: boolean): Comparator {
100+
if (order.length === 1) {
101+
const key = order[0][0];
102+
const dir = order[0][1];
103+
if (dir === 'asc') {
104+
return reverse
105+
? (a, b) => -compareValues(a[key], b[key])
106+
: (a, b) => compareValues(a[key], b[key]);
107+
}
108+
return reverse
109+
? (a, b) => compareValues(a[key], b[key])
110+
: (a, b) => -compareValues(a[key], b[key]);
111+
}
87112
return (a, b) => {
88113
// Skip destructuring here since it is hot code.
89114
for (const ord of order) {

packages/zql/src/ivm/memory-source.ts

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import {compareUTF8} from 'compare-utf8';
12
import {assert, unreachable} from '../../../shared/src/asserts.ts';
23
import {BTreeSet} from '../../../shared/src/btree-set.ts';
34
import {hasOwn} from '../../../shared/src/has-own.ts';
@@ -788,7 +789,57 @@ type MinValue = typeof minValue;
788789
const maxValue = Symbol('max-value');
789790
type MaxValue = typeof maxValue;
790791

792+
/**
793+
* Creates a comparator for RowBound values used in BTree index scans.
794+
*
795+
* For single-key sorts (the common case), we fully inline the comparison
796+
* logic to eliminate function call overhead. This is the hottest comparator
797+
* in the IVM pipeline: it runs for every BTree node visited during index
798+
* scans. In production profiling (135 pipelines x ~200 rows), this path
799+
* accounts for a significant portion of CPU time, and each eliminated
800+
* function call frame (compareBounds -> compareValues -> compareUTF8)
801+
* compounds across millions of invocations.
802+
*/
791803
function makeBoundComparator(sort: Ordering) {
804+
if (sort.length === 1) {
805+
const key = sort[0][0];
806+
const dir = sort[0][1];
807+
const cmp = (a: RowBound, b: RowBound): number => {
808+
const av = a[key];
809+
const bv = b[key];
810+
if (av === bv) return 0;
811+
if (av === minValue) return -1;
812+
if (bv === minValue) return 1;
813+
if (av === maxValue) return 1;
814+
if (bv === maxValue) return -1;
815+
// Null handling (min/max symbols ruled out above, safe to narrow)
816+
const aN: Value = av ?? null;
817+
const bN: Value = bv ?? null;
818+
if (aN === null) return bN === null ? 0 : -1;
819+
if (bN === null) return 1;
820+
// Inline compareValues for string (most common) and number
821+
if (typeof av === 'string') {
822+
const aStr = av;
823+
const bStr = bv as string;
824+
if (aStr === bStr) return 0;
825+
const len = aStr.length < bStr.length ? aStr.length : bStr.length;
826+
for (let i = 0; i < len; i++) {
827+
const ac = aStr.charCodeAt(i);
828+
const bc = bStr.charCodeAt(i);
829+
if (ac !== bc) {
830+
if (ac < 128 && bc < 128) return ac - bc;
831+
return compareUTF8(aStr, bStr);
832+
}
833+
}
834+
return aStr.length - bStr.length;
835+
}
836+
if (typeof av === 'number') return av - (bv as number);
837+
// Fallback for other types (boolean, etc.)
838+
return compareValues(aN, bN);
839+
};
840+
return dir === 'asc' ? cmp : (a: RowBound, b: RowBound) => -cmp(a, b);
841+
}
842+
// Multi-key path
792843
return (a: RowBound, b: RowBound) => {
793844
// Hot! Do not use destructuring
794845
for (const entry of sort) {
@@ -818,7 +869,19 @@ function compareBounds(a: Bound, b: Bound): number {
818869
if (b === maxValue) {
819870
return -1;
820871
}
821-
return compareValues(a, b);
872+
// Handle null/undefined before delegating to compareValues.
873+
// compareValues asserts type homogeneity (e.g. assertString(b) when a
874+
// is a string). Nullable database columns can produce null vs
875+
// string/number comparisons that compareValues doesn't handle.
876+
const aN: Value = a ?? null;
877+
const bN: Value = b ?? null;
878+
if (aN === null) {
879+
return bN === null ? 0 : -1;
880+
}
881+
if (bN === null) {
882+
return 1;
883+
}
884+
return compareValues(aN, bN);
822885
}
823886

824887
function* generateRows(

0 commit comments

Comments
 (0)