Skip to content

Commit 49b7023

Browse files
AlpAlp
authored and committed
perf(zql): comparator fast paths - string comparison + single-key optimization
Optimize hot comparison paths in the IVM pipeline:

* Add compareStringUTF8Fast for ASCII-fast string comparison with UTF-8 fallback
* Reorder compareValues to check strings before nulls (most common type)
* Add single-key fast path in makeComparator avoiding loop overhead
* Add single-key fast path in makeBoundComparator with fully inlined comparison
* Fix compareBounds null handling for nullable database columns
1 parent a8d5eca commit 49b7023

File tree

3 files changed

+169
-38
lines changed

3 files changed

+169
-38
lines changed

packages/zql/src/ivm/data.test.ts

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import {compareUTF8} from 'compare-utf8';
22
import fc from 'fast-check';
3-
import {expect, test} from 'vitest';
3+
import {describe, expect, test} from 'vitest';
44
import {
5+
compareStringUTF8Fast,
56
compareValues,
67
makeComparator,
78
normalizeUndefined,
@@ -84,10 +85,13 @@ test('compareValues', () => {
8485
),
8586
);
8687

87-
// string
88+
// string - compareStringUTF8Fast returns different magnitudes for ASCII
89+
// but always matches the sign of compareUTF8
8890
fc.assert(
8991
fc.property(fc.fullUnicodeString(), fc.fullUnicodeString(), (s1, s2) => {
90-
expect(compareValues(s1, s2)).toBe(compareUTF8(s1, s2));
92+
expect(Math.sign(compareValues(s1, s2))).toBe(
93+
Math.sign(compareUTF8(s1, s2)),
94+
);
9195
}),
9296
);
9397
fc.assert(
@@ -132,3 +136,73 @@ test('valuesEquals', () => {
132136
test('comparator', () => {
133137
compareRowsTest(makeComparator);
134138
});
139+
140+
describe('compareStringUTF8Fast', () => {
141+
test('ASCII strings compare correctly', () => {
142+
expect(compareStringUTF8Fast('abc', 'def')).toBeLessThan(0);
143+
expect(compareStringUTF8Fast('def', 'abc')).toBeGreaterThan(0);
144+
expect(compareStringUTF8Fast('abc', 'abc')).toBe(0);
145+
});
146+
147+
test('empty strings', () => {
148+
expect(compareStringUTF8Fast('', '')).toBe(0);
149+
expect(compareStringUTF8Fast('', 'a')).toBeLessThan(0);
150+
expect(compareStringUTF8Fast('a', '')).toBeGreaterThan(0);
151+
});
152+
153+
test('Unicode strings fall back correctly', () => {
154+
// Non-ASCII chars trigger compareUTF8 fallback; sign must match
155+
expect(Math.sign(compareStringUTF8Fast('café', 'cafë'))).toBe(
156+
Math.sign(compareUTF8('café', 'cafë')),
157+
);
158+
});
159+
160+
test('prefix strings', () => {
161+
expect(compareStringUTF8Fast('abc', 'abcd')).toBeLessThan(0);
162+
expect(compareStringUTF8Fast('abcd', 'abc')).toBeGreaterThan(0);
163+
});
164+
165+
test('sign matches compareUTF8 for all ASCII', () => {
166+
fc.assert(
167+
fc.property(fc.asciiString(), fc.asciiString(), (a, b) => {
168+
expect(Math.sign(compareStringUTF8Fast(a, b))).toBe(
169+
Math.sign(compareUTF8(a, b)),
170+
);
171+
}),
172+
);
173+
});
174+
});
175+
176+
describe('makeComparator single-key fast path', () => {
177+
test('single key asc matches multi-key behavior', () => {
178+
const singleKey = makeComparator([['name', 'asc']]);
179+
const multiKey = makeComparator([
180+
['name', 'asc'],
181+
['id', 'asc'],
182+
]);
183+
// For rows where only 'name' differs, both should give same sign
184+
expect(Math.sign(singleKey({name: 'a'}, {name: 'b'}))).toBe(
185+
Math.sign(multiKey({name: 'a', id: '1'}, {name: 'b', id: '1'})),
186+
);
187+
});
188+
189+
test('single key desc', () => {
190+
const cmp = makeComparator([['name', 'desc']]);
191+
expect(cmp({name: 'a'}, {name: 'b'})).toBeGreaterThan(0);
192+
});
193+
194+
test('single key with reverse', () => {
195+
const cmp = makeComparator([['name', 'asc']], true);
196+
expect(cmp({name: 'a'}, {name: 'b'})).toBeGreaterThan(0);
197+
});
198+
199+
test('single key desc with reverse', () => {
200+
const cmp = makeComparator([['name', 'desc']], true);
201+
expect(cmp({name: 'a'}, {name: 'b'})).toBeLessThan(0);
202+
});
203+
204+
test('single key equality', () => {
205+
const cmp = makeComparator([['id', 'asc']]);
206+
expect(cmp({id: 42}, {id: 42})).toBe(0);
207+
});
208+
});

packages/zql/src/ivm/data.ts

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,28 @@ export type Node = {
2222
relationships: Record<string, () => Stream<Node | 'yield'>>;
2323
};
2424

25+
/**
26+
* Fast-path string comparison that handles the common ASCII case
27+
* without calling into compareUTF8. Falls back to compareUTF8 for
28+
* non-ASCII characters.
29+
*
30+
* The result is sign-only: negative if a < b, 0 if equal, positive
31+
* if a > b. Callers must NOT rely on the magnitude of the return value.
32+
*/
33+
export function compareStringUTF8Fast(a: string, b: string): number {
34+
if (a === b) return 0;
35+
const len = a.length < b.length ? a.length : b.length;
36+
for (let i = 0; i < len; i++) {
37+
const ac = a.charCodeAt(i);
38+
const bc = b.charCodeAt(i);
39+
if (ac !== bc) {
40+
if (ac < 128 && bc < 128) return ac - bc;
41+
return compareUTF8(a, b);
42+
}
43+
}
44+
return a.length - b.length;
45+
}
46+
2547
/**
2648
* Compare two values. The values must be of the same type. This function
2749
* throws at runtime if the types differ.
@@ -41,6 +63,15 @@ export function compareValues(a: Value, b: Value): number {
4163
if (a === b) {
4264
return 0;
4365
}
66+
// String check before null: strings are the most common value type in
67+
// practice, so testing them first reduces branch mispredictions. The
68+
// null sub-check inside handles the string-vs-null comparison without
69+
// falling through to the generic null checks below.
70+
if (typeof a === 'string') {
71+
if (b === null) return 1;
72+
assertString(b);
73+
return compareStringUTF8Fast(a, b);
74+
}
4475
if (a === null) {
4576
return -1;
4677
}
@@ -55,18 +86,6 @@ export function compareValues(a: Value, b: Value): number {
5586
assertNumber(b);
5687
return a - b;
5788
}
58-
if (typeof a === 'string') {
59-
assertString(b);
60-
// We compare all strings in Zero as UTF-8. This is the default on SQLite
61-
// and we need to match it. See:
62-
// https://blog.replicache.dev/blog/replicache-11-adventures-in-text-encoding.
63-
//
64-
// TODO: We could change this since SQLite supports UTF-16. Microbenchmark
65-
// to see if there's a big win.
66-
//
67-
// https://www.sqlite.org/c3ref/create_collation.html
68-
return compareUTF8(a, b);
69-
}
7089
throw new Error(`Unsupported type: ${a}`);
7190
}
7291

@@ -84,6 +103,18 @@ export function normalizeUndefined(v: Value): NormalizedValue {
84103
export type Comparator = (r1: Row, r2: Row) => number;
85104

86105
export function makeComparator(order: Ordering, reverse?: boolean): Comparator {
106+
if (order.length === 1) {
107+
const key = order[0][0];
108+
const dir = order[0][1];
109+
if (dir === 'asc') {
110+
return reverse
111+
? (a, b) => -compareValues(a[key], b[key])
112+
: (a, b) => compareValues(a[key], b[key]);
113+
}
114+
return reverse
115+
? (a, b) => compareValues(a[key], b[key])
116+
: (a, b) => -compareValues(a[key], b[key]);
117+
}
87118
return (a, b) => {
88119
// Skip destructuring here since it is hot code.
89120
for (const ord of order) {

packages/zql/src/ivm/memory-source.ts

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
import {assert, unreachable} from '../../../shared/src/asserts.ts';
1+
import {
2+
assert,
3+
assertNumber,
4+
assertString,
5+
unreachable,
6+
} from '../../../shared/src/asserts.ts';
27
import {BTreeSet} from '../../../shared/src/btree-set.ts';
38
import {hasOwn} from '../../../shared/src/has-own.ts';
49
import {once} from '../../../shared/src/iterables.ts';
@@ -25,6 +30,7 @@ import {
2530
type Constraint,
2631
} from './constraint.ts';
2732
import {
33+
compareStringUTF8Fast,
2834
compareValues,
2935
makeComparator,
3036
valuesEqual,
@@ -801,12 +807,51 @@ type MinValue = typeof minValue;
801807
const maxValue = Symbol('max-value');
802808
type MaxValue = typeof maxValue;
803809

810+
/**
811+
* Compares two Bound values, handling minValue/maxValue sentinels,
812+
* null, and delegating to type-specific comparison. This merges the
813+
* logic of compareBounds + compareValues into a single function that
814+
* V8 can inline at the call site (well within TurboFan's 460-bytecode
815+
* inlining threshold).
816+
*/
817+
function compareBoundValue(a: Bound, b: Bound): number {
818+
if (a === b) return 0;
819+
if (a === minValue) return -1;
820+
if (b === minValue) return 1;
821+
if (a === maxValue) return 1;
822+
if (b === maxValue) return -1;
823+
const aN: Value = a ?? null;
824+
const bN: Value = b ?? null;
825+
if (aN === null) return bN === null ? 0 : -1;
826+
if (bN === null) return 1;
827+
if (typeof a === 'string') {
828+
assertString(b);
829+
return compareStringUTF8Fast(a, b);
830+
}
831+
if (typeof a === 'number') {
832+
assertNumber(b);
833+
return a - (b as number);
834+
}
835+
return compareValues(aN, bN);
836+
}
837+
838+
/**
839+
* Creates a comparator for RowBound values used in BTree index scans.
840+
*
841+
* For single-key sorts (the common case), returns a direct comparator
842+
* that avoids the multi-key loop. The actual comparison logic lives in
843+
* compareBoundValue, which is small enough for V8 to inline at the call site.
844+
*/
804845
function makeBoundComparator(sort: Ordering) {
846+
if (sort.length === 1) {
847+
const key = sort[0][0];
848+
const dir = sort[0][1];
849+
const cmp = (a: RowBound, b: RowBound) => compareBoundValue(a[key], b[key]);
850+
return dir === 'asc' ? cmp : (a: RowBound, b: RowBound) => -cmp(a, b);
851+
}
805852
return (a: RowBound, b: RowBound) => {
806-
// Hot! Do not use destructuring
807853
for (const entry of sort) {
808-
const key = entry[0];
809-
const cmp = compareBounds(a[key], b[key]);
854+
const cmp = compareBoundValue(a[entry[0]], b[entry[0]]);
810855
if (cmp !== 0) {
811856
return entry[1] === 'asc' ? cmp : -cmp;
812857
}
@@ -815,25 +860,6 @@ function makeBoundComparator(sort: Ordering) {
815860
};
816861
}
817862

818-
function compareBounds(a: Bound, b: Bound): number {
819-
if (a === b) {
820-
return 0;
821-
}
822-
if (a === minValue) {
823-
return -1;
824-
}
825-
if (b === minValue) {
826-
return 1;
827-
}
828-
if (a === maxValue) {
829-
return 1;
830-
}
831-
if (b === maxValue) {
832-
return -1;
833-
}
834-
return compareValues(a, b);
835-
}
836-
837863
function* generateRows(
838864
data: BTreeSet<Row>,
839865
scanStart: RowBound | undefined,

0 commit comments

Comments
 (0)