Skip to content

Commit eac60c0

Browse files
AlpAlp
authored and committed
perf(zql): comparator fast paths - string comparison + single-key optimization
Optimize hot comparison paths in the IVM pipeline:

* Add compareStringUTF8Fast for ASCII-fast string comparison with UTF-8 fallback
* Reorder compareValues to check strings before nulls (most common type)
* Add single-key fast path in makeComparator avoiding loop overhead
* Add single-key fast path in makeBoundComparator with fully inlined comparison
* Fix compareBounds null handling for nullable database columns
1 parent a0b573c commit eac60c0

File tree

3 files changed

+170
-38
lines changed

3 files changed

+170
-38
lines changed

packages/zql/src/ivm/data.test.ts

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import {compareUTF8} from 'compare-utf8';
22
import fc from 'fast-check';
3-
import {expect, test} from 'vitest';
3+
import {describe, expect, test} from 'vitest';
44
import {
5+
compareStringUTF8Fast,
56
compareValues,
67
makeComparator,
78
normalizeUndefined,
@@ -84,10 +85,13 @@ test('compareValues', () => {
8485
),
8586
);
8687

87-
// string
88+
// string - compareStringUTF8Fast returns different magnitudes for ASCII
89+
// but always matches the sign of compareUTF8
8890
fc.assert(
8991
fc.property(fc.fullUnicodeString(), fc.fullUnicodeString(), (s1, s2) => {
90-
expect(compareValues(s1, s2)).toBe(compareUTF8(s1, s2));
92+
expect(Math.sign(compareValues(s1, s2))).toBe(
93+
Math.sign(compareUTF8(s1, s2)),
94+
);
9195
}),
9296
);
9397
fc.assert(
@@ -132,3 +136,73 @@ test('valuesEquals', () => {
132136
test('comparator', () => {
133137
compareRowsTest(makeComparator);
134138
});
139+
140+
describe('compareStringUTF8Fast', () => {
  // The fast comparator may return a different magnitude than compareUTF8,
  // so agreement is checked on the sign of the result only.
  const expectSameSignAsCompareUTF8 = (left: string, right: string) => {
    const fastSign = Math.sign(compareStringUTF8Fast(left, right));
    const referenceSign = Math.sign(compareUTF8(left, right));
    expect(fastSign).toBe(referenceSign);
  };

  test('ASCII strings compare correctly', () => {
    const ascending = compareStringUTF8Fast('abc', 'def');
    const descending = compareStringUTF8Fast('def', 'abc');
    const identical = compareStringUTF8Fast('abc', 'abc');

    expect(ascending).toBeLessThan(0);
    expect(descending).toBeGreaterThan(0);
    expect(identical).toBe(0);
  });

  test('empty strings', () => {
    const bothEmpty = compareStringUTF8Fast('', '');
    const emptyFirst = compareStringUTF8Fast('', 'a');
    const emptySecond = compareStringUTF8Fast('a', '');

    expect(bothEmpty).toBe(0);
    expect(emptyFirst).toBeLessThan(0);
    expect(emptySecond).toBeGreaterThan(0);
  });

  test('Unicode strings fall back correctly', () => {
    // The first differing characters are non-ASCII, so the comparison is
    // handed to compareUTF8; the sign must agree with calling it directly.
    expectSameSignAsCompareUTF8('café', 'cafë');
  });

  test('prefix strings', () => {
    const shorterFirst = compareStringUTF8Fast('abc', 'abcd');
    const longerFirst = compareStringUTF8Fast('abcd', 'abc');

    expect(shorterFirst).toBeLessThan(0);
    expect(longerFirst).toBeGreaterThan(0);
  });

  test('sign matches compareUTF8 for all ASCII', () => {
    const asciiPairAgrees = fc.property(
      fc.asciiString(),
      fc.asciiString(),
      (a, b) => {
        expectSameSignAsCompareUTF8(a, b);
      },
    );
    fc.assert(asciiPairAgrees);
  });
});
175+
176+
describe('makeComparator single-key fast path', () => {
  test('single key asc matches multi-key behavior', () => {
    const byNameOnly = makeComparator([['name', 'asc']]);
    const byNameThenId = makeComparator([
      ['name', 'asc'],
      ['id', 'asc'],
    ]);

    // The rows differ only in 'name', so both comparators must agree on
    // the direction of the result.
    const singleKeySign = Math.sign(byNameOnly({name: 'a'}, {name: 'b'}));
    const multiKeySign = Math.sign(
      byNameThenId({name: 'a', id: '1'}, {name: 'b', id: '1'}),
    );
    expect(singleKeySign).toBe(multiKeySign);
  });

  test('single key desc', () => {
    const descending = makeComparator([['name', 'desc']]);
    const result = descending({name: 'a'}, {name: 'b'});
    expect(result).toBeGreaterThan(0);
  });

  test('single key with reverse', () => {
    const reversedAscending = makeComparator([['name', 'asc']], true);
    const result = reversedAscending({name: 'a'}, {name: 'b'});
    expect(result).toBeGreaterThan(0);
  });

  test('single key desc with reverse', () => {
    const reversedDescending = makeComparator([['name', 'desc']], true);
    const result = reversedDescending({name: 'a'}, {name: 'b'});
    expect(result).toBeLessThan(0);
  });

  test('single key equality', () => {
    const byId = makeComparator([['id', 'asc']]);
    const result = byId({id: 42}, {id: 42});
    expect(result).toBe(0);
  });
});

packages/zql/src/ivm/data.ts

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,29 @@ export type Node = {
2222
relationships: Record<string, () => Stream<Node | 'yield'>>;
2323
};
2424

25+
/**
 * String comparison with a shortcut for the common ASCII case.
 *
 * The strings are walked code unit by code unit up to the length of the
 * shorter one. At the first position where they differ:
 *   - if both code units are below 128, their difference is returned;
 *   - otherwise the whole comparison is delegated to compareUTF8.
 * If no position differs, the difference of the lengths is returned, so a
 * string sorts before any longer string it is a prefix of.
 *
 * The result is therefore not normalized to -1/0/1. Callers are expected to
 * rely on its sign only.
 *
 * @param a Left-hand string.
 * @param b Right-hand string.
 * @returns A negative number, zero, or a positive number.
 */
export function compareStringUTF8Fast(a: string, b: string): number {
  if (a === b) {
    return 0;
  }

  const sharedLength = Math.min(a.length, b.length);
  let index = 0;
  while (index < sharedLength) {
    const left = a.charCodeAt(index);
    const right = b.charCodeAt(index);
    if (left === right) {
      index++;
      continue;
    }
    // First mismatch: only the pure-ASCII case is decided locally.
    return left < 128 && right < 128 ? left - right : compareUTF8(a, b);
  }

  // One string is a prefix of the other.
  return a.length - b.length;
}
47+
2548
/**
2649
* Compare two values. The values must be of the same type. This function
2750
* throws at runtime if the types differ.
@@ -41,6 +64,15 @@ export function compareValues(a: Value, b: Value): number {
4164
if (a === b) {
4265
return 0;
4366
}
67+
// String check before null: strings are the most common value type in
68+
// practice, so checking first avoids the null branches on the hot path.
69+
// The b === null guard handles the case where a is string but b is null
70+
// (which compareStringUTF8Fast can't handle).
71+
if (typeof a === 'string') {
72+
if (b === null) return 1;
73+
assertString(b);
74+
return compareStringUTF8Fast(a, b);
75+
}
4476
if (a === null) {
4577
return -1;
4678
}
@@ -55,18 +87,6 @@ export function compareValues(a: Value, b: Value): number {
5587
assertNumber(b);
5688
return a - b;
5789
}
58-
if (typeof a === 'string') {
59-
assertString(b);
60-
// We compare all strings in Zero as UTF-8. This is the default on SQLite
61-
// and we need to match it. See:
62-
// https://blog.replicache.dev/blog/replicache-11-adventures-in-text-encoding.
63-
//
64-
// TODO: We could change this since SQLite supports UTF-16. Microbenchmark
65-
// to see if there's a big win.
66-
//
67-
// https://www.sqlite.org/c3ref/create_collation.html
68-
return compareUTF8(a, b);
69-
}
7090
throw new Error(`Unsupported type: ${a}`);
7191
}
7292

@@ -84,6 +104,18 @@ export function normalizeUndefined(v: Value): NormalizedValue {
84104
export type Comparator = (r1: Row, r2: Row) => number;
85105

86106
export function makeComparator(order: Ordering, reverse?: boolean): Comparator {
107+
if (order.length === 1) {
108+
const key = order[0][0];
109+
const dir = order[0][1];
110+
if (dir === 'asc') {
111+
return reverse
112+
? (a, b) => -compareValues(a[key], b[key])
113+
: (a, b) => compareValues(a[key], b[key]);
114+
}
115+
return reverse
116+
? (a, b) => compareValues(a[key], b[key])
117+
: (a, b) => -compareValues(a[key], b[key]);
118+
}
87119
return (a, b) => {
88120
// Skip destructuring here since it is hot code.
89121
for (const ord of order) {

packages/zql/src/ivm/memory-source.ts

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
import {assert, unreachable} from '../../../shared/src/asserts.ts';
1+
import {
2+
assert,
3+
assertNumber,
4+
assertString,
5+
unreachable,
6+
} from '../../../shared/src/asserts.ts';
27
import {BTreeSet} from '../../../shared/src/btree-set.ts';
38
import {hasOwn} from '../../../shared/src/has-own.ts';
49
import {once} from '../../../shared/src/iterables.ts';
@@ -25,6 +30,7 @@ import {
2530
type Constraint,
2631
} from './constraint.ts';
2732
import {
33+
compareStringUTF8Fast,
2834
compareValues,
2935
makeComparator,
3036
valuesEqual,
@@ -804,12 +810,51 @@ type MinValue = typeof minValue;
804810
const maxValue = Symbol('max-value');
805811
type MaxValue = typeof maxValue;
806812

813+
/**
 * Orders two Bound values.
 *
 * The checks are applied in this order:
 *   1. Identical values (including the same sentinel) compare equal.
 *   2. minValue sorts before every other value; maxValue sorts after every
 *      other value.
 *   3. null and undefined are treated as the same value and sort before any
 *      remaining value.
 *   4. Strings are compared with compareStringUTF8Fast and numbers by
 *      subtraction; the other operand is asserted to have the same type.
 *   5. Anything else is delegated to compareValues.
 *
 * This deliberately combines sentinel handling with the value comparison in
 * one small function; the commit that introduced it gives V8 inlining at the
 * call site as the motivation.
 *
 * @param a Left-hand bound.
 * @param b Right-hand bound.
 * @returns A negative number, zero, or a positive number. The magnitude is
 *   not normalized, so only the sign should be relied on.
 */
function compareBoundValue(a: Bound, b: Bound): number {
  if (a === b) return 0;
  if (a === minValue) return -1;
  if (b === minValue) return 1;
  if (a === maxValue) return 1;
  if (b === maxValue) return -1;
  // `== null` matches both null and undefined, so the two are interchangeable
  // here and both sort below every non-sentinel value.
  if (a == null) return b == null ? 0 : -1;
  if (b == null) return 1;
  if (typeof a === 'string') {
    assertString(b);
    return compareStringUTF8Fast(a, b);
  }
  if (typeof a === 'number') {
    // assertNumber narrows b, so no type assertion is needed.
    assertNumber(b);
    return a - b;
  }
  return compareValues(a, b);
}
840+
841+
/**
842+
* Creates a comparator for RowBound values used in BTree index scans.
843+
*
844+
* For single-key sorts (the common case), returns a direct comparator
845+
* that avoids the multi-key loop. The actual comparison logic lives in
846+
* compareBoundValue, which V8 inlines at the call site.
847+
*/
807848
function makeBoundComparator(sort: Ordering) {
849+
if (sort.length === 1) {
850+
const key = sort[0][0];
851+
const dir = sort[0][1];
852+
const cmp = (a: RowBound, b: RowBound) => compareBoundValue(a[key], b[key]);
853+
return dir === 'asc' ? cmp : (a: RowBound, b: RowBound) => -cmp(a, b);
854+
}
808855
return (a: RowBound, b: RowBound) => {
809-
// Hot! Do not use destructuring
810856
for (const entry of sort) {
811-
const key = entry[0];
812-
const cmp = compareBounds(a[key], b[key]);
857+
const cmp = compareBoundValue(a[entry[0]], b[entry[0]]);
813858
if (cmp !== 0) {
814859
return entry[1] === 'asc' ? cmp : -cmp;
815860
}
@@ -818,25 +863,6 @@ function makeBoundComparator(sort: Ordering) {
818863
};
819864
}
820865

821-
function compareBounds(a: Bound, b: Bound): number {
822-
if (a === b) {
823-
return 0;
824-
}
825-
if (a === minValue) {
826-
return -1;
827-
}
828-
if (b === minValue) {
829-
return 1;
830-
}
831-
if (a === maxValue) {
832-
return 1;
833-
}
834-
if (b === maxValue) {
835-
return -1;
836-
}
837-
return compareValues(a, b);
838-
}
839-
840866
function* generateRows(
841867
data: BTreeSet<Row>,
842868
scanStart: RowBound | undefined,

0 commit comments

Comments
 (0)