Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 77 additions & 3 deletions packages/zql/src/ivm/data.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import {compareUTF8} from 'compare-utf8';
import fc from 'fast-check';
import {expect, test} from 'vitest';
import {describe, expect, test} from 'vitest';
import {
compareStringUTF8Fast,
compareValues,
makeComparator,
normalizeUndefined,
Expand Down Expand Up @@ -84,10 +85,13 @@ test('compareValues', () => {
),
);

// string
// string - compareStringUTF8Fast returns different magnitudes for ASCII
// but always matches the sign of compareUTF8
fc.assert(
fc.property(fc.fullUnicodeString(), fc.fullUnicodeString(), (s1, s2) => {
expect(compareValues(s1, s2)).toBe(compareUTF8(s1, s2));
expect(Math.sign(compareValues(s1, s2))).toBe(
Math.sign(compareUTF8(s1, s2)),
);
}),
);
fc.assert(
Expand Down Expand Up @@ -132,3 +136,73 @@ test('valuesEquals', () => {
// Runs the shared row-comparison checks in compareRowsTest against the
// comparator factory makeComparator. NOTE(review): compareRowsTest is defined
// outside this view — confirm which orderings it exercises.
test('comparator', () => {
compareRowsTest(makeComparator);
});

// Unit tests for compareStringUTF8Fast. The function only promises the sign
// of its result, so every comparison against compareUTF8 goes through
// Math.sign rather than checking exact magnitudes.
describe('compareStringUTF8Fast', () => {
  test('ASCII strings compare correctly', () => {
    const lower = 'abc';
    const higher = 'def';

    expect(compareStringUTF8Fast(lower, higher)).toBeLessThan(0);
    expect(compareStringUTF8Fast(higher, lower)).toBeGreaterThan(0);
    expect(compareStringUTF8Fast(lower, lower)).toBe(0);
  });

  test('empty strings', () => {
    const empty = '';
    const nonEmpty = 'a';

    expect(compareStringUTF8Fast(empty, empty)).toBe(0);
    expect(compareStringUTF8Fast(empty, nonEmpty)).toBeLessThan(0);
    expect(compareStringUTF8Fast(nonEmpty, empty)).toBeGreaterThan(0);
  });

  test('Unicode strings fall back correctly', () => {
    // The first differing characters are non-ASCII, which routes the call
    // through the compareUTF8 fallback; the resulting sign must agree.
    const left = 'café';
    const right = 'cafë';
    const actualSign = Math.sign(compareStringUTF8Fast(left, right));
    const expectedSign = Math.sign(compareUTF8(left, right));

    expect(actualSign).toBe(expectedSign);
  });

  test('prefix strings', () => {
    const shorter = 'abc';
    const longer = 'abcd';

    expect(compareStringUTF8Fast(shorter, longer)).toBeLessThan(0);
    expect(compareStringUTF8Fast(longer, shorter)).toBeGreaterThan(0);
  });

  test('sign matches compareUTF8 for all ASCII', () => {
    // Property: for arbitrary ASCII inputs the fast path and compareUTF8
    // agree on ordering.
    const signsAgree = fc.property(
      fc.asciiString(),
      fc.asciiString(),
      (a, b) => {
        const fastSign = Math.sign(compareStringUTF8Fast(a, b));
        const referenceSign = Math.sign(compareUTF8(a, b));
        expect(fastSign).toBe(referenceSign);
      },
    );
    fc.assert(signsAgree);
  });
});

// Tests for the comparator returned by makeComparator when the ordering has
// exactly one key, covering both directions with and without `reverse`.
describe('makeComparator single-key fast path', () => {
  test('single key asc matches multi-key behavior', () => {
    const singleKey = makeComparator([['name', 'asc']]);
    const multiKey = makeComparator([
      ['name', 'asc'],
      ['id', 'asc'],
    ]);

    // Only 'name' differs between the rows, so the single-key and multi-key
    // comparators must report the same sign.
    const singleSign = Math.sign(singleKey({name: 'a'}, {name: 'b'}));
    const multiSign = Math.sign(
      multiKey({name: 'a', id: '1'}, {name: 'b', id: '1'}),
    );
    expect(singleSign).toBe(multiSign);
  });

  test('single key desc', () => {
    const descending = makeComparator([['name', 'desc']]);
    const result = descending({name: 'a'}, {name: 'b'});
    expect(result).toBeGreaterThan(0);
  });

  test('single key with reverse', () => {
    const reversedAsc = makeComparator([['name', 'asc']], true);
    const result = reversedAsc({name: 'a'}, {name: 'b'});
    expect(result).toBeGreaterThan(0);
  });

  test('single key desc with reverse', () => {
    // 'desc' combined with reverse yields ascending order again.
    const reversedDesc = makeComparator([['name', 'desc']], true);
    const result = reversedDesc({name: 'a'}, {name: 'b'});
    expect(result).toBeLessThan(0);
  });

  test('single key equality', () => {
    const byId = makeComparator([['id', 'asc']]);
    const result = byId({id: 42}, {id: 42});
    expect(result).toBe(0);
  });
});
55 changes: 43 additions & 12 deletions packages/zql/src/ivm/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,28 @@ export type Node = {
relationships: Record<string, () => Stream<Node | 'yield'>>;
};

/**
 * Orders two strings the same way `compareUTF8` does, but resolves the
 * common ASCII case inline instead of calling into `compareUTF8`.
 *
 * The strings are scanned code unit by code unit up to the shorter length.
 * At the first position where they differ:
 * - if both code units are ASCII (< 128) their difference is returned
 *   directly, since an ASCII character is a single UTF-8 byte of the same
 *   value;
 * - otherwise the whole comparison is delegated to `compareUTF8`.
 *
 * When no position differs, one string is a prefix of the other and the
 * shorter one sorts first.
 *
 * The contract is sign-only: the result is negative when `a < b`, zero when
 * the strings are equal and positive when `a > b`. Callers must NOT depend
 * on the magnitude of the returned number.
 *
 * @param a - Left-hand string.
 * @param b - Right-hand string.
 * @returns A number whose sign gives the ordering of `a` relative to `b`.
 */
export function compareStringUTF8Fast(a: string, b: string): number {
  // Identical strings need no scan at all.
  if (a === b) {
    return 0;
  }

  const sharedLength = Math.min(a.length, b.length);
  let index = 0;
  while (index < sharedLength) {
    const left = a.charCodeAt(index);
    const right = b.charCodeAt(index);
    if (left === right) {
      index++;
      continue;
    }
    // First mismatch decides the result: inline for ASCII vs ASCII,
    // otherwise defer to the full UTF-8 comparison.
    const bothAscii = left < 128 && right < 128;
    return bothAscii ? left - right : compareUTF8(a, b);
  }

  // One string is a prefix of the other; the shorter sorts first.
  return a.length - b.length;
}

/**
* Compare two values. The values must be of the same type. This function
* throws at runtime if the types differ.
Expand All @@ -41,6 +63,15 @@ export function compareValues(a: Value, b: Value): number {
if (a === b) {
return 0;
}
// String check before null: strings are the most common value type in
// practice, so testing them first reduces branch mispredictions. The
// null sub-check inside handles the string-vs-null comparison without
// falling through to the generic null checks below.
if (typeof a === 'string') {
if (b === null) return 1;
assertString(b);
return compareStringUTF8Fast(a, b);
}
if (a === null) {
return -1;
}
Expand All @@ -55,18 +86,6 @@ export function compareValues(a: Value, b: Value): number {
assertNumber(b);
return a - b;
}
if (typeof a === 'string') {
assertString(b);
// We compare all strings in Zero as UTF-8. This is the default on SQLite
// and we need to match it. See:
// https://blog.replicache.dev/blog/replicache-11-adventures-in-text-encoding.
//
// TODO: We could change this since SQLite supports UTF-16. Microbenchmark
// to see if there's a big win.
//
// https://www.sqlite.org/c3ref/create_collation.html
return compareUTF8(a, b);
}
throw new Error(`Unsupported type: ${a}`);
}

Expand All @@ -84,6 +103,18 @@ export function normalizeUndefined(v: Value): NormalizedValue {
export type Comparator = (r1: Row, r2: Row) => number;

export function makeComparator(order: Ordering, reverse?: boolean): Comparator {
if (order.length === 1) {
const key = order[0][0];
const dir = order[0][1];
if (dir === 'asc') {
return reverse
? (a, b) => -compareValues(a[key], b[key])
: (a, b) => compareValues(a[key], b[key]);
}
return reverse
? (a, b) => compareValues(a[key], b[key])
: (a, b) => -compareValues(a[key], b[key]);
}
return (a, b) => {
// Skip destructuring here since it is hot code.
for (const ord of order) {
Expand Down
Loading