Skip to content

Commit 0f84670

Browse files
authored
Merge pull request #15 from CraigMacomber/comparison
fix: make defaultComparator provide a strict partial order
2 parents 2e62c0a + 08d36e0 commit 0f84670

File tree

4 files changed

+374
-52
lines changed

4 files changed

+374
-52
lines changed

b+tree.d.ts

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,40 @@ export declare type EditRangeResult<V, R = number> = {
55
break?: R;
66
delete?: boolean;
77
};
8-
/** Compares two numbers, strings, arrays of numbers/strings, Dates,
9-
* or objects that have a valueOf() method returning a number or string.
10-
* Optimized for numbers. Returns 1 if a>b, -1 if a<b, and 0 if a===b.
8+
/**
9+
* Types that BTree supports by default
10+
*/
11+
export declare type DefaultComparable = number | string | Date | boolean | null | undefined | (number | string)[] | {
12+
valueOf: () => number | string | Date | boolean | null | undefined | (number | string)[];
13+
};
14+
/**
15+
* Compares DefaultComparables to form a strict partial ordering.
16+
*
17+
* Handles +/-0 and NaN like Map: NaN is equal to NaN, and -0 is equal to +0.
18+
*
19+
* Arrays are compared using '<' and '>', which may cause unexpected equality:
20+
* for example [1] will be considered equal to ['1'].
21+
*
22+
* Two objects with equal valueOf compare the same, but compare unequal to
23+
* primitives that have the same value.
1124
*/
12-
export declare function defaultComparator(a: any, b: any): number;
25+
export declare function defaultComparator(a: DefaultComparable, b: DefaultComparable): number;
26+
/**
27+
* Compares items using the < and > operators. This function is probably slightly
28+
* faster than the defaultComparator for Dates and strings, but has not been benchmarked.
29+
* Unlike defaultComparator, this comparator doesn't support mixed types correctly,
30+
* i.e. use it with `BTree<string>` or `BTree<number>` but not `BTree<string|number>`.
31+
*
32+
* NaN is not supported.
33+
*
34+
* Note: null is treated like 0 when compared with numbers or Date, but in general
35+
* null is not ordered with respect to strings (neither greater nor less), and
36+
* undefined is not ordered with other types.
37+
*/
38+
export declare function simpleComparator(a: string, b: string): number;
39+
export declare function simpleComparator(a: number | null, b: number | null): number;
40+
export declare function simpleComparator(a: Date | null, b: Date | null): number;
41+
export declare function simpleComparator(a: (number | string)[], b: (number | string)[]): number;
1342
/**
1443
* A reasonably fast collection of key-value pairs with a powerful API.
1544
* Largely compatible with the standard Map. BTree is a B+ tree data structure,
@@ -78,11 +107,15 @@ export default class BTree<K = any, V = any> implements ISortedMapF<K, V>, ISort
78107
private _root;
79108
_size: number;
80109
_maxNodeSize: number;
110+
/**
111+
* provides a total order over keys (and a strict partial order over the type K)
112+
* @returns a negative value if a < b, 0 if a === b and a positive value if a > b
113+
*/
81114
_compare: (a: K, b: K) => number;
82115
/**
83116
* Initializes an empty B+ tree.
84117
* @param compare Custom function to compare pairs of elements in the tree.
85-
* This is not required for numbers, strings and arrays of numbers/strings.
118+
* If not specified, defaultComparator will be used which is valid as long as K extends DefaultComparable.
86119
* @param entries A set of key-value pairs to initialize the tree
87120
* @param maxNodeSize Branching factor (maximum items or children per node)
88121
* Must be in range 4..256. If undefined or <4 then default is used; if >256 then 256.

b+tree.js

Lines changed: 63 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,44 +13,71 @@ var __extends = (this && this.__extends) || (function () {
1313
};
1414
})();
1515
Object.defineProperty(exports, "__esModule", { value: true });
16-
exports.EmptyBTree = exports.defaultComparator = void 0;
17-
// Informative microbenchmarks & stuff:
18-
// http://www.jayconrod.com/posts/52/a-tour-of-v8-object-representation (very educational)
19-
// https://blog.mozilla.org/luke/2012/10/02/optimizing-javascript-variable-access/ (local vars are faster than properties)
20-
// http://benediktmeurer.de/2017/12/13/an-introduction-to-speculative-optimization-in-v8/ (other stuff)
21-
// https://jsperf.com/js-in-operator-vs-alternatives (avoid 'in' operator; `.p!==undefined` faster than `hasOwnProperty('p')` in all browsers)
22-
// https://jsperf.com/instanceof-vs-typeof-vs-constructor-vs-member (speed of type tests varies wildly across browsers)
23-
// https://jsperf.com/detecting-arrays-new (a.constructor===Array is best across browsers, assuming a is an object)
24-
// https://jsperf.com/shallow-cloning-methods (a constructor is faster than Object.create; hand-written clone faster than Object.assign)
25-
// https://jsperf.com/ways-to-fill-an-array (slice-and-replace is fastest)
26-
// https://jsperf.com/math-min-max-vs-ternary-vs-if (Math.min/max is slow on Edge)
27-
// https://jsperf.com/array-vs-property-access-speed (v.x/v.y is faster than a[0]/a[1] in major browsers IF hidden class is constant)
28-
// https://jsperf.com/detect-not-null-or-undefined (`x==null` slightly slower than `x===null||x===undefined` on all browsers)
29-
// Overall, microbenchmarks suggest Firefox is the fastest browser for JavaScript and Edge is the slowest.
30-
// Lessons from https://v8project.blogspot.com/2017/09/elements-kinds-in-v8.html:
31-
// - Avoid holes in arrays. Avoid `new Array(N)`, it will be "holey" permanently.
32-
// - Don't read outside bounds of an array (it scans prototype chain).
33-
// - Small integer arrays are stored differently from doubles
34-
// - Adding non-numbers to an array deoptimizes it permanently into a general array
35-
// - Objects can be used like arrays (e.g. have length property) but are slower
36-
// - V8 source (NewElementsCapacity in src/objects.h): arrays grow by 50% + 16 elements
37-
/** Compares two numbers, strings, arrays of numbers/strings, Dates,
38-
* or objects that have a valueOf() method returning a number or string.
39-
* Optimized for numbers. Returns 1 if a>b, -1 if a<b, and 0 if a===b.
16+
exports.EmptyBTree = exports.simpleComparator = exports.defaultComparator = void 0;
17+
/**
18+
* Compares DefaultComparables to form a strict partial ordering.
19+
*
20+
* Handles +/-0 and NaN like Map: NaN is equal to NaN, and -0 is equal to +0.
21+
*
22+
* Arrays are compared using '<' and '>', which may cause unexpected equality:
23+
* for example [1] will be considered equal to ['1'].
24+
*
25+
* Two objects with equal valueOf compare the same, but compare unequal to
26+
* primitives that have the same value.
4027
*/
4128
function defaultComparator(a, b) {
42-
var c = a - b;
43-
if (c === c)
44-
return c; // a & b are number
45-
// General case (c is NaN): string / arrays / Date / incomparable things
46-
if (a)
29+
// Special case finite numbers first for performance.
30+
// Note that the trick of using 'a - b' and checking for NaN to detect non-numbers
31+
// does not work if the strings are numeric (ex: "5"). This would leading most
32+
// comparison functions using that approach to fail to have transitivity.
33+
if (Number.isFinite(a) && Number.isFinite(b)) {
34+
return a - b;
35+
}
36+
// The default < and > operators are not totally ordered. To allow types to be mixed
37+
// in a single collection, compare types and order values of different types by type.
38+
var ta = typeof a;
39+
var tb = typeof b;
40+
if (ta !== tb) {
41+
return ta < tb ? -1 : 1;
42+
}
43+
if (ta === 'object') {
44+
// standardized JavaScript bug: null is not an object, but typeof says it is
45+
if (a === null)
46+
return b === null ? 0 : -1;
47+
else if (b === null)
48+
return 1;
4749
a = a.valueOf();
48-
if (b)
4950
b = b.valueOf();
50-
return a < b ? -1 : a > b ? 1 : a == b ? 0 : c;
51+
ta = typeof a;
52+
tb = typeof b;
53+
// Deal with the two valueOf()s producing different types
54+
if (ta !== tb) {
55+
return ta < tb ? -1 : 1;
56+
}
57+
}
58+
// a and b are now the same type, and will be a number, string or array
59+
// (which we assume holds numbers or strings), or something unsupported.
60+
if (a < b)
61+
return -1;
62+
if (a > b)
63+
return 1;
64+
if (a === b)
65+
return 0;
66+
// Order NaN less than other numbers
67+
if (Number.isNaN(a))
68+
return Number.isNaN(b) ? 0 : -1;
69+
else if (Number.isNaN(b))
70+
return 1;
71+
// This could be two objects (e.g. [7] and ['7']) that aren't ordered
72+
return Array.isArray(a) ? 0 : Number.NaN;
5173
}
5274
exports.defaultComparator = defaultComparator;
5375
;
76+
function simpleComparator(a, b) {
77+
return a > b ? 1 : a < b ? -1 : 0;
78+
}
79+
exports.simpleComparator = simpleComparator;
80+
;
5481
/**
5582
* A reasonably fast collection of key-value pairs with a powerful API.
5683
* Largely compatible with the standard Map. BTree is a B+ tree data structure,
@@ -119,7 +146,7 @@ var BTree = /** @class */ (function () {
119146
/**
120147
* Initializes an empty B+ tree.
121148
* @param compare Custom function to compare pairs of elements in the tree.
122-
* This is not required for numbers, strings and arrays of numbers/strings.
149+
* If not specified, defaultComparator will be used which is valid as long as K extends DefaultComparable.
123150
* @param entries A set of key-value pairs to initialize the tree
124151
* @param maxNodeSize Branching factor (maximum items or children per node)
125152
* Must be in range 4..256. If undefined or <4 then default is used; if >256 then 256.
@@ -732,8 +759,12 @@ var BTree = /** @class */ (function () {
732759
};
733760
/** Ensures mutations are allowed, reversing the effect of freeze(). */
734761
BTree.prototype.unfreeze = function () {
762+
// @ts-ignore "The operand of a 'delete' operator must be optional."
763+
// (wrong: delete does not affect the prototype.)
735764
delete this.clear;
765+
// @ts-ignore
736766
delete this.set;
767+
// @ts-ignore
737768
delete this.editRange;
738769
};
739770
Object.defineProperty(BTree.prototype, "isFrozen", {
@@ -788,7 +819,6 @@ var BNode = /** @class */ (function () {
788819
// If key not found, returns i^failXor where i is the insertion index.
789820
// Callers that don't care whether there was a match will set failXor=0.
790821
BNode.prototype.indexOf = function (key, failXor, cmp) {
791-
// TODO: benchmark multiple search strategies
792822
var keys = this.keys;
793823
var lo = 0, hi = keys.length, mid = hi >> 1;
794824
while (lo < hi) {

b+tree.test.ts

Lines changed: 179 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import BTree, {IMap, EmptyBTree} from './b+tree';
1+
import BTree, {IMap, EmptyBTree, defaultComparator, simpleComparator} from './b+tree';
22
import SortedArray from './sorted-array';
33
import MersenneTwister from 'mersenne-twister';
44

@@ -14,6 +14,155 @@ function addToBoth<K,V>(a: IMap<K,V>, b: IMap<K,V>, k: K, v: V) {
1414
expect(a.set(k,v)).toEqual(b.set(k,v));
1515
}
1616

17+
describe('defaultComparator', () =>
18+
{
19+
const dateA = new Date(Date.UTC(96, 1, 2, 3, 4, 5));
20+
const dateA2 = new Date(Date.UTC(96, 1, 2, 3, 4, 5));
21+
const dateB = new Date(Date.UTC(96, 1, 2, 3, 4, 6));
22+
const values = [
23+
dateA,
24+
dateA2,
25+
dateB,
26+
dateA.valueOf(),
27+
'24x',
28+
'0',
29+
'1',
30+
'3',
31+
'String',
32+
'10',
33+
0,
34+
"NaN",
35+
NaN,
36+
Infinity,
37+
-0,
38+
-Infinity,
39+
1,
40+
10,
41+
2,
42+
[],
43+
'[]',
44+
[1],
45+
['1']
46+
];
47+
const sorted = [-Infinity, -10, -1, -0, 0, 1, 2, 10, Infinity];
48+
testComparison(defaultComparator, sorted, values, [[dateA, dateA2], [0, -0], [[1], ['1']]]);
49+
});
50+
51+
describe('simpleComparator with non-NaN numbers and null', () =>
52+
{
53+
const sorted = [-Infinity, -10, -1, -0, 0, null, 1, 2, 10, Infinity];
54+
testComparison<number | null>(simpleComparator, sorted, sorted, [[-0, 0], [-0, null], [0, null]]);
55+
});
56+
57+
describe('simpleComparator with strings', () =>
58+
{
59+
const values = [
60+
'24x',
61+
'+0',
62+
'0.0',
63+
'0',
64+
'-0',
65+
'1',
66+
'3',
67+
'String',
68+
'10',
69+
"NaN",
70+
];;
71+
testComparison<string>(simpleComparator, [], values, []);
72+
});
73+
74+
describe('simpleComparator with Date', () =>
75+
{
76+
const dateA = new Date(Date.UTC(96, 1, 2, 3, 4, 5));
77+
const dateA2 = new Date(Date.UTC(96, 1, 2, 3, 4, 5));
78+
const dateB = new Date(Date.UTC(96, 1, 2, 3, 4, 6));
79+
const values = [
80+
dateA,
81+
dateA2,
82+
dateB,
83+
null,
84+
];
85+
testComparison<Date>(simpleComparator, [], values, [[dateA, dateA2]]);
86+
});
87+
88+
describe('simpleComparator arrays', () =>
89+
{
90+
const values = [
91+
[],
92+
[1],
93+
['1'],
94+
[2],
95+
];
96+
testComparison<(number|string)[] >(simpleComparator, [], values, [[[1], ['1']]]);
97+
});
98+
99+
/**
100+
* Tests a comparison function, ensuring it produces a strict partial order over the provided values.
101+
* Additionally confirms that the comparison function has the correct definition of equality via expectedDuplicates.
102+
*/
103+
function testComparison<T>(comparison: (a: T, b: T) => number, inOrder: T[], values: T[], expectedDuplicates: [T, T][] = []) {
104+
function compare(a: T, b: T): number {
105+
const v = comparison(a, b);
106+
expect(typeof v).toEqual('number');
107+
if (v !== v)
108+
console.log('!!!', a, b);
109+
expect(v === v).toEqual(true); // Not NaN
110+
return Math.sign(v);
111+
}
112+
113+
test('comparison has correct order', () => {
114+
expect([...inOrder].sort(comparison)).toMatchObject(inOrder);
115+
});
116+
117+
test('comparison deffierantes values', () => {
118+
let duplicates = [];
119+
for (let i = 0; i < values.length; i++) {
120+
for (let j = i + 1; j < values.length; j++) {
121+
if (compare(values[i], values[j]) === 0) {
122+
duplicates.push([values[i], values[j]]);
123+
}
124+
}
125+
}
126+
expect(duplicates).toMatchObject(expectedDuplicates);
127+
});
128+
129+
test('comparison forms a strict partial ordering', () => {
130+
// To be a strict partial order, the function must be:
131+
// irreflexive: not a < a
132+
// transitive: if a < b and b < c then a < c
133+
// asymmetric: if a < b then not b < a
134+
135+
// Since our comparison has three outputs, we adjust that to, we need to tighten the rules that involve 'not a < b' (where we have two possible outputs) as follows:
136+
// irreflexive: compare(a, a) === 0
137+
// transitive: if compare(a, b) < 0 and compare(b, c) < 0 then compare(a, c) < 0
138+
// asymmetric: sign(compare(a, b)) === -sign(compare(b, a))
139+
140+
// This can is brute forced in O(n^3) time below:
141+
// Violations
142+
const irreflexive = []
143+
const transitive = []
144+
const asymmetric = []
145+
for (const a of values) {
146+
// irreflexive: compare(a, a) === 0
147+
if(compare(a, a) !== 0) irreflexive.push(a);
148+
for (const b of values) {
149+
for (const c of values) {
150+
// transitive: if compare(a, b) < 0 and compare(b, c) < 0 then compare(a, c) < 0
151+
if (compare(a, b) < 0 && compare(b, c) < 0) {
152+
if(compare(a, c) !== -1) transitive.push([a, b, c]);
153+
}
154+
}
155+
// sign(compare(a, b)) === -sign(compare(b, a))
156+
if(compare(a, b) !== -compare(b, a)) asymmetric.push([a, b]);
157+
}
158+
}
159+
expect(irreflexive).toEqual([]);
160+
expect(transitive).toEqual([]);
161+
expect(asymmetric).toEqual([]);
162+
});
163+
}
164+
165+
17166
describe('Simple tests on leaf nodes', () =>
18167
{
19168
test('A few insertions (fanout 8)', insert8.bind(null, 8));
@@ -427,4 +576,33 @@ function testBTree(maxNodeSize: number)
427576
expect(tree.nextLowerPair(undefined)).toEqual([300, 600]);
428577
expect(tree.nextHigherPair(undefined)).toEqual([-10, -20]);
429578
});
579+
580+
test('Regression test for invalid default comparator causing malformed trees', () => {
581+
const key = '24e26f0b-3c1a-47f8-a7a1-e8461ddb69ce6';
582+
const tree = new BTree<string,{}>(undefined, undefined, maxNodeSize);
583+
// The defaultComparator was not transitive for these inputs due to comparing numeric strings to each other numerically,
584+
// but lexically when compared to non-numeric strings. This resulted in keys not being orderable, and the tree behaving incorrectly.
585+
const inputs: [string,{}][] = [
586+
[key, {}],
587+
['0', {}],
588+
['1', {}],
589+
['2', {}],
590+
['3', {}],
591+
['4', {}],
592+
['Cheese', {}],
593+
['10', {}],
594+
['11', {}],
595+
['12', {}],
596+
['13', {}],
597+
['15', {}],
598+
['16', {}],
599+
];
600+
601+
for (const [id, node] of inputs) {
602+
expect( tree.set(id, node)).toBeTruthy();
603+
tree.checkValid();
604+
expect(tree.get(key)).not.toBeUndefined();
605+
}
606+
expect(tree.get(key)).not.toBeUndefined();
607+
});
430608
}

0 commit comments

Comments
 (0)