Skip to content

Commit 6807695

Browse files
Craig MacomberCraig Macomber
authored andcommitted
fix: make defaultComparator provide a strict partial order
1 parent 2e62c0a commit 6807695

File tree

4 files changed

+380
-50
lines changed

4 files changed

+380
-50
lines changed

b+tree.d.ts

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,36 @@ export declare type EditRangeResult<V, R = number> = {
55
break?: R;
66
delete?: boolean;
77
};
8-
/** Compares two numbers, strings, arrays of numbers/strings, Dates,
9-
* or objects that have a valueOf() method returning a number or string.
10-
* Optimized for numbers. Returns 1 if a>b, -1 if a<b, and 0 if a===b.
8+
/**
9+
* numbers, strings and arrays thereof, and objects that have a valueOf() method returning a number or string like Dates.
10+
*/
11+
export declare type DefaultComparable = number | string | (number | string)[] | {
12+
valueOf: () => number | string | (number | string)[];
13+
};
14+
/**
15+
* Compares DefaultComparables to form a strict partial ordering.
16+
*
17+
* Handles +/-0 and NaN like Map: NaN is equal to NaN, and -0 is equal to +0.
18+
*
19+
* Arrays are compared using '<' and '>', which may cause unexpected equality: for example [1] will be considered equal to ['1'].
20+
*
21+
* Two objects with equal valueOf compare the same, but compare unequal to primitives that have the same value.
22+
*/
23+
export declare function defaultComparator(a: DefaultComparable, b: DefaultComparable): number;
24+
/**
25+
* Compares finite numbers to form a strict partial ordering.
26+
*
27+
* Handles +/-0 like Map: -0 is equal to +0.
28+
*/
29+
export declare function compareFiniteNumbers(a: number, b: number): number;
30+
/**
31+
* Compares strings lexically to form a strict partial ordering.
1132
*/
12-
export declare function defaultComparator(a: any, b: any): number;
33+
export declare function compareStrings(a: string, b: string): number;
34+
/**
35+
* If a and b are arrays, they are compared using '<' and '>', which may cause unexpected equality, for example [1] will be considered equal to ['1'].
36+
*/
37+
export declare function compareFiniteNumbersOrStringOrArray(a: number | string | (number | string)[], b: number | string | (number | string)[]): number;
1338
/**
1439
* A reasonably fast collection of key-value pairs with a powerful API.
1540
* Largely compatible with the standard Map. BTree is a B+ tree data structure,
@@ -78,11 +103,15 @@ export default class BTree<K = any, V = any> implements ISortedMapF<K, V>, ISort
78103
private _root;
79104
_size: number;
80105
_maxNodeSize: number;
106+
/**
107+
* provides a total order over keys (and a strict partial order over the type K)
108+
* @returns a negative value if a < b, 0 if a === b and a positive value if a > b
109+
*/
81110
_compare: (a: K, b: K) => number;
82111
/**
83112
* Initializes an empty B+ tree.
84113
* @param compare Custom function to compare pairs of elements in the tree.
85-
* This is not required for numbers, strings and arrays of numbers/strings.
114+
* If not specified, defaultComparator will be used which is valid as long as K extends DefaultComparable.
86115
* @param entries A set of key-value pairs to initialize the tree
87116
* @param maxNodeSize Branching factor (maximum items or children per node)
88117
* Must be in range 4..256. If undefined or <4 then default is used; if >256 then 256.

b+tree.js

Lines changed: 78 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,44 +13,90 @@ var __extends = (this && this.__extends) || (function () {
1313
};
1414
})();
1515
Object.defineProperty(exports, "__esModule", { value: true });
16-
exports.EmptyBTree = exports.defaultComparator = void 0;
17-
// Informative microbenchmarks & stuff:
18-
// http://www.jayconrod.com/posts/52/a-tour-of-v8-object-representation (very educational)
19-
// https://blog.mozilla.org/luke/2012/10/02/optimizing-javascript-variable-access/ (local vars are faster than properties)
20-
// http://benediktmeurer.de/2017/12/13/an-introduction-to-speculative-optimization-in-v8/ (other stuff)
21-
// https://jsperf.com/js-in-operator-vs-alternatives (avoid 'in' operator; `.p!==undefined` faster than `hasOwnProperty('p')` in all browsers)
22-
// https://jsperf.com/instanceof-vs-typeof-vs-constructor-vs-member (speed of type tests varies wildly across browsers)
23-
// https://jsperf.com/detecting-arrays-new (a.constructor===Array is best across browsers, assuming a is an object)
24-
// https://jsperf.com/shallow-cloning-methods (a constructor is faster than Object.create; hand-written clone faster than Object.assign)
25-
// https://jsperf.com/ways-to-fill-an-array (slice-and-replace is fastest)
26-
// https://jsperf.com/math-min-max-vs-ternary-vs-if (Math.min/max is slow on Edge)
27-
// https://jsperf.com/array-vs-property-access-speed (v.x/v.y is faster than a[0]/a[1] in major browsers IF hidden class is constant)
28-
// https://jsperf.com/detect-not-null-or-undefined (`x==null` slightly slower than `x===null||x===undefined` on all browsers)
29-
// Overall, microbenchmarks suggest Firefox is the fastest browser for JavaScript and Edge is the slowest.
30-
// Lessons from https://v8project.blogspot.com/2017/09/elements-kinds-in-v8.html:
31-
// - Avoid holes in arrays. Avoid `new Array(N)`, it will be "holey" permanently.
32-
// - Don't read outside bounds of an array (it scans prototype chain).
33-
// - Small integer arrays are stored differently from doubles
34-
// - Adding non-numbers to an array deoptimizes it permanently into a general array
35-
// - Objects can be used like arrays (e.g. have length property) but are slower
36-
// - V8 source (NewElementsCapacity in src/objects.h): arrays grow by 50% + 16 elements
37-
/** Compares two numbers, strings, arrays of numbers/strings, Dates,
38-
* or objects that have a valueOf() method returning a number or string.
39-
* Optimized for numbers. Returns 1 if a>b, -1 if a<b, and 0 if a===b.
16+
exports.EmptyBTree = exports.compareFiniteNumbersOrStringOrArray = exports.compareStrings = exports.compareFiniteNumbers = exports.defaultComparator = void 0;
17+
/**
18+
* Compares DefaultComparables to form a strict partial ordering.
19+
*
20+
* Handles +/-0 and NaN like Map: NaN is equal to NaN, and -0 is equal to +0.
21+
*
22+
* Arrays are compared using '<' and '>', which may cause unexpected equality: for example [1] will be considered equal to ['1'].
23+
*
24+
* Two objects with equal valueOf compare the same, but compare unequal to primitives that have the same value.
4025
*/
4126
function defaultComparator(a, b) {
42-
var c = a - b;
43-
if (c === c)
44-
return c; // a & b are number
45-
// General case (c is NaN): string / arrays / Date / incomparable things
46-
if (a)
27+
// Compare types first.
28+
// Note that the trick of using 'a - b' the checking for NaN to detect non numbers values does not work if the strings are numeric (ex: "5"),
29+
// leading most comparison functions using that approach to fail to have transitivity.
30+
var ta = typeof a;
31+
var tb = typeof b;
32+
if (ta !== tb) {
33+
return ta < tb ? -1 : 1;
34+
}
35+
if (ta === 'object') {
4736
a = a.valueOf();
48-
if (b)
4937
b = b.valueOf();
50-
return a < b ? -1 : a > b ? 1 : a == b ? 0 : c;
38+
ta = typeof a;
39+
tb = typeof b;
40+
// Deal with one producing a string, and the other a number
41+
if (ta !== tb) {
42+
return ta < tb ? -1 : 1;
43+
}
44+
}
45+
// a and b are now the same type, and either a number, string or array.
46+
// use Object.is to make NaN compare equal to NaN.
47+
// This treats also -0 as not equal to 0, which is handled separately below.
48+
if (Object.is(a, b))
49+
return 0;
50+
// All comparisons with NaN return false, so NaNs will pass here.
51+
if (a < b)
52+
return -1;
53+
// Since a and b might be arrays, we cannot rely on === or ==, only < and > do something useful for ordering arrays.
54+
// To find if two arrays are equal using comparison operators, both < and > must be checked (even == returns false if not the same object).
55+
if (a > b)
56+
return 1;
57+
// Order NaN less than other numbers
58+
if (Number.isNaN(a))
59+
return -1;
60+
if (Number.isNaN(b))
61+
return 1;
62+
// Handles 0 and -0 case, as well as equal (but not same object) arrays case.
63+
return 0;
5164
}
5265
exports.defaultComparator = defaultComparator;
5366
;
67+
/**
68+
* Compares finite numbers to form a strict partial ordering.
69+
*
70+
* Handles +/-0 like Map: -0 is equal to +0.
71+
*/
72+
function compareFiniteNumbers(a, b) {
73+
return a - b;
74+
}
75+
exports.compareFiniteNumbers = compareFiniteNumbers;
76+
;
77+
/**
78+
* Compares strings lexically to form a strict partial ordering.
79+
*/
80+
function compareStrings(a, b) {
81+
return a > b ? 1 : a === b ? 0 : -1;
82+
}
83+
exports.compareStrings = compareStrings;
84+
;
85+
/**
86+
* If a and b are arrays, they are compared using '<' and '>', which may cause unexpected equality, for example [1] will be considered equal to ['1'].
87+
*/
88+
function compareFiniteNumbersOrStringOrArray(a, b) {
89+
// Strings can not be ordered relative to numbers using '<' and '>' since no matter the order, the comparison will return false.
90+
var ta = typeof a;
91+
var tb = typeof b;
92+
if (ta !== tb) {
93+
return ta < tb ? -1 : 1;
94+
}
95+
// Use < and > instead of < and === so arrays work correctly.
96+
return a > b ? 1 : a < b ? -1 : 0;
97+
}
98+
exports.compareFiniteNumbersOrStringOrArray = compareFiniteNumbersOrStringOrArray;
99+
;
54100
/**
55101
* A reasonably fast collection of key-value pairs with a powerful API.
56102
* Largely compatible with the standard Map. BTree is a B+ tree data structure,
@@ -119,7 +165,7 @@ var BTree = /** @class */ (function () {
119165
/**
120166
* Initializes an empty B+ tree.
121167
* @param compare Custom function to compare pairs of elements in the tree.
122-
* This is not required for numbers, strings and arrays of numbers/strings.
168+
* If not specified, defaultComparator will be used which is valid as long as K extends DefaultComparable.
123169
* @param entries A set of key-value pairs to initialize the tree
124170
* @param maxNodeSize Branching factor (maximum items or children per node)
125171
* Must be in range 4..256. If undefined or <4 then default is used; if >256 then 256.

b+tree.test.ts

Lines changed: 177 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import BTree, {IMap, EmptyBTree} from './b+tree';
1+
import BTree, {IMap, EmptyBTree, defaultComparator, compareFiniteNumbers, compareFiniteNumbersOrStringOrArray, compareStrings} from './b+tree';
22
import SortedArray from './sorted-array';
33
import MersenneTwister from 'mersenne-twister';
44

@@ -14,6 +14,153 @@ function addToBoth<K,V>(a: IMap<K,V>, b: IMap<K,V>, k: K, v: V) {
1414
expect(a.set(k,v)).toEqual(b.set(k,v));
1515
}
1616

17+
describe('defaultComparator', () =>
18+
{
19+
const dateA = new Date(Date.UTC(96, 1, 2, 3, 4, 5));
20+
const dateA2 = new Date(Date.UTC(96, 1, 2, 3, 4, 5));
21+
const dateB = new Date(Date.UTC(96, 1, 2, 3, 4, 6));
22+
const values = [
23+
dateA,
24+
dateA2,
25+
dateB,
26+
dateA.valueOf(),
27+
'24x',
28+
'0',
29+
'1',
30+
'3',
31+
'String',
32+
'10',
33+
0,
34+
"NaN",
35+
NaN,
36+
Infinity,
37+
-0,
38+
-Infinity,
39+
1,
40+
10,
41+
2,
42+
[],
43+
'[]',
44+
[1],
45+
['1']
46+
];
47+
const sorted = [-Infinity, -10, -1, -0, 0, 1, 2, 10, Infinity];
48+
testComparison(defaultComparator, sorted, values, [[dateA, dateA2], [0, -0], [[1], ['1']]]);
49+
});
50+
51+
describe('compareFiniteNumbers', () =>
52+
{
53+
const sorted = [-10, -1, -0, 0, 1, 2, 10];
54+
testComparison(compareFiniteNumbers, sorted, sorted, [[-0, 0]]);
55+
});
56+
57+
describe('compareStrings', () =>
58+
{
59+
const values = [
60+
'24x',
61+
'+0',
62+
'0.0',
63+
'0',
64+
'-0',
65+
'1',
66+
'3',
67+
'String',
68+
'10',
69+
"NaN",
70+
];;
71+
testComparison(compareStrings, [], values, []);
72+
});
73+
74+
describe('compareFiniteNumbersOrStringOrArray', () =>
75+
{
76+
const values = [
77+
'24x',
78+
'0',
79+
'1',
80+
'3',
81+
'String',
82+
'10',
83+
0,
84+
"NaN",
85+
-0,
86+
1,
87+
10,
88+
2,
89+
[],
90+
'[]',
91+
[1],
92+
['1']
93+
];
94+
const sorted = [-10, -1, -0, 0, 1, 2, 10];
95+
testComparison(compareFiniteNumbersOrStringOrArray, sorted, values, [[0, -0], [[1], ['1']]]);
96+
});
97+
98+
99+
/**
100+
* Tests a comparison function, ensuring it produces a strict partial order over the provided values.
101+
* Additionally confirms that the comparison function has the correct definition of equality via expectedDuplicates.
102+
*/
103+
function testComparison(comparison: (a: any, b: any) => number, inOrder: any[], values: any[], expectedDuplicates: [any, any][] = []) {
104+
function check(a: any, b: any): number {
105+
const v = comparison(a, b);
106+
expect(typeof v).toEqual('number');
107+
expect(v === v).toEqual(true); // Not NaN
108+
return Math.sign(v);
109+
}
110+
111+
test('comparison has correct order', () => {
112+
expect([...inOrder].sort(comparison)).toMatchObject(inOrder);
113+
});
114+
115+
test('comparison deffierantes values', () => {
116+
let duplicates = [];
117+
for (let i = 0; i < values.length; i++) {
118+
for (let j = i + 1; j < values.length; j++) {
119+
if (check(values[i], values[j]) === 0) {
120+
duplicates.push([values[i], values[j]]);
121+
}
122+
}
123+
}
124+
expect(duplicates).toMatchObject(expectedDuplicates);
125+
});
126+
127+
test('comparison forms a strict partial ordering', () => {
128+
// To be a strict partial order, the function must be:
129+
// irreflexive: not a < a
130+
// transitive: if a < b and b < c then a < c
131+
// asymmetric: if a < b then not b < a
132+
133+
// Since our comparison has three outputs, we adjust that to, we need to tighten the rules that involve 'not a < b' (where we have two possible outputs) as follows:
134+
// irreflexive: compare(a, a) === 0
135+
// transitive: if compare(a, b) < 0 and compare(b, c) < 0 then compare(a, c) < 0
136+
// asymmetric: sign(compare(a, b)) === -sign(compare(b, a))
137+
138+
// This can is brute forced in O(n^3) time below:
139+
// Violations
140+
const irreflexive = []
141+
const transitive = []
142+
const asymmetric = []
143+
for (const a of values) {
144+
// irreflexive: compare(a, a) === 0
145+
if(check(a, a) !== 0) irreflexive.push(a);
146+
for (const b of values) {
147+
for (const c of values) {
148+
// transitive: if compare(a, b) < 0 and compare(b, c) < 0 then compare(a, c) < 0
149+
if (check(a, b) < 0 && check(b, c) < 0) {
150+
if(check(a, c) !== -1) transitive.push([a, b, c]);
151+
}
152+
}
153+
// sign(compare(a, b)) === -sign(compare(b, a))
154+
if(check(a, b) !== -check(b, a)) asymmetric.push([a, b]);
155+
}
156+
}
157+
expect(irreflexive).toEqual([]);
158+
expect(transitive).toEqual([]);
159+
expect(asymmetric).toEqual([]);
160+
});
161+
}
162+
163+
17164
describe('Simple tests on leaf nodes', () =>
18165
{
19166
test('A few insertions (fanout 8)', insert8.bind(null, 8));
@@ -427,4 +574,33 @@ function testBTree(maxNodeSize: number)
427574
expect(tree.nextLowerPair(undefined)).toEqual([300, 600]);
428575
expect(tree.nextHigherPair(undefined)).toEqual([-10, -20]);
429576
});
577+
578+
test('Regression test for invalid default comparator causing malformed trees', () => {
579+
const key = '24e26f0b-3c1a-47f8-a7a1-e8461ddb69ce6';
580+
const tree = new BTree<string,{}>(undefined, undefined, maxNodeSize);
581+
// The defaultComparator was not transitive for these inputs due to comparing numeric strings to each other numerically,
582+
// but lexically when compared to non-numeric strings. This resulted in keys not being orderable, and the tree behaving incorrectly.
583+
const inputs: [string,{}][] = [
584+
[key, {}],
585+
['0', {}],
586+
['1', {}],
587+
['2', {}],
588+
['3', {}],
589+
['4', {}],
590+
['Cheese', {}],
591+
['10', {}],
592+
['11', {}],
593+
['12', {}],
594+
['13', {}],
595+
['15', {}],
596+
['16', {}],
597+
];
598+
599+
for (const [id, node] of inputs) {
600+
expect( tree.set(id, node)).toBeTruthy();
601+
tree.checkValid();
602+
expect(tree.get(key)).not.toBeUndefined();
603+
}
604+
expect(tree.get(key)).not.toBeUndefined();
605+
});
430606
}

0 commit comments

Comments
 (0)