Skip to content

Commit 9758b8e

Browse files
committed
add unit test
1 parent f325c93 commit 9758b8e

File tree

2 files changed

+263
-12
lines changed

2 files changed

+263
-12
lines changed

packages/firestore/src/util/misc.ts

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,7 @@ export interface Equatable<T> {
7777

7878
/** Compare strings in UTF-8 encoded byte order */
7979
export function compareUtf8Strings(left: string, right: string): number {
80-
let i = 0;
81-
while (i < left.length && i < right.length) {
80+
for (let i = 0; i < left.length && i < right.length; i++) {
8281
const leftCodePoint = left.codePointAt(i)!;
8382
const rightCodePoint = right.codePointAt(i)!;
8483

@@ -90,9 +89,8 @@ export function compareUtf8Strings(left: string, right: string): number {
9089
// Lazy instantiate TextEncoder
9190
const encoder = newTextEncoder();
9291

93-
// UTF-8 encoded byte comparison, substring 2 indexes to cover surrogate pairs
94-
const leftBytes = encoder.encode(left.substring(i, i + 2));
95-
const rightBytes = encoder.encode(right.substring(i, i + 2));
92+
const leftBytes = encoder.encode(getUtf8SafeSubstring(left, i));
93+
const rightBytes = encoder.encode(getUtf8SafeSubstring(right, i));
9694
for (
9795
let j = 0;
9896
j < Math.min(leftBytes.length, rightBytes.length);
@@ -103,20 +101,25 @@ export function compareUtf8Strings(left: string, right: string): number {
103101
return comparison;
104102
}
105103
}
106-
107-
// Compare lengths if all bytes are equal
108-
return primitiveComparator(leftBytes.length, rightBytes.length);
109104
}
110105
}
111-
112-
// Increment by 2 for surrogate pairs, 1 otherwise
113-
i += leftCodePoint > 0xffff ? 2 : 1;
114106
}
115107

116108
// Compare lengths if all characters are equal
117109
return primitiveComparator(left.length, right.length);
118110
}
119111

112+
/**
 * Returns the UTF-16 code units of `str` that make up the single code point
 * starting at `index`.
 *
 * @param str - The string to slice.
 * @param index - UTF-16 code unit offset at which the code point starts.
 * @returns Two code units when the code point at `index` is above U+FFFF
 *   (i.e. it is stored as a surrogate pair), otherwise one code unit.
 */
function getUtf8SafeSubstring(str: string, index: number): string {
  const codePoint = str.codePointAt(index)!;
  // Code points above U+FFFF occupy two UTF-16 code units; everything else
  // occupies one.
  const codeUnitCount = codePoint > 0xffff ? 2 : 1;
  return str.substring(index, index + codeUnitCount);
}
122+
120123
export interface Iterable<V> {
121124
forEach: (cb: (v: V) => void) => void;
122125
}

packages/firestore/test/unit/util/misc.test.ts

Lines changed: 249 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import { expect } from 'chai';
1919

2020
import { debugCast } from '../../../src/util/assert';
21-
import { immediateSuccessor } from '../../../src/util/misc';
21+
import { compareUtf8Strings, immediateSuccessor } from '../../../src/util/misc';
2222
import { mask } from '../../util/helpers';
2323

2424
describe('immediateSuccessor', () => {
@@ -53,3 +53,251 @@ describe('FieldMask', () => {
5353
);
5454
});
5555
});
56+
57+
/** A pair of strings to be compared against each other. */
class StringPair {
  public s1: string;
  public s2: string;

  /**
   * @param s1 - The first string of the pair.
   * @param s2 - The second string of the pair.
   */
  constructor(s1: string, s2: string) {
    this.s1 = s1;
    this.s2 = s2;
  }
}
60+
61+
/**
 * Produces pairs of strings that share a common prefix, so that comparisons
 * between them have to look past the first few characters.
 */
class StringPairGenerator {
  private stringGenerator: StringGenerator;

  /**
   * @param stringGenerator - Source of the prefix and of both suffixes.
   */
  constructor(stringGenerator: StringGenerator) {
    this.stringGenerator = stringGenerator;
  }

  /**
   * Draws three strings from the generator, in order: the shared prefix, the
   * suffix of the first string, and the suffix of the second string.
   *
   * @returns The pair `(prefix + suffix1, prefix + suffix2)`.
   */
  next(): StringPair {
    const source = this.stringGenerator;
    // Array literals evaluate left to right, so the draw order is
    // prefix, first suffix, second suffix.
    const [sharedPrefix, firstSuffix, secondSuffix] = [
      source.next(),
      source.next(),
      source.next()
    ];
    return new StringPair(
      sharedPrefix + firstSuffix,
      sharedPrefix + secondSuffix
    );
  }
}
71+
72+
/**
 * Generates pseudo-random strings that mix code points from the basic
 * multilingual plane (one UTF-16 code unit each) with supplementary code
 * points (two UTF-16 code units each, i.e. a surrogate pair).
 */
class StringGenerator {
  private static readonly DEFAULT_SURROGATE_PAIR_PROBABILITY = 0.33;
  private static readonly DEFAULT_MAX_LENGTH = 20;

  // The first Unicode code point that is in the basic multilingual plane ("BMP") and,
  // therefore requires 1 UTF-16 code unit to be represented in UTF-16.
  private static readonly MIN_BMP_CODE_POINT = 0x00000000;

  // The last Unicode code point that is in the basic multilingual plane ("BMP") and,
  // therefore requires 1 UTF-16 code unit to be represented in UTF-16.
  private static readonly MAX_BMP_CODE_POINT = 0x0000ffff;

  // The first Unicode code point that is outside of the basic multilingual plane ("BMP") and,
  // therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16.
  private static readonly MIN_SUPPLEMENTARY_CODE_POINT = 0x00010000;

  // The last Unicode code point that is outside of the basic multilingual plane ("BMP") and,
  // therefore requires 2 UTF-16 code units, a surrogate pair, to be represented in UTF-16.
  private static readonly MAX_SUPPLEMENTARY_CODE_POINT = 0x0010ffff;

  // The first and last values of the surrogate block inside the BMP. These
  // values are only meaningful as halves of a surrogate pair and must not be
  // emitted as stand-alone code points.
  private static readonly MIN_SURROGATE = 0xd800;
  private static readonly MAX_SURROGATE = 0xdfff;

  private readonly rnd: Random;
  private readonly surrogatePairProbability: number;
  private readonly maxLength: number;

  /**
   * Creates a generator with the default surrogate pair probability and
   * maximum length, backed by a new `Random` built from `seed`.
   */
  constructor(seed: number);
  /**
   * Creates a generator backed by the given random source.
   *
   * @param rnd - The random source to draw from.
   * @param surrogatePairProbability - Probability, in [0.0, 1.0], that any
   *   given code point is a supplementary one.
   * @param maxLength - Upper bound (inclusive) for the target length drawn by
   *   `next()`; must not be negative.
   */
  constructor(rnd: Random, surrogatePairProbability: number, maxLength: number);
  constructor(
    seedOrRnd: number | Random,
    surrogatePairProbability?: number,
    maxLength?: number
  ) {
    if (typeof seedOrRnd === 'number') {
      this.rnd = new Random(seedOrRnd);
      this.surrogatePairProbability =
        StringGenerator.DEFAULT_SURROGATE_PAIR_PROBABILITY;
      this.maxLength = StringGenerator.DEFAULT_MAX_LENGTH;
    } else {
      this.rnd = seedOrRnd;
      this.surrogatePairProbability = StringGenerator.validateProbability(
        'surrogate pair',
        surrogatePairProbability!
      );
      this.maxLength = StringGenerator.validateLength(
        'maximum string',
        maxLength!
      );
    }
  }

  /**
   * Returns `probability` unchanged if it is a finite number in [0.0, 1.0].
   *
   * @throws Error if `probability` is not finite or lies outside [0.0, 1.0].
   */
  private static validateProbability(
    name: string,
    probability: number
  ): number {
    if (!Number.isFinite(probability)) {
      throw new Error(
        `invalid ${name} probability: ${probability} (must be between 0.0 and 1.0, inclusive)`
      );
    } else if (probability < 0.0) {
      throw new Error(
        `invalid ${name} probability: ${probability} (must be greater than or equal to zero)`
      );
    } else if (probability > 1.0) {
      throw new Error(
        `invalid ${name} probability: ${probability} (must be less than or equal to 1)`
      );
    }
    return probability;
  }

  /**
   * Returns `length` unchanged if it is not negative.
   *
   * @throws Error if `length` is negative.
   */
  private static validateLength(name: string, length: number): number {
    if (length < 0) {
      throw new Error(
        `invalid ${name} length: ${length} (must be greater than or equal to zero)`
      );
    }
    return length;
  }

  /**
   * Generates the next string.
   *
   * A target length is drawn via `rnd.nextInt(maxLength + 1)`, then whole code
   * points are appended until the builder's length reaches that target.
   * Because a supplementary code point adds two UTF-16 code units at once, the
   * result may be one code unit longer than the target.
   */
  next(): string {
    const length = this.rnd.nextInt(this.maxLength + 1);
    const sb = new StringBuilder();
    while (sb.length() < length) {
      const codePoint = this.nextCodePoint();
      sb.appendCodePoint(codePoint);
    }
    return sb.toString();
  }

  /** Decides whether the next code point should be a supplementary one. */
  private isNextSurrogatePair(): boolean {
    return StringGenerator.nextBoolean(this.rnd, this.surrogatePairProbability);
  }

  /**
   * Returns `true` with the given probability. The random source is only
   * consulted when the probability is strictly between 0.0 and 1.0.
   */
  private static nextBoolean(rnd: Random, probability: number): boolean {
    if (probability === 0.0) {
      return false;
    } else if (probability === 1.0) {
      return true;
    } else {
      return rnd.nextFloat() < probability;
    }
  }

  /** Picks either a supplementary or a BMP code point. */
  private nextCodePoint(): number {
    if (this.isNextSurrogatePair()) {
      return this.nextSurrogateCodePoint();
    } else {
      return this.nextNonSurrogateCodePoint();
    }
  }

  /**
   * Returns a supplementary code point (one that needs a surrogate pair in
   * UTF-16), built by drawing a high and a low surrogate independently and
   * combining them with the standard UTF-16 decoding formula.
   */
  private nextSurrogateCodePoint(): number {
    const highSurrogateMin = 0xd800;
    const highSurrogateMax = 0xdbff;
    const lowSurrogateMin = 0xdc00;
    const lowSurrogateMax = 0xdfff;

    const highSurrogate = this.nextCodePointRange(
      highSurrogateMin,
      highSurrogateMax
    );
    const lowSurrogate = this.nextCodePointRange(
      lowSurrogateMin,
      lowSurrogateMax
    );

    return (
      (highSurrogate - highSurrogateMin) * 0x400 +
      (lowSurrogate - lowSurrogateMin) +
      StringGenerator.MIN_SUPPLEMENTARY_CODE_POINT
    );
  }

  /**
   * Returns a BMP code point that is NOT in the surrogate block
   * (U+D800..U+DFFF).
   *
   * Drawing from the whole BMP would occasionally yield a lone surrogate, and
   * two such values appended back to back could even fuse into an unintended
   * surrogate pair. To avoid that, one value is drawn from a range that is
   * shorter by the size of the surrogate block, and everything at or above
   * U+D800 is shifted past the block.
   */
  private nextNonSurrogateCodePoint(): number {
    const surrogateBlockSize =
      StringGenerator.MAX_SURROGATE - StringGenerator.MIN_SURROGATE + 1;
    const candidate = this.nextCodePointRange(
      StringGenerator.MIN_BMP_CODE_POINT,
      StringGenerator.MAX_BMP_CODE_POINT - surrogateBlockSize
    );
    return candidate < StringGenerator.MIN_SURROGATE
      ? candidate
      : candidate + surrogateBlockSize;
  }

  /** Returns an integer drawn from the inclusive range [min, max]. */
  private nextCodePointRange(min: number, max: number): number {
    const rangeSize = max - min + 1;
    const offset = this.rnd.nextInt(rangeSize);
    return min + offset;
  }
}
227+
228+
/**
 * A small, seedable linear congruential generator. Given the same seed it
 * always produces the same sequence, which makes test failures reproducible.
 */
class Random {
  private static readonly MULTIPLIER = 9301;
  private static readonly INCREMENT = 49297;
  private static readonly MODULUS = 233280;

  // Current generator state; always kept in [0, MODULUS).
  private seed: number;

  /**
   * @param seed - Any finite number. It is reduced modulo the generator's
   *   modulus so that negative seeds cannot produce negative output and very
   *   large seeds do not overflow exact integer precision when multiplied.
   * @throws Error if `seed` is not a finite number.
   */
  constructor(seed: number) {
    if (!Number.isFinite(seed)) {
      throw new Error(`invalid seed: ${seed} (must be a finite number)`);
    }
    const modulus = Random.MODULUS;
    // JavaScript's % keeps the sign of the dividend, so add the modulus back
    // before reducing again to land in [0, modulus).
    this.seed = ((seed % modulus) + modulus) % modulus;
  }

  /**
   * Advances the generator and returns an integer in [0, max).
   *
   * @param max - Exclusive upper bound of the result.
   */
  nextInt(max: number): number {
    return Math.floor(this.nextFloat() * max);
  }

  /** Advances the generator and returns a number in [0, 1). */
  nextFloat(): number {
    this.seed =
      (this.seed * Random.MULTIPLIER + Random.INCREMENT) % Random.MODULUS;
    return this.seed / Random.MODULUS;
  }
}
246+
247+
/**
 * Accumulates string fragments and joins them on demand.
 */
class StringBuilder {
  private buffer: string[] = [];

  // Running total of UTF-16 code units appended so far. Tracking it here
  // keeps length() constant-time instead of re-joining the whole buffer on
  // every call.
  private size = 0;

  /**
   * Appends a string fragment.
   *
   * @returns This builder, to allow chaining.
   */
  append(str: string): StringBuilder {
    this.buffer.push(str);
    this.size += str.length;
    return this;
  }

  /**
   * Appends the string form of a single Unicode code point.
   *
   * @returns This builder, to allow chaining.
   * @throws RangeError (from `String.fromCodePoint`) if `codePoint` is not a
   *   valid Unicode code point.
   */
  appendCodePoint(codePoint: number): StringBuilder {
    return this.append(String.fromCodePoint(codePoint));
  }

  /** Returns everything appended so far as one string. */
  toString(): string {
    return this.buffer.join('');
  }

  /** Returns the length, in UTF-16 code units, of the accumulated string. */
  length(): number {
    return this.size;
  }
}
268+
269+
describe('CompareUtf8Strings', () => {
  // Randomised check: compareUtf8Strings() must agree with a byte-wise
  // comparison of the UTF-8 encodings as computed by Node's Buffer.
  it('testCompareUtf8Strings', () => {
    // The seed is reported on failure so that a failing run can be replayed.
    const seed = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER);
    const pairSource = new StringPairGenerator(
      new StringGenerator(new Random(seed), 0.33, 20)
    );

    const failures: string[] = [];
    let passCount = 0;

    // Stop early once ten mismatches have been collected.
    for (let i = 0; i < 1000000 && failures.length < 10; i++) {
      const { s1, s2 } = pairSource.next();

      const actual = compareUtf8Strings(s1, s2);
      const expected = Buffer.from(s1, 'utf8').compare(Buffer.from(s2, 'utf8'));

      if (actual !== expected) {
        failures.push(
          `compareUtf8Strings(s1="${s1}", s2="${s2}") returned ${actual}, ` +
            `but expected ${expected} (i=${i}, s1.length=${s1.length}, s2.length=${s2.length})`
        );
        continue;
      }
      passCount++;
    }

    if (failures.length === 0) {
      return;
    }

    console.error(
      `${failures.length} test cases failed, ${passCount} test cases passed, seed=${seed};`
    );
    failures.forEach((message, index) =>
      console.error(`errors[${index}]: ${message}`)
    );
    throw new Error('Test failed');
  }).timeout(10000);
});

0 commit comments

Comments
 (0)