Skip to content

Commit 750abbc

Browse files
Fix mergeOverlappingSets implementation (#321)
* Fix `mergeOverlappingSets` implementation * More efficient implementation
1 parent c9854de commit 750abbc

File tree

3 files changed

+113
-78
lines changed

3 files changed

+113
-78
lines changed

lib/utils/reorder-alternatives.ts

Lines changed: 99 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ function getDirectionIndependedDeterminismEqClasses(
234234
const ltr = getDirectionalDeterminismEqClasses(alternatives, "ltr", context)
235235
const rtl = getDirectionalDeterminismEqClasses(alternatives, "rtl", context)
236236

237-
const disjoint = mergeOverlappingSets(new Set([...ltr, ...rtl]), (s) => s)
237+
const disjoint = mergeOverlappingSets([...ltr, ...rtl], (s) => s)
238238

239239
const result: (readonly Alternative[])[] = []
240240
for (const sets of disjoint) {
@@ -325,17 +325,17 @@ function getDirectionalDeterminismEqClasses(
325325

326326
/** Subdivide */
327327
function subdivide(
328-
eqClass: ReadonlySet<Prefix>,
328+
eqClass: readonly Prefix[],
329329
index: number,
330330
): (readonly Prefix[])[] {
331-
if (eqClass.size < 2) {
332-
return [[...eqClass]]
331+
if (eqClass.length < 2) {
332+
return [eqClass]
333333
}
334334

335335
for (const prefix of eqClass) {
336336
if (index >= prefix.characters.length) {
337337
// ran out of characters
338-
return [[...eqClass]]
338+
return [eqClass]
339339
}
340340
}
341341

@@ -352,9 +352,73 @@ function getDirectionalDeterminismEqClasses(
352352
return result
353353
}
354354

355-
return subdivide(new Set(prefixes), 0).map((eq) =>
356-
eq.map((p) => p.alternative),
357-
)
355+
return subdivide(prefixes, 0).map((eq) => eq.map((p) => p.alternative))
356+
}
357+
358+
class SetEquivalence {
359+
private readonly indexes: number[]
360+
361+
public readonly count: number
362+
363+
public constructor(count: number) {
364+
this.count = count
365+
this.indexes = []
366+
for (let i = 0; i < count; i++) {
367+
this.indexes.push(i)
368+
}
369+
}
370+
371+
public makeEqual(a: number, b: number): void {
372+
// This works using the following idea:
373+
// 1. If the eq set of a and b is the same, then we can stop.
374+
// 2. If indexes[a] < indexes[b], then we want to make
375+
// indexes[b] := indexes[a]. However, this means that we lose the
376+
// information about the indexes[b]! So we will store
377+
// oldB := indexes[b], then indexes[b] := indexes[a], and then
378+
// make oldB == a.
379+
// 3. If indexes[a] > indexes[b], similar to 2.
380+
381+
let aValue = this.indexes[a]
382+
let bValue = this.indexes[b]
383+
while (aValue !== bValue) {
384+
if (aValue < bValue) {
385+
this.indexes[b] = aValue
386+
// eslint-disable-next-line no-param-reassign -- x
387+
b = bValue
388+
bValue = this.indexes[b]
389+
} else {
390+
this.indexes[a] = bValue
391+
// eslint-disable-next-line no-param-reassign -- x
392+
a = aValue
393+
aValue = this.indexes[a]
394+
}
395+
}
396+
}
397+
398+
/**
399+
* This returns:
400+
*
401+
* 1. `eqSet.count`: How many different equivalence classes there are.
402+
* 2. `eqSet.indexes`: A map (array) from each element (index) to the index
403+
* of its equivalence class.
404+
*
405+
* All equivalence class indexes `eqSet.indexes[i]` are guaranteed to
406+
* be <= `eqSet.count`.
407+
*/
408+
public getEquivalenceSets(): { count: number; indexes: number[] } {
409+
let counter = 0
410+
for (let i = 0; i < this.count; i++) {
411+
if (i === this.indexes[i]) {
412+
this.indexes[i] = counter++
413+
} else {
414+
this.indexes[i] = this.indexes[this.indexes[i]]
415+
}
416+
}
417+
return {
418+
count: counter,
419+
indexes: this.indexes,
420+
}
421+
}
358422
}
359423

360424
/**
@@ -366,90 +430,47 @@ function getDirectionalDeterminismEqClasses(
366430
* This function will not merge the given sets itself. Instead, it will
367431
* return an iterable of sets (`Set<S>`) of sets (`S`) to merge. Each set (`S`)
368432
* is guaranteed to be returned exactly once.
433+
*
434+
* Note: Instead of actual JS `Set` instances, the implementation will treat
435+
* `readonly S[]` instances as sets. This makes the whole implementation a lot
436+
* more efficient.
369437
*/
370-
function* mergeOverlappingSets<S, E>(
371-
sets: ReadonlySet<S>,
438+
function mergeOverlappingSets<S, E>(
439+
sets: readonly S[],
372440
getElements: (set: S) => Iterable<E>,
373-
): Iterable<ReadonlySet<S>> {
374-
if (sets.size < 2) {
375-
yield sets
376-
return
377-
}
378-
379-
interface SetEntry {
380-
readonly type: "Set"
381-
readonly set: Set<S>
441+
): (readonly S[])[] {
442+
if (sets.length < 2) {
443+
return [sets]
382444
}
383-
interface RefEntry {
384-
readonly type: "Ref"
385-
readonly key: E
386-
}
387-
type Entry = SetEntry | RefEntry
388445

389-
// map from base set (index) to prefixes with that base set
390-
const map = new Map<E, Entry>()
391-
let disjoint = true
446+
const eq = new SetEquivalence(sets.length)
447+
const elementMap = new Map<E, number>()
392448

393-
for (const s of sets) {
449+
for (let i = 0; i < sets.length; i++) {
450+
const s = sets[i]
394451
for (const e of getElements(s)) {
395-
const entry = map.get(e)
396-
if (entry === undefined) {
397-
map.set(e, { type: "Set", set: new Set([s]) })
398-
} else if (entry.type === "Set") {
399-
entry.set.add(s)
400-
disjoint = false
401-
}
402-
}
403-
}
404-
405-
// merge entries in the map
406-
407-
/**
408-
* Resolves the given key until the key maps to a set entry.
409-
*/
410-
function resolve(key: E): E {
411-
let e = key
412-
for (;;) {
413-
const entry = map.get(e)
414-
if (entry && entry.type === "Ref") {
415-
e = entry.key
452+
const elementSet = elementMap.get(e)
453+
if (elementSet === undefined) {
454+
// It's the first time we see this element.
455+
elementMap.set(e, i)
416456
} else {
417-
return e
457+
// We've seen this element before in another set.
458+
// Make the 2 sets equal.
459+
eq.makeEqual(i, elementSet)
418460
}
419461
}
420462
}
421463

422-
if (!disjoint) {
423-
// this step is only necessary if the sets aren't all disjoint
464+
const eqSets = eq.getEquivalenceSets()
424465

425-
for (const s of sets) {
426-
const toMergeSet = new Set<E>()
427-
for (const e of getElements(s)) {
428-
toMergeSet.add(resolve(e))
429-
}
430-
431-
if (toMergeSet.size < 2) {
432-
continue
433-
}
434-
435-
const toMerge = [...toMergeSet]
436-
437-
const intoKey = toMerge[0]
438-
const intoSet = map.get(intoKey)! as SetEntry
439-
for (let i = 1; i < toMerge.length; i++) {
440-
const mergeKey = toMerge[i]
441-
const mergeSet = map.get(mergeKey)! as SetEntry
442-
mergeSet.set.forEach((p) => intoSet.set.add(p))
443-
map.set(mergeKey, { type: "Ref", key: intoKey })
444-
}
445-
}
466+
const result: S[][] = []
467+
for (let i = 0; i < eqSets.count; i++) {
468+
result.push([])
446469
}
447-
448-
for (const value of map.values()) {
449-
if (value.type === "Set") {
450-
yield value.set
451-
}
470+
for (let i = 0; i < sets.length; i++) {
471+
result[eqSets.indexes[i]].push(sets[i])
452472
}
473+
return result
453474
}
454475

455476
/**

tests/lib/utils/__snapshots__/reorder-alternatives.ts.snap

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
// Jest Snapshot v1, https://goo.gl/fbAQLP
22

3+
exports[`getDeterminismEqClasses /(?:script|source)_foo|sample/ 1`] = `
4+
Object {
5+
"(?:script|source)": Array [
6+
"script",
7+
"source",
8+
],
9+
"(?:script|source)_foo|sample": Array [
10+
"(?:script|source)_foo",
11+
"sample",
12+
],
13+
}
14+
`;
15+
316
exports[`getDeterminismEqClasses /(int|integer)\\b/ 1`] = `
417
Object {
518
"(int|integer)": Array [

tests/lib/utils/reorder-alternatives.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ describe(getDeterminismEqClasses.name, function () {
7070
/aaaaaaaaaaaaaaaaaaaa|bbbbbbbbbbbbbbbbbbbb|[^][^][^][^][^][^][^][^][^][^][^][^][^][^][^][^][^][^][^][^]/,
7171
/a{20}|b{20}|[^]{20}/,
7272
/a{20}c|b{20}a|[^]{20}b/,
73+
/(?:script|source)_foo|sample/,
7374
]
7475
const directionalRegexes: RegExp[] = [
7576
/abc|a/,

0 commit comments

Comments
 (0)