Skip to content

Commit 912e19b

Browse files
committed
fixes and formating
1 parent 71033ea commit 912e19b

14 files changed

+634
-257
lines changed

packages/d2mini/src/indexes.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ export class Index<K, V> {
1010
#inner: DefaultMap<K, Map<V, number>>
1111

1212
constructor() {
13-
this.#inner = new DefaultMap<K, Map<V, number>>(
14-
() => new Map<V, number>(),
15-
)
13+
this.#inner = new DefaultMap<K, Map<V, number>>(() => new Map<V, number>())
1614
// #inner is a map of:
1715
// {
1816
// [key]: Map<V, number> // Direct value-to-multiplicity mapping
@@ -58,7 +56,7 @@ export class Index<K, V> {
5856
const valueMap = this.#inner.get(key)
5957
const existingMultiplicity = valueMap.get(val) ?? 0
6058
const newMultiplicity = existingMultiplicity + multiplicity
61-
59+
6260
if (multiplicity !== 0) {
6361
if (newMultiplicity === 0) {
6462
valueMap.delete(val)

packages/d2mini/src/multiset.ts

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
import { DefaultMap, chunkedArrayPush, hash } from './utils.js'
1+
import {
2+
DefaultMap,
3+
chunkedArrayPush,
4+
hash,
5+
globalObjectIdGenerator,
6+
} from './utils.js'
27

38
export type MultiSetArray<T> = [T, number][]
49
export type KeyedData<T> = [key: string, value: T]
@@ -66,6 +71,101 @@ export class MultiSet<T> {
6671
* (record, multiplicity) pair.
6772
*/
6873
consolidate(): MultiSet<T> {
74+
// Check if this looks like a keyed multiset (first item is a tuple of length 2)
75+
if (this.#inner.length > 0) {
76+
const firstItem = this.#inner[0][0]
77+
if (Array.isArray(firstItem) && firstItem.length === 2) {
78+
return this.#consolidateKeyed()
79+
}
80+
}
81+
82+
// Fall back to original method for unkeyed data
83+
return this.#consolidateUnkeyed()
84+
}
85+
86+
/**
87+
* Private method for consolidating keyed multisets where keys are strings/numbers
88+
* and values are compared by reference equality.
89+
*
90+
* This method provides significant performance improvements over the hash-based approach
91+
* by using WeakMap for object reference tracking and avoiding expensive serialization.
92+
*
93+
* Special handling for join operations: When values are tuples of length 2 (common in joins),
94+
* we unpack them and compare each element individually to maintain proper equality semantics.
95+
*/
96+
#consolidateKeyed(): MultiSet<T> {
97+
const consolidated = new Map<string, number>()
98+
const values = new Map<string, T>()
99+
100+
// Use global object ID generator for consistent reference equality
101+
102+
/**
103+
* Special handler for tuples (arrays of length 2) commonly produced by join operations.
104+
* Unpacks the tuple and generates an ID based on both elements to ensure proper
105+
* consolidation of join results like ['A', null] and [null, 'X'].
106+
*/
107+
const getTupleId = (tuple: any[]): string => {
108+
if (tuple.length !== 2) {
109+
throw new Error('Expected tuple of length 2')
110+
}
111+
const [first, second] = tuple
112+
return `${globalObjectIdGenerator.getStringId(first)}|${globalObjectIdGenerator.getStringId(second)}`
113+
}
114+
115+
// Process each item in the multiset
116+
for (const [data, multiplicity] of this.#inner) {
117+
// Verify this is still a keyed item (should be [key, value] pair)
118+
if (!Array.isArray(data) || data.length !== 2) {
119+
// Found non-keyed item, fall back to unkeyed consolidation
120+
return this.#consolidateUnkeyed()
121+
}
122+
123+
const [key, value] = data
124+
125+
// Verify key is string or number as expected for keyed multisets
126+
if (typeof key !== 'string' && typeof key !== 'number') {
127+
// Found non-string/number key, fall back to unkeyed consolidation
128+
return this.#consolidateUnkeyed()
129+
}
130+
131+
// Generate value ID with special handling for join tuples
132+
let valueId: string
133+
if (Array.isArray(value) && value.length === 2) {
134+
// Special case: value is a tuple from join operations
135+
valueId = getTupleId(value)
136+
} else {
137+
// Regular case: use reference/value equality
138+
valueId = globalObjectIdGenerator.getStringId(value)
139+
}
140+
141+
// Create composite key and consolidate
142+
const compositeKey = key + '|' + valueId
143+
consolidated.set(
144+
compositeKey,
145+
(consolidated.get(compositeKey) || 0) + multiplicity,
146+
)
147+
148+
// Store the original data for the first occurrence
149+
if (!values.has(compositeKey)) {
150+
values.set(compositeKey, data as T)
151+
}
152+
}
153+
154+
// Build result array, filtering out zero multiplicities
155+
const result: MultiSetArray<T> = []
156+
for (const [compositeKey, multiplicity] of consolidated) {
157+
if (multiplicity !== 0) {
158+
result.push([values.get(compositeKey)!, multiplicity])
159+
}
160+
}
161+
162+
return new MultiSet(result)
163+
}
164+
165+
/**
166+
* Private method for consolidating unkeyed multisets using the original approach.
167+
*/
168+
#consolidateUnkeyed(): MultiSet<T> {
69169
const consolidated = new DefaultMap<string | number, number>(() => 0)
70170
const values = new Map<string, any>()
71171

packages/d2mini/src/operators/topKWithFractionalIndex.ts

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { MultiSet } from '../multiset.js'
99
import { Index } from '../indexes.js'
1010
import { generateKeyBetween } from 'fractional-indexing'
1111
import { binarySearch } from '../utils.js'
12+
import { globalObjectIdGenerator } from '../utils.js'
1213

1314
export interface TopKWithFractionalIndexOptions {
1415
limit?: number
@@ -203,7 +204,10 @@ export class TopKWithFractionalIndexOperator<K, V1> extends UnaryOperator<
203204
protected createTopK(
204205
offset: number,
205206
limit: number,
206-
comparator: (a: TieBreakerTaggedValue<V1>, b: TieBreakerTaggedValue<V1>) => number,
207+
comparator: (
208+
a: TieBreakerTaggedValue<V1>,
209+
b: TieBreakerTaggedValue<V1>,
210+
) => number,
207211
): TopK<TieBreakerTaggedValue<V1>> {
208212
return new TopKArray(offset, limit, comparator)
209213
}
@@ -232,7 +236,10 @@ export class TopKWithFractionalIndexOperator<K, V1> extends UnaryOperator<
232236
this.#index.addValue(key, [value, multiplicity])
233237
const newMultiplicity = this.#index.getMultiplicity(key, value)
234238

235-
let res: TopKChanges<TieBreakerTaggedValue<V1>> = { moveIn: null, moveOut: null }
239+
let res: TopKChanges<TieBreakerTaggedValue<V1>> = {
240+
moveIn: null,
241+
moveOut: null,
242+
}
236243
if (oldMultiplicity <= 0 && newMultiplicity > 0) {
237244
// The value was invisible but should now be visible
238245
// Need to insert it into the array of sorted values
@@ -334,43 +341,19 @@ function mapValue<V, W>(
334341
return [f(getValue(value)), getIndex(value)]
335342
}
336343

337-
// Abstraction for values tagged with a tie breaker
338-
// Object identity-based tie-breaking using WeakMap
339-
const objectIds = new WeakMap<object, number>()
340-
let nextObjectId = 0
341-
342-
function getObjectId(value: any): number {
343-
// For primitives, use a simple hash of their string representation
344-
if (typeof value !== 'object' || value === null) {
345-
// Simple string-based hash for primitives to ensure consistency
346-
const str = String(value)
347-
let hash = 0
348-
for (let i = 0; i < str.length; i++) {
349-
const char = str.charCodeAt(i)
350-
hash = ((hash << 5) - hash) + char
351-
hash = hash & hash // Convert to 32-bit integer
352-
}
353-
return hash
354-
}
355-
356-
// For objects, use WeakMap to assign unique IDs
357-
if (!objectIds.has(value)) {
358-
objectIds.set(value, nextObjectId++)
359-
}
360-
return objectIds.get(value)!
361-
}
362-
363344
export type TieBreaker = number
364345
export type TieBreakerTaggedValue<V> = [V, TieBreaker]
365346

366347
function tagValue<V>(value: V): TieBreakerTaggedValue<V> {
367-
return [value, getObjectId(value)]
348+
return [value, globalObjectIdGenerator.getId(value)]
368349
}
369350

370351
function untagValue<V>(tieBreakerTaggedValue: TieBreakerTaggedValue<V>): V {
371352
return tieBreakerTaggedValue[0]
372353
}
373354

374-
function getTieBreaker<V>(tieBreakerTaggedValue: TieBreakerTaggedValue<V>): TieBreaker {
355+
function getTieBreaker<V>(
356+
tieBreakerTaggedValue: TieBreakerTaggedValue<V>,
357+
): TieBreaker {
375358
return tieBreakerTaggedValue[1]
376359
}

packages/d2mini/src/operators/topKWithFractionalIndexBTree.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,10 @@ export class TopKWithFractionalIndexBTreeOperator<
240240
protected override createTopK(
241241
offset: number,
242242
limit: number,
243-
comparator: (a: TieBreakerTaggedValue<V1>, b: TieBreakerTaggedValue<V1>) => number,
243+
comparator: (
244+
a: TieBreakerTaggedValue<V1>,
245+
b: TieBreakerTaggedValue<V1>,
246+
) => number,
244247
): TopK<TieBreakerTaggedValue<V1>> {
245248
if (!BTree) {
246249
throw new Error(

packages/d2mini/src/utils.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,53 @@ export function binarySearch<T>(
111111
}
112112
return low
113113
}
114+
115+
/**
116+
* Utility for generating unique IDs for objects and values.
117+
* Uses WeakMap for object reference tracking and consistent hashing for primitives.
118+
*/
119+
export class ObjectIdGenerator {
120+
private objectIds = new WeakMap<object, number>()
121+
private nextId = 0
122+
123+
/**
124+
* Get a unique identifier for any value.
125+
* - Objects: Uses WeakMap for reference-based identity
126+
* - Primitives: Uses consistent string-based hashing
127+
*/
128+
getId(value: any): number {
129+
// For primitives, use a simple hash of their string representation
130+
if (typeof value !== 'object' || value === null) {
131+
const str = String(value)
132+
let hash = 0
133+
for (let i = 0; i < str.length; i++) {
134+
const char = str.charCodeAt(i)
135+
hash = (hash << 5) - hash + char
136+
hash = hash & hash // Convert to 32-bit integer
137+
}
138+
return hash
139+
}
140+
141+
// For objects, use WeakMap to assign unique IDs
142+
if (!this.objectIds.has(value)) {
143+
this.objectIds.set(value, this.nextId++)
144+
}
145+
return this.objectIds.get(value)!
146+
}
147+
148+
/**
149+
* Get a string representation of the ID for use in composite keys.
150+
*/
151+
getStringId(value: any): string {
152+
if (value === null) return 'null'
153+
if (value === undefined) return 'undefined'
154+
if (typeof value !== 'object') return String(value)
155+
156+
return `obj_${this.getId(value)}`
157+
}
158+
}
159+
160+
/**
161+
* Global instance for cases where a shared object ID space is needed.
162+
*/
163+
export const globalObjectIdGenerator = new ObjectIdGenerator()

packages/d2mini/tests/operators/count.test.ts

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ import { D2 } from '../../src/d2.js'
33
import { MultiSet } from '../../src/multiset.js'
44
import { count } from '../../src/operators/count.js'
55
import { output } from '../../src/operators/output.js'
6-
import { KeyedMessageTracker, assertKeyedResults, assertOnlyKeysAffected } from '../test-utils.js'
6+
import {
7+
KeyedMessageTracker,
8+
assertKeyedResults,
9+
assertOnlyKeysAffected,
10+
} from '../test-utils.js'
711

812
describe('Operators', () => {
913
describe('Count operation', () => {
@@ -40,10 +44,10 @@ function testCount() {
4044
graph.run()
4145

4246
const result = tracker.getResult()
43-
47+
4448
// Assert only keys that have values are affected
4549
assertOnlyKeysAffected('basic count operation', result.messages, [1, 2, 3])
46-
50+
4751
// Assert the final materialized results are correct
4852
assertKeyedResults(
4953
'basic count operation',
@@ -53,7 +57,7 @@ function testCount() {
5357
[2, 3], // 3 values for key 2
5458
[3, 1], // 1 value for key 3 (1 + (-1) + 1 = 1)
5559
],
56-
6 // Expected message count
60+
6, // Expected message count
5761
)
5862
})
5963

@@ -80,18 +84,22 @@ function testCount() {
8084
graph.run()
8185

8286
const result = tracker.getResult()
83-
87+
8488
// Assert only key 1 is affected
85-
assertOnlyKeysAffected('count with all negative multiplicities', result.messages, [1])
86-
89+
assertOnlyKeysAffected(
90+
'count with all negative multiplicities',
91+
result.messages,
92+
[1],
93+
)
94+
8795
// Assert the final materialized results are correct
8896
assertKeyedResults(
8997
'count with all negative multiplicities',
9098
result,
9199
[
92100
[1, -3], // -1 + (-2) = -3
93101
],
94-
2 // Expected message count
102+
2, // Expected message count
95103
)
96104
})
97105

@@ -126,10 +134,13 @@ function testCount() {
126134
graph.run()
127135

128136
const result = tracker.getResult()
129-
137+
130138
// Assert only keys 'one' and 'two' are affected
131-
assertOnlyKeysAffected('count with multiple batches', result.messages, ['one', 'two'])
132-
139+
assertOnlyKeysAffected('count with multiple batches', result.messages, [
140+
'one',
141+
'two',
142+
])
143+
133144
// Assert the final materialized results are correct
134145
assertKeyedResults(
135146
'count with multiple batches',
@@ -138,7 +149,7 @@ function testCount() {
138149
['one', 3], // 2 + 1 = 3
139150
['two', 1], // 1
140151
],
141-
5 // Expected message count
152+
5, // Expected message count
142153
)
143154
})
144155

@@ -182,10 +193,13 @@ function testCount() {
182193
graph.run()
183194

184195
const result = tracker.getResult()
185-
196+
186197
// Assert only keys 'a' and 'c' are affected (NOT 'b')
187-
assertOnlyKeysAffected('count incremental updates', result.messages, ['a', 'c'])
188-
198+
assertOnlyKeysAffected('count incremental updates', result.messages, [
199+
'a',
200+
'c',
201+
])
202+
189203
// Assert the final materialized results are correct
190204
assertKeyedResults(
191205
'count incremental updates',
@@ -194,7 +208,7 @@ function testCount() {
194208
['a', 3], // Count increased from 2 to 3
195209
['c', 0], // Count decreased from 1 to 0
196210
],
197-
4 // Expected message count: remove old 'a', add new 'a', remove old 'c', add new 'c'
211+
4, // Expected message count: remove old 'a', add new 'a', remove old 'c', add new 'c'
198212
)
199213
})
200214
}

0 commit comments

Comments
 (0)