Skip to content

Commit 192f677

Browse files
authored
feat: hash cache for compaction and consolidate (#53)
* hash cache for compaction and consolidate * use murmurhash * changeset * tweak
1 parent aae5581 commit 192f677

File tree

6 files changed

+55
-9
lines changed

6 files changed

+55
-9
lines changed

.changeset/thick-symbols-peel.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@electric-sql/d2ts': patch
3+
---
4+
5+
better hashing in the consolidate and compaction methods

packages/d2ts/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@
7171
}
7272
},
7373
"dependencies": {
74-
"fractional-indexing": "^3.2.0"
74+
"@types/murmurhash-js": "^1.0.6",
75+
"fractional-indexing": "^3.2.0",
76+
"murmurhash-js": "^1.0.0"
7577
}
7678
}

packages/d2ts/src/multiset.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { DefaultMap, chunkedArrayPush } from './utils.js'
1+
import { DefaultMap, chunkedArrayPush, hash } from './utils.js'
22

33
export type MultiSetArray<T> = [T, number][]
44
export type KeyedData<T> = [key: string, value: T]
@@ -67,6 +67,7 @@ export class MultiSet<T> {
6767
*/
6868
consolidate(): MultiSet<T> {
6969
const consolidated = new DefaultMap<string | number, number>(() => 0)
70+
const values = new Map<string, any>()
7071

7172
let hasString = false
7273
let hasNumber = false
@@ -85,14 +86,17 @@ export class MultiSet<T> {
8586
const requireJson = hasOther || (hasString && hasNumber)
8687

8788
for (const [data, multiplicity] of this.#inner) {
88-
const key = requireJson ? JSON.stringify(data) : (data as string | number)
89+
const key = requireJson ? hash(data) : (data as string | number)
90+
if (requireJson && !values.has(key as string)) {
91+
values.set(key as string, data)
92+
}
8993
consolidated.update(key, (count) => count + multiplicity)
9094
}
9195

9296
const result: MultiSetArray<T> = []
9397
for (const [key, multiplicity] of consolidated.entries()) {
9498
if (multiplicity !== 0) {
95-
const parsedKey = requireJson ? JSON.parse(key as string) : key
99+
const parsedKey = requireJson ? values.get(key as string) : key
96100
result.push([parsedKey as T, multiplicity])
97101
}
98102
}
@@ -253,10 +257,10 @@ export class MultiSet<T> {
253257
*/
254258
distinct(): MultiSet<KeyedData<T>> {
255259
return this.reduce((vals: [T, number][]): [T, number][] => {
256-
const consolidated = new Map<string, [T, number]>()
260+
const consolidated = new Map<string | number, [T, number]>()
257261

258262
for (const [val, multiplicity] of vals) {
259-
const key = JSON.stringify(val)
263+
const key = hash(val)
260264
const current = consolidated.get(key)?.[1] || 0
261265
consolidated.set(key, [val, current + multiplicity])
262266
}

packages/d2ts/src/utils.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import murmurhash from 'murmurhash-js'
2+
13
/**
24
* A map that uses WeakRefs to store objects, and automatically removes them when
35
* they are no longer referenced.
@@ -67,3 +69,20 @@ export function chunkedArrayPush(array: unknown[], other: unknown[]) {
6769
}
6870
}
6971
}
72+
73+
const hashCache = new WeakMap()
74+
75+
/**
76+
* A hash method that caches the hash of a value in a weak map
77+
*/
78+
export function hash(data: any): string | number {
79+
if (data === null || data === undefined || typeof data !== 'object') {
80+
return JSON.stringify(data)
81+
}
82+
83+
if (hashCache.has(data)) {
84+
return hashCache.get(data)
85+
}
86+
87+
return murmurhash.murmur3(JSON.stringify(JSON.stringify(data)))
88+
}

packages/d2ts/src/version-index.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { Version, Antichain } from './order.js'
22
import { MultiSet } from './multiset.js'
3-
import { DefaultMap, chunkedArrayPush } from './utils.js'
3+
import { DefaultMap, chunkedArrayPush, hash } from './utils.js'
44

55
type VersionMap<T> = DefaultMap<Version, T[]>
66
type IndexMap<K, V> = DefaultMap<K, VersionMap<[V, number]>>
@@ -209,10 +209,10 @@ export class Index<K, V> implements IndexType<K, V> {
209209

210210
const consolidateValues = (values: [V, number][]): [V, number][] => {
211211
// Use string representation of values as keys for proper deduplication
212-
const consolidated = new Map<string, [V, number]>()
212+
const consolidated = new Map<string | number, [V, number]>()
213213

214214
for (const [value, multiplicity] of values) {
215-
const key = JSON.stringify(value)
215+
const key = hash(value)
216216
const existing = consolidated.get(key)
217217
if (existing) {
218218
consolidated.set(key, [value, existing[1] + multiplicity])

pnpm-lock.yaml

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)