Skip to content

Commit 2b813d1

Browse files
authored
improve hashing (#56)
1 parent 9b9759e commit 2b813d1

File tree

2 files changed

+351
-4
lines changed

2 files changed

+351
-4
lines changed

packages/d2ts/src/utils.ts

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,46 @@ export function chunkedArrayPush(array: unknown[], other: unknown[]) {
7272

7373
const hashCache = new WeakMap()
7474

75+
/**
76+
* Replacer function for JSON.stringify that converts unsupported types to strings
77+
*/
78+
function hashReplacer(_key: string, value: any): any {
79+
if (typeof value === 'bigint') {
80+
return String(value)
81+
} else if (typeof value === 'symbol') {
82+
return String(value)
83+
} else if (typeof value === 'function') {
84+
return String(value)
85+
} else if (value === undefined) {
86+
return 'undefined'
87+
} else if (value instanceof Map) {
88+
return `Map(${JSON.stringify(Array.from(value.entries()), hashReplacer)})`
89+
} else if (value instanceof Set) {
90+
return `Set(${JSON.stringify(Array.from(value.values()), hashReplacer)})`
91+
}
92+
return value
93+
}
94+
7595
/**
7696
* A hash method that caches the hash of object (and function) values in a WeakMap; primitives are hashed on every call
7797
*/
7898
export function hash(data: any): string | number {
79-
if (data === null || data === undefined || typeof data !== 'object') {
80-
return JSON.stringify(data)
99+
if (
100+
data === null ||
101+
data === undefined ||
102+
(typeof data !== 'object' && typeof data !== 'function')
103+
) {
104+
// Can't be cached in the weak map because it's not an object
105+
const serialized = JSON.stringify(data, hashReplacer)
106+
return murmurhash.murmur3(serialized)
81107
}
82108

83109
if (hashCache.has(data)) {
84110
return hashCache.get(data)
85111
}
86112

87-
return murmurhash.murmur3(JSON.stringify(JSON.stringify(data)))
113+
const serialized = JSON.stringify(data, hashReplacer)
114+
const hashValue = murmurhash.murmur3(JSON.stringify(serialized))
115+
hashCache.set(data, hashValue)
116+
return hashValue
88117
}

packages/d2ts/tests/utils.test.ts

Lines changed: 319 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { describe, it, expect } from 'vitest'
2-
import { DefaultMap, WeakRefMap } from '../src/utils.js'
2+
import { DefaultMap, WeakRefMap, hash } from '../src/utils.js'
33

44
describe('DefaultMap', () => {
55
it('should return default value for missing keys', () => {
@@ -76,3 +76,321 @@ describe('WeakRefMap', () => {
7676
// }
7777
// })
7878
})
79+
80+
describe('hash', () => {
81+
describe('primitive types', () => {
82+
it('should hash null', () => {
83+
const result = hash(null)
84+
expect(typeof result).toBe('number')
85+
})
86+
87+
it('should hash undefined', () => {
88+
const result = hash(undefined)
89+
expect(typeof result).toBe('number')
90+
})
91+
92+
it('should hash strings', () => {
93+
const result1 = hash('hello')
94+
const result2 = hash('')
95+
const result3 = hash('test with spaces')
96+
const result4 = hash('special\nchars\t"')
97+
98+
expect(typeof result1).toBe('number')
99+
expect(typeof result2).toBe('number')
100+
expect(typeof result3).toBe('number')
101+
expect(typeof result4).toBe('number')
102+
103+
// Same strings should have same hash
104+
expect(hash('hello')).toBe(result1)
105+
})
106+
107+
it('should hash numbers', () => {
108+
const result1 = hash(42)
109+
const result2 = hash(0)
110+
const result3 = hash(-1)
111+
const result4 = hash(3.14159)
112+
const result5 = hash(Infinity)
113+
const result6 = hash(-Infinity)
114+
const result7 = hash(NaN)
115+
116+
expect(typeof result1).toBe('number')
117+
expect(typeof result2).toBe('number')
118+
expect(typeof result3).toBe('number')
119+
expect(typeof result4).toBe('number')
120+
expect(typeof result5).toBe('number')
121+
expect(typeof result6).toBe('number')
122+
expect(typeof result7).toBe('number')
123+
124+
// Same numbers should have same hash
125+
expect(hash(42)).toBe(result1)
126+
})
127+
128+
it('should hash booleans', () => {
129+
const result1 = hash(true)
130+
const result2 = hash(false)
131+
132+
expect(typeof result1).toBe('number')
133+
expect(typeof result2).toBe('number')
134+
expect(result1).not.toBe(result2)
135+
136+
// Same booleans should have same hash
137+
expect(hash(true)).toBe(result1)
138+
expect(hash(false)).toBe(result2)
139+
})
140+
141+
it('should hash bigint', () => {
142+
const result1 = hash(123n)
143+
const result2 = hash(456n)
144+
const result3 = hash(123n)
145+
146+
expect(typeof result1).toBe('number')
147+
expect(typeof result2).toBe('number')
148+
expect(typeof result3).toBe('number')
149+
expect(result1).toBe(result3) // Same bigint should have same hash
150+
expect(result1).not.toBe(result2) // Different bigints should have different hash
151+
})
152+
153+
it('should hash symbols', () => {
154+
const sym1 = Symbol('test')
155+
const sym2 = Symbol('test')
156+
const sym3 = Symbol('different')
157+
158+
const result1 = hash(sym1)
159+
const result2 = hash(sym2)
160+
const result3 = hash(sym3)
161+
162+
expect(typeof result1).toBe('number')
163+
expect(typeof result2).toBe('number')
164+
expect(typeof result3).toBe('number')
165+
// Note: Different symbol instances with same description have same string representation
166+
expect(result1).toBe(result2)
167+
expect(result1).not.toBe(result3)
168+
})
169+
})
170+
171+
describe('object types', () => {
172+
it('should hash plain objects', () => {
173+
const obj1 = { a: 1, b: 2 }
174+
const obj2 = { b: 2, a: 1 } // Different key order
175+
176+
const hash1 = hash(obj1)
177+
const hash2 = hash(obj2)
178+
179+
expect(typeof hash1).toBe('number')
180+
expect(typeof hash2).toBe('number')
181+
// Note: JSON.stringify emits keys in insertion order, so obj1 and obj2 serialize differently; hash equality is deliberately not asserted here
182+
})
183+
184+
it('should hash arrays', () => {
185+
const arr1 = [1, 2, 3]
186+
const arr2 = [1, 2, 3]
187+
const arr3 = [3, 2, 1]
188+
189+
const hash1 = hash(arr1)
190+
const hash2 = hash(arr2)
191+
const hash3 = hash(arr3)
192+
193+
expect(typeof hash1).toBe('number')
194+
expect(hash1).toBe(hash2) // Same content should have same hash
195+
expect(hash1).not.toBe(hash3) // Different content should have different hash
196+
})
197+
198+
it('should hash Date objects', () => {
199+
const date1 = new Date('2023-01-01')
200+
const date2 = new Date('2023-01-01')
201+
const date3 = new Date('2023-01-02')
202+
203+
const hash1 = hash(date1)
204+
const hash2 = hash(date2)
205+
const hash3 = hash(date3)
206+
207+
expect(typeof hash1).toBe('number')
208+
expect(hash1).toBe(hash2) // Same date should have same hash
209+
expect(hash1).not.toBe(hash3) // Different dates should have different hash
210+
})
211+
212+
it('should hash RegExp objects', () => {
213+
const regex1 = /test/g
214+
const regex2 = /test/g
215+
const regex3 = /different/i
216+
217+
const hash1 = hash(regex1)
218+
const hash2 = hash(regex2)
219+
const hash3 = hash(regex3)
220+
221+
expect(typeof hash1).toBe('number')
222+
expect(hash1).toBe(hash2) // Same regex should have same hash
223+
// Note: RegExp objects serialize to empty objects {}, so they all produce the same hash
224+
expect(hash1).toBe(hash3) // All RegExp objects have the same hash
225+
})
226+
227+
it('should hash nested objects', () => {
228+
const nested1 = { a: { b: { c: 1 } } }
229+
const nested2 = { a: { b: { c: 1 } } }
230+
const nested3 = { a: { b: { c: 2 } } }
231+
232+
const hash1 = hash(nested1)
233+
const hash2 = hash(nested2)
234+
const hash3 = hash(nested3)
235+
236+
expect(typeof hash1).toBe('number')
237+
expect(hash1).toBe(hash2)
238+
expect(hash1).not.toBe(hash3)
239+
})
240+
241+
it('should hash functions', () => {
242+
const func1 = function test() { return 1 }
243+
const func2 = function test() { return 1 }
244+
const func3 = function different() { return 2 }
245+
246+
const hash1 = hash(func1)
247+
const hash2 = hash(func2)
248+
const hash3 = hash(func3)
249+
250+
expect(typeof hash1).toBe('number')
251+
expect(typeof hash2).toBe('number')
252+
expect(typeof hash3).toBe('number')
253+
expect(hash1).toBe(hash2) // Same function definition should have same hash
254+
expect(hash1).not.toBe(hash3) // Different function should have different hash
255+
})
256+
257+
it('should hash Set objects', () => {
258+
const set1 = new Set([1, 2, 3])
259+
const set2 = new Set([1, 2, 3])
260+
const set3 = new Set([1, 2, 3, 4])
261+
262+
const hash1 = hash(set1)
263+
const hash2 = hash(set2)
264+
const hash3 = hash(set3)
265+
266+
expect(typeof hash1).toBe('number')
267+
expect(hash1).toBe(hash2) // Same content should have same hash
268+
expect(hash1).not.toBe(hash3) // Different content should have different hash
269+
})
270+
271+
it('should hash Map objects', () => {
272+
const map1 = new Map([['a', 1], ['b', 2]])
273+
const map2 = new Map([['a', 1], ['b', 2]])
274+
const map3 = new Map([['a', 1], ['b', 2], ['c', 3]])
275+
276+
const hash1 = hash(map1)
277+
const hash2 = hash(map2)
278+
const hash3 = hash(map3)
279+
280+
expect(typeof hash1).toBe('number')
281+
expect(hash1).toBe(hash2) // Same content should have same hash
282+
expect(hash1).not.toBe(hash3) // Different content should have different hash
283+
})
284+
285+
it('should hash Maps and Sets with unsupported types', () => {
286+
// Map with BigInt values
287+
const mapWithBigInt1 = new Map([['a', 123n], ['b', 456n]])
288+
const mapWithBigInt2 = new Map([['a', 123n], ['b', 456n]])
289+
const mapWithBigInt3 = new Map([['a', 123n], ['b', 789n]])
290+
291+
const hash1 = hash(mapWithBigInt1)
292+
const hash2 = hash(mapWithBigInt2)
293+
const hash3 = hash(mapWithBigInt3)
294+
295+
expect(typeof hash1).toBe('number')
296+
expect(hash1).toBe(hash2) // Same BigInt content should have same hash
297+
expect(hash1).not.toBe(hash3) // Different BigInt content should have different hash
298+
299+
// Set with Symbol values
300+
const sym1 = Symbol('test')
301+
const sym2 = Symbol('different')
302+
const setWithSymbols1 = new Set([sym1, sym2])
303+
const setWithSymbols2 = new Set([sym1, sym2])
304+
const setWithSymbols3 = new Set([sym1])
305+
306+
const hash4 = hash(setWithSymbols1)
307+
const hash5 = hash(setWithSymbols2)
308+
const hash6 = hash(setWithSymbols3)
309+
310+
expect(typeof hash4).toBe('number')
311+
expect(hash4).toBe(hash5) // Same Symbol content should have same hash
312+
expect(hash4).not.toBe(hash6) // Different Symbol content should have different hash
313+
})
314+
})
315+
316+
describe('caching behavior', () => {
317+
it('should cache hash values for objects', () => {
318+
const obj = { test: 'value' }
319+
320+
const hash1 = hash(obj)
321+
const hash2 = hash(obj)
322+
323+
expect(hash1).toBe(hash2)
324+
expect(typeof hash1).toBe('number')
325+
})
326+
327+
it('should return cached values on subsequent calls', () => {
328+
const obj = { complex: { nested: { data: [1, 2, 3] } } }
329+
330+
// First call should compute and cache
331+
const hash1 = hash(obj)
332+
333+
// Second call should return cached value
334+
const hash2 = hash(obj)
335+
336+
expect(hash1).toBe(hash2)
337+
expect(typeof hash1).toBe('number')
338+
})
339+
340+
it('should not cache primitive values', () => {
341+
// Primitives cannot be WeakMap keys, so they are never cached; their hash is recomputed from the serialized value on every call
342+
const hash1 = hash('test')
343+
const hash2 = hash('test')
344+
345+
expect(hash1).toBe(hash2)
346+
expect(typeof hash1).toBe('number')
347+
})
348+
})
349+
350+
describe('edge cases', () => {
351+
it('should handle empty objects and arrays', () => {
352+
expect(typeof hash({})).toBe('number')
353+
expect(typeof hash([])).toBe('number')
354+
expect(hash({})).not.toBe(hash([]))
355+
})
356+
357+
it('should handle objects with null and undefined values', () => {
358+
const obj1 = { a: null, b: undefined }
359+
const obj2 = { a: null, b: undefined }
360+
361+
const hash1 = hash(obj1)
362+
const hash2 = hash(obj2)
363+
364+
expect(hash1).toBe(hash2)
365+
expect(typeof hash1).toBe('number')
366+
})
367+
368+
it('should handle mixed type arrays', () => {
369+
const mixedArray = [1, 'string', true, null, { key: 'value' }]
370+
const sameArray = [1, 'string', true, null, { key: 'value' }]
371+
372+
const hash1 = hash(mixedArray)
373+
const hash2 = hash(sameArray)
374+
375+
expect(hash1).toBe(hash2)
376+
expect(typeof hash1).toBe('number')
377+
})
378+
379+
it('should produce consistent hashes for same content', () => {
380+
const obj = {
381+
string: 'test',
382+
number: 42,
383+
boolean: true,
384+
array: [1, 2, 3],
385+
nested: { inner: 'value' }
386+
}
387+
388+
// Multiple calls should return the same hash
389+
const hashes = Array.from({ length: 5 }, () => hash(obj))
390+
const firstHash = hashes[0]
391+
392+
expect(hashes.every(h => h === firstHash)).toBe(true)
393+
expect(typeof firstHash).toBe('number')
394+
})
395+
})
396+
})

0 commit comments

Comments
 (0)