Skip to content

Commit e1923b0

Browse files
authored
feat: add bloom filter (#2507)
Adds a bloom filter implementation to `@libp2p/utils` for use in libp2p components.
1 parent 998fcaf commit e1923b0

File tree

3 files changed

+350
-3
lines changed

3 files changed

+350
-3
lines changed

packages/utils/package.json

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@
5656
"types": "./dist/src/array-equals.d.ts",
5757
"import": "./dist/src/array-equals.js"
5858
},
59+
"./bloom-filter": {
60+
"types": "./dist/src/bloom-filter.d.ts",
61+
"import": "./dist/src/bloom-filter.js"
62+
},
5963
"./close-source": {
6064
"types": "./dist/src/close-source.d.ts",
6165
"import": "./dist/src/close-source.js"
@@ -128,20 +132,24 @@
128132
},
129133
"dependencies": {
130134
"@chainsafe/is-ip": "^2.0.2",
135+
"@libp2p/crypto": "^4.1.0",
131136
"@libp2p/interface": "^1.3.0",
132137
"@libp2p/logger": "^4.0.11",
133138
"@multiformats/multiaddr": "^12.2.1",
134139
"@multiformats/multiaddr-matcher": "^1.2.0",
140+
"@types/murmurhash3js-revisited": "^3.0.3",
135141
"delay": "^6.0.0",
136142
"get-iterator": "^2.0.1",
137143
"is-loopback-addr": "^2.0.2",
138144
"it-pushable": "^3.2.3",
139145
"it-stream-types": "^2.0.1",
146+
"murmurhash3js-revisited": "^3.0.0",
140147
"netmask": "^2.0.2",
141148
"p-defer": "^4.0.1",
142149
"race-event": "^1.2.0",
143150
"race-signal": "^1.0.2",
144-
"uint8arraylist": "^2.4.8"
151+
"uint8arraylist": "^2.4.8",
152+
"uint8arrays": "^5.0.3"
145153
},
146154
"devDependencies": {
147155
"@libp2p/peer-id-factory": "^4.1.0",
@@ -153,8 +161,7 @@
153161
"it-pair": "^2.0.6",
154162
"it-pipe": "^3.0.1",
155163
"sinon": "^17.0.1",
156-
"sinon-ts": "^2.0.0",
157-
"uint8arrays": "^5.0.3"
164+
"sinon-ts": "^2.0.0"
158165
},
159166
"sideEffects": false
160167
}

packages/utils/src/bloom-filter.ts

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE
2+
import { randomBytes } from '@libp2p/crypto'
3+
import mur from 'murmurhash3js-revisited'
4+
import { Uint8ArrayList } from 'uint8arraylist'
5+
import { alloc } from 'uint8arrays/alloc'
6+
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
7+
8+
// (ln 2)^2 - the divisor used by `optimize` when deriving the number of bits
// from the expected item count and the target error rate
const LN2_SQUARED = Math.LN2 * Math.LN2
9+
10+
export interface BloomFilterOptions {
11+
seeds?: number[]
12+
hashes?: number
13+
bits?: number
14+
}
15+
16+
/**
 * A probabilistic set backed by a bit array. Every item is hashed once per
 * seed with 32-bit murmur3 and each hash selects one bit of `buffer`.
 */
export class BloomFilter {
  /**
   * Create a `BloomFilter` with the smallest `bits` and `hashes` value for the
   * specified item count and error rate.
   */
  static create (itemcount: number, errorRate: number = 0.005): BloomFilter {
    const opts = optimize(itemcount, errorRate)
    return new BloomFilter(opts)
  }

  /**
   * One hash is computed per seed for every item added or tested
   */
  public readonly seeds: number[]

  /**
   * The number of addressable bits in the filter
   */
  public readonly bits: number

  /**
   * Backing storage - `Math.ceil(bits / 8)` bytes, bit `n` lives in byte
   * `Math.floor(n / 8)` at position `n % 8` (least significant bit first)
   */
  public buffer: Uint8Array

  /**
   * Uses `options.seeds` when supplied, otherwise generates `options.hashes`
   * (default 8) random seeds. The filter holds `options.bits` (default 1024)
   * bits.
   */
  constructor (options: BloomFilterOptions = {}) {
    if (options.seeds != null) {
      this.seeds = options.seeds
    } else {
      this.seeds = generateSeeds(options.hashes ?? 8)
    }

    this.bits = options.bits ?? 1024
    this.buffer = alloc(Math.ceil(this.bits / 8))
  }

  /**
   * Add an item to the filter
   */
  add (item: Uint8Array | string): void {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    for (let i = 0; i < this.seeds.length; i++) {
      const hash = mur.x86.hash32(item, this.seeds[i])
      const bit = hash % this.bits

      this.setbit(bit)
    }
  }

  /**
   * Test if the filter has an item. If it returns false the item has
   * definitely not been added. If it returns true the item has probably been
   * added, but the result may be a false positive.
   */
  has (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    for (let i = 0; i < this.seeds.length; i++) {
      const hash = mur.x86.hash32(item, this.seeds[i])
      const bit = hash % this.bits

      // a single unset bit proves the item was never added
      if (!this.getbit(bit)) {
        return false
      }
    }

    return true
  }

  /**
   * Reset the filter
   */
  clear (): void {
    this.buffer.fill(0)
  }

  /**
   * Set the bit at the passed index. `bit` is expected to be a non-negative
   * integer - indexes beyond the end of `buffer` are ignored.
   */
  setbit (bit: number): void {
    // locate the byte and the offset within it arithmetically rather than by
    // walking the buffer eight bits at a time
    const pos = Math.floor(bit / 8)
    const shift = bit % 8

    this.buffer[pos] |= (0x1 << shift)
  }

  /**
   * Returns true if the bit at the passed index is set. `bit` is expected to
   * be a non-negative integer - indexes beyond the end of `buffer` read as
   * unset.
   */
  getbit (bit: number): boolean {
    const pos = Math.floor(bit / 8)
    const shift = bit % 8

    return (this.buffer[pos] & (0x1 << shift)) !== 0
  }
}
113+
114+
function optimize (itemcount: number, errorRate: number = 0.005): { bits: number, hashes: number } {
115+
const bits = Math.round(-1 * itemcount * Math.log(errorRate) / LN2_SQUARED)
116+
const hashes = Math.round((bits / itemcount) * Math.LN2)
117+
118+
return { bits, hashes }
119+
}
120+
121+
function generateSeeds (count: number): number[] {
122+
let buf: Uint8ArrayList
123+
let j: number
124+
const seeds = []
125+
126+
for (let i = 0; i < count; i++) {
127+
buf = new Uint8ArrayList(randomBytes(4))
128+
seeds[i] = buf.getUint32(0, true)
129+
130+
// Make sure we don't end up with two identical seeds,
131+
// which is unlikely but possible.
132+
for (j = 0; j < i; j++) {
133+
if (seeds[i] === seeds[j]) {
134+
i--
135+
break
136+
}
137+
}
138+
}
139+
140+
return seeds
141+
}
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
// ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE
2+
import { expect } from 'aegir/chai'
3+
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
4+
import { BloomFilter } from '../src/bloom-filter.js'
5+
6+
/**
 * Returns 1 if any byte of the passed buffer is non-zero, otherwise 0
 */
function hasBitsSet (buffer: Uint8Array): number {
  return buffer.some(byte => byte !== 0) ? 1 : 0
}
13+
14+
// Exercises construction, sizing via `BloomFilter.create`, raw bit access and
// the add/has/clear operations of `BloomFilter`. Seeds are random unless
// passed in, so membership checks for items that were never added can very
// rarely report a false positive.
describe('bloom-filter', () => {
  it('constructs a filter of the requested size', () => {
    const filter = new BloomFilter({ hashes: 4, bits: 32 })
    expect(filter.seeds).to.have.lengthOf(4)
    expect(filter.bits).to.equal(32)
    expect(filter.buffer).to.be.an.instanceOf(Uint8Array)
  })

  it('zeroes out its storage buffer', () => {
    const filter = new BloomFilter({ hashes: 3, bits: 64 })
    for (let i = 0; i < filter.buffer.length; i++) {
      expect(filter.buffer[i]).to.equal(0)
    }
  })

  it('uses passed-in seeds if provided', () => {
    const filter = new BloomFilter({ bits: 256, seeds: [1, 2, 3, 4, 5] })
    expect(filter.seeds.length).to.equal(5)
    expect(filter.seeds[0]).to.equal(1)
    expect(filter.seeds[4]).to.equal(5)
  })

  describe('create()', () => {
    it('creates a filter with good defaults', () => {
      let filter = BloomFilter.create(95)
      expect(filter.bits).to.equal(1048)
      expect(filter.seeds.length).to.equal(8)

      filter = BloomFilter.create(148)
      expect(filter.bits).to.equal(1632)
      expect(filter.seeds.length).to.equal(8)

      filter = BloomFilter.create(10)
      expect(filter.bits).to.equal(110)
      expect(filter.seeds.length).to.equal(8)
    })

    it('lets you specify an error rate', () => {
      let filter = BloomFilter.create(20000)
      expect(filter.bits).to.equal(220555)
      const previous = filter.bits

      // a more permissive error rate needs fewer bits
      filter = BloomFilter.create(20000, 0.2)
      expect(filter.bits).to.be.below(previous)
    })
  })

  describe('setbit() and getbit()', () => {
    it('sets the specified bit', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 16 })

      filter.setbit(0)
      let val = filter.getbit(0)
      expect(val).to.equal(true)

      filter.setbit(1)
      val = filter.getbit(1)
      expect(val).to.equal(true)

      val = filter.getbit(2)
      expect(val).to.equal(false)

      filter.setbit(10)
      val = filter.getbit(10)
      expect(val).to.equal(true)
    })

    it('can set all bits', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 16 })
      expect(filter.buffer.length).to.equal(2)

      for (let i = 0; i < 16; i++) {
        filter.setbit(i)
      }

      for (let i = 0; i < 2; i++) {
        const value = filter.buffer[i]
        expect(value).to.equal(255)
      }
    })

    it('slides over into the next buffer slice when setting bits', () => {
      let val: number
      const filter = new BloomFilter({ hashes: 3, bits: 64 })

      filter.setbit(8)
      val = filter.buffer[1]
      expect(val).to.equal(1)

      filter.setbit(17)
      val = filter.buffer[2]
      expect(val).to.equal(2)

      filter.setbit(34)
      val = filter.buffer[4]
      expect(val).to.equal(4)
    })
  })

  describe('add()', () => {
    it('can store buffers', () => {
      const filter = new BloomFilter({ hashes: 4, bits: 128 })

      expect(hasBitsSet(filter.buffer)).to.equal(0)
      filter.add(uint8ArrayFromString('cat'))
      expect(hasBitsSet(filter.buffer)).to.equal(1)
    })

    it('can store strings', () => {
      const filter = new BloomFilter({ hashes: 4, bits: 128 })
      filter.add('cat')

      expect(hasBitsSet(filter.buffer)).to.equal(1)
    })

    it('can add a hundred random items', () => {
      const alpha = '0123456789abcdefghijklmnopqrstuvwxyz'
      function randomWord (length?: number): string {
        length = length ?? Math.ceil(Math.random() * 20)
        let result = ''
        for (let i = 0; i < length; i++) {
          result += alpha[Math.floor(Math.random() * alpha.length)]
        }

        return result
      }

      const filter = BloomFilter.create(100)
      const words: string[] = []

      for (let i = 0; i < 100; i++) {
        const w = randomWord()
        words.push(w)
        filter.add(w)
      }

      for (let i = 0; i < words.length; i++) {
        expect(filter.has(words[i])).to.equal(true)
      }
    })
  })

  describe('has()', () => {
    it('returns true when called on a stored item', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 16 })
      filter.add('cat')

      expect(hasBitsSet(filter.buffer)).to.equal(1)
      expect(filter.has('cat')).to.be.true()
    })

    it('returns false for items not in the set (mostly)', () => {
      const filter = new BloomFilter({ hashes: 4, bits: 50 })
      filter.add('cat')
      expect(filter.has('dog')).to.be.false()
    })

    it('responds appropriately for arrays of added items', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 128 })
      filter.add('cat')
      filter.add('dog')
      filter.add('wallaby')

      expect(filter.has('cat')).to.equal(true)
      expect(filter.has('dog')).to.equal(true)
      expect(filter.has('wallaby')).to.equal(true)
      expect(filter.has('orange')).to.equal(false)
    })
  })

  describe('clear()', () => {
    it('clears the filter', () => {
      const filter = new BloomFilter({ hashes: 3, bits: 128 })
      filter.add('cat')
      filter.add('dog')
      filter.add('wallaby')
      expect(hasBitsSet(filter.buffer)).to.equal(1)

      filter.clear()
      expect(hasBitsSet(filter.buffer)).to.equal(0)
    })
  })
})

0 commit comments

Comments
 (0)