Skip to content

Commit 3d7a9da

Browse files
authored
feat: add cuckoo filter (#2510)
Add a filter type that lets us efficiently remove items as well as add them. It would be better to use the `bloom-filters` module at this point but it adds 50KB+ to browser bundles for very simple use cases so it's not suitable. We can revisit if Callidon/bloom-filters#70 is ever resolved.
1 parent 3bc94b4 commit 3d7a9da

File tree

15 files changed

+626
-36
lines changed

15 files changed

+626
-36
lines changed

packages/peer-collections/src/filter.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1-
import { BloomFilter } from '@libp2p/utils/bloom-filter'
1+
import { createScalableCuckooFilter } from '@libp2p/utils/filters'
22
import type { PeerId } from '@libp2p/interface'
3+
import type { Filter } from '@libp2p/utils/filters'
34

45
/**
56
* Uses a Cuckoo filter to implement a mechanism for deduplicating PeerIds in a
67
* way that uses a fixed amount of memory.
78
*/
89
export class PeerFilter {
9-
private readonly filter: BloomFilter
10+
private readonly filter: Filter
1011

1112
constructor (size: number, errorRate?: number) {
12-
this.filter = BloomFilter.create(size, errorRate)
13+
this.filter = createScalableCuckooFilter(size, errorRate)
1314
}
1415

1516
has (peerId: PeerId): boolean {
@@ -19,6 +20,10 @@ export class PeerFilter {
1920
add (peerId: PeerId): void {
2021
this.filter.add(peerId.toBytes())
2122
}
23+
24+
remove (peerId: PeerId): void {
25+
this.filter.remove?.(peerId.toBytes())
26+
}
2227
}
2328

2429
export function peerFilter (size: number): PeerFilter {

packages/peer-collections/test/filter.spec.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,9 @@ describe('peer-filter', () => {
1212
filter.add(peer)
1313

1414
expect(filter.has(peer)).to.be.true()
15+
16+
filter.remove(peer)
17+
18+
expect(filter.has(peer)).to.be.false()
1519
})
1620
})

packages/peer-collections/tsconfig.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
},
1717
{
1818
"path": "../peer-id-factory"
19+
},
20+
{
21+
"path": "../utils"
1922
}
2023
]
2124
}

packages/utils/package.json

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,14 @@
5656
"types": "./dist/src/array-equals.d.ts",
5757
"import": "./dist/src/array-equals.js"
5858
},
59-
"./bloom-filter": {
60-
"types": "./dist/src/bloom-filter.d.ts",
61-
"import": "./dist/src/bloom-filter.js"
62-
},
6359
"./close-source": {
6460
"types": "./dist/src/close-source.d.ts",
6561
"import": "./dist/src/close-source.js"
6662
},
63+
"./filters": {
64+
"types": "./dist/src/filters/index.d.ts",
65+
"import": "./dist/src/filters/index.js"
66+
},
6767
"./ip-port-to-multiaddr": {
6868
"types": "./dist/src/ip-port-to-multiaddr.d.ts",
6969
"import": "./dist/src/ip-port-to-multiaddr.js"
@@ -137,6 +137,7 @@
137137
"@libp2p/logger": "^4.0.11",
138138
"@multiformats/multiaddr": "^12.2.1",
139139
"@multiformats/multiaddr-matcher": "^1.2.0",
140+
"@sindresorhus/fnv1a": "^3.1.0",
140141
"@types/murmurhash3js-revisited": "^3.0.3",
141142
"delay": "^6.0.0",
142143
"get-iterator": "^2.0.1",

packages/utils/src/bloom-filter.ts renamed to packages/utils/src/filters/bloom-filter.ts

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import mur from 'murmurhash3js-revisited'
44
import { Uint8ArrayList } from 'uint8arraylist'
55
import { alloc } from 'uint8arrays/alloc'
66
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
7+
import type { Filter } from './index.js'
78

89
const LN2_SQUARED = Math.LN2 * Math.LN2
910

@@ -13,16 +14,7 @@ export interface BloomFilterOptions {
1314
bits?: number
1415
}
1516

16-
export class BloomFilter {
17-
/**
18-
* Create a `BloomFilter` with the smallest `bits` and `hashes` value for the
19-
* specified item count and error rate.
20-
*/
21-
static create (itemcount: number, errorRate: number = 0.005): BloomFilter {
22-
const opts = optimize(itemcount, errorRate)
23-
return new BloomFilter(opts)
24-
}
25-
17+
export class BloomFilter implements Filter {
2618
public readonly seeds: number[]
2719
public readonly bits: number
2820
public buffer: Uint8Array
@@ -111,9 +103,18 @@ export class BloomFilter {
111103
}
112104
}
113105

114-
function optimize (itemcount: number, errorRate: number = 0.005): { bits: number, hashes: number } {
115-
const bits = Math.round(-1 * itemcount * Math.log(errorRate) / LN2_SQUARED)
116-
const hashes = Math.round((bits / itemcount) * Math.LN2)
106+
/**
107+
* Create a `BloomFilter` with the smallest `bits` and `hashes` value for the
108+
* specified item count and error rate.
109+
*/
110+
export function createBloomFilter (itemcount: number, errorRate: number = 0.005): Filter {
111+
const opts = optimize(itemcount, errorRate)
112+
return new BloomFilter(opts)
113+
}
114+
115+
function optimize (itemCount: number, errorRate: number = 0.005): { bits: number, hashes: number } {
116+
const bits = Math.round(-1 * itemCount * Math.log(errorRate) / LN2_SQUARED)
117+
const hashes = Math.round((bits / itemCount) * Math.LN2)
117118

118119
return { bits, hashes }
119120
}

packages/utils/src/filters/bucket.ts

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import { Fingerprint } from './fingerprint.js'
2+
import { getRandomInt } from './utils.js'
3+
4+
export class Bucket {
5+
private readonly contents: Array<Fingerprint | null>
6+
7+
constructor (size: number) {
8+
this.contents = new Array(size).fill(null)
9+
}
10+
11+
has (fingerprint: Fingerprint): boolean {
12+
if (!(fingerprint instanceof Fingerprint)) {
13+
throw new TypeError('Invalid Fingerprint')
14+
}
15+
16+
return this.contents.some((fp) => {
17+
return fingerprint.equals(fp)
18+
})
19+
}
20+
21+
add (fingerprint: Fingerprint): boolean {
22+
if (!(fingerprint instanceof Fingerprint)) {
23+
throw new TypeError('Invalid Fingerprint')
24+
}
25+
26+
for (let i = 0; i < this.contents.length; i++) {
27+
if (this.contents[i] == null) {
28+
this.contents[i] = fingerprint
29+
return true
30+
}
31+
}
32+
33+
return true
34+
}
35+
36+
swap (fingerprint: Fingerprint): Fingerprint | null {
37+
if (!(fingerprint instanceof Fingerprint)) {
38+
throw new TypeError('Invalid Fingerprint')
39+
}
40+
41+
const i = getRandomInt(0, this.contents.length - 1)
42+
const current = this.contents[i]
43+
this.contents[i] = fingerprint
44+
45+
return current
46+
}
47+
48+
remove (fingerprint: Fingerprint): boolean {
49+
if (!(fingerprint instanceof Fingerprint)) {
50+
throw new TypeError('Invalid Fingerprint')
51+
}
52+
53+
const found = this.contents.findIndex((fp) => {
54+
return fingerprint.equals(fp)
55+
})
56+
57+
if (found > -1) {
58+
this.contents[found] = null
59+
return true
60+
} else {
61+
return false
62+
}
63+
}
64+
}
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
2+
import { Bucket } from './bucket.js'
3+
import { Fingerprint, MAX_FINGERPRINT_SIZE } from './fingerprint.js'
4+
import { fnv1a, type Hash } from './hashes.js'
5+
import { getRandomInt } from './utils.js'
6+
import type { Filter } from './index.js'
7+
8+
const maxCuckooCount = 500
9+
10+
/**
 * Construction options for a `CuckooFilter`
 */
export interface CuckooFilterInit {
  /** How many items the filter is expected to contain */
  filterSize: number

  /** How many items to put in each bucket */
  bucketSize?: number

  /** How many bytes the fingerprint is expected to be */
  fingerprintSize?: number

  /** A non-cryptographic hash implementation */
  hash?: Hash

  /** A number used to seed the hash */
  seed?: number
}
36+
37+
export class CuckooFilter implements Filter {
38+
private readonly bucketSize: number
39+
private readonly filterSize: number
40+
private readonly fingerprintSize: number
41+
private readonly buckets: Bucket[]
42+
public count: number
43+
private readonly hash: Hash
44+
private readonly seed: number
45+
46+
constructor (init: CuckooFilterInit) {
47+
this.filterSize = init.filterSize
48+
this.bucketSize = init.bucketSize ?? 4
49+
this.fingerprintSize = init.fingerprintSize ?? 2
50+
this.count = 0
51+
this.buckets = []
52+
this.hash = init.hash ?? fnv1a
53+
this.seed = init.seed ?? getRandomInt(0, Math.pow(2, 10))
54+
}
55+
56+
add (item: Uint8Array | string): boolean {
57+
if (typeof item === 'string') {
58+
item = uint8ArrayFromString(item)
59+
}
60+
61+
const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
62+
const j = this.hash.hash(item, this.seed) % this.filterSize
63+
const k = (j ^ fingerprint.hash()) % this.filterSize
64+
65+
if (this.buckets[j] == null) {
66+
this.buckets[j] = new Bucket(this.bucketSize)
67+
}
68+
69+
if (this.buckets[k] == null) {
70+
this.buckets[k] = new Bucket(this.bucketSize)
71+
}
72+
73+
if (this.buckets[j].add(fingerprint) || this.buckets[k].add(fingerprint)) {
74+
this.count++
75+
return true
76+
}
77+
78+
const rand = [j, k]
79+
let i = rand[getRandomInt(0, rand.length - 1)]
80+
81+
if (this.buckets[i] == null) {
82+
this.buckets[i] = new Bucket(this.bucketSize)
83+
}
84+
85+
for (let n = 0; n < maxCuckooCount; n++) {
86+
const swapped = this.buckets[i].swap(fingerprint)
87+
88+
if (swapped == null) {
89+
continue
90+
}
91+
92+
i = (i ^ swapped.hash()) % this.filterSize
93+
94+
if (this.buckets[i] == null) {
95+
this.buckets[i] = new Bucket(this.bucketSize)
96+
}
97+
98+
if (this.buckets[i].add(swapped)) {
99+
this.count++
100+
101+
return true
102+
} else {
103+
continue
104+
}
105+
}
106+
107+
return false
108+
}
109+
110+
has (item: Uint8Array | string): boolean {
111+
if (typeof item === 'string') {
112+
item = uint8ArrayFromString(item)
113+
}
114+
115+
const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
116+
const j = this.hash.hash(item, this.seed) % this.filterSize
117+
const inJ = this.buckets[j]?.has(fingerprint) ?? false
118+
119+
if (inJ) {
120+
return inJ
121+
}
122+
123+
const k = (j ^ fingerprint.hash()) % this.filterSize
124+
125+
return this.buckets[k]?.has(fingerprint) ?? false
126+
}
127+
128+
remove (item: Uint8Array | string): boolean {
129+
if (typeof item === 'string') {
130+
item = uint8ArrayFromString(item)
131+
}
132+
133+
const fingerprint = new Fingerprint(item, this.hash, this.seed, this.fingerprintSize)
134+
const j = this.hash.hash(item, this.seed) % this.filterSize
135+
const inJ = this.buckets[j]?.remove(fingerprint) ?? false
136+
137+
if (inJ) {
138+
this.count--
139+
return inJ
140+
}
141+
142+
const k = (j ^ fingerprint.hash()) % this.filterSize
143+
const inK = this.buckets[k]?.remove(fingerprint) ?? false
144+
145+
if (inK) {
146+
this.count--
147+
}
148+
149+
return inK
150+
}
151+
152+
get reliable (): boolean {
153+
return Math.floor(100 * (this.count / this.filterSize)) <= 95
154+
}
155+
}
156+
157+
// max load constants, defined in the cuckoo paper
158+
const MAX_LOAD = {
159+
1: 0.5,
160+
2: 0.84,
161+
4: 0.95,
162+
8: 0.98
163+
}
164+
165+
/**
 * Chooses how many entries each bucket should hold for the requested error
 * rate: 2 when the rate is above 0.002, 4 when it is above 0.00001, otherwise
 * 8.
 */
function calculateBucketSize (errorRate: number = 0.001): 2 | 4 | 8 {
  return errorRate > 0.002
    ? 2
    : errorRate > 0.00001
      ? 4
      : 8
}
176+
177+
/**
 * Derives `CuckooFilter` options from the number of items the filter should
 * hold and the requested error rate.
 *
 * @param maxItems - how many items the filter should be able to hold
 * @param errorRate - the requested error rate, used to pick the bucket size
 * @returns the computed `filterSize`, `bucketSize` and `fingerprintSize`
 */
export function optimize (maxItems: number, errorRate: number = 0.001): CuckooFilterInit {
  // https://www.eecs.harvard.edu/~michaelm/postscripts/cuckoo-conext2014.pdf
  // Section 5.1 Optimal Bucket Size
  const bucketSize = calculateBucketSize(errorRate)

  // https://stackoverflow.com/questions/57555236/how-to-size-a-cuckoo-filter/57617208#57617208
  const filterSize = Math.round(maxItems / MAX_LOAD[bucketSize])

  // the fingerprint size is capped at MAX_FINGERPRINT_SIZE
  const uncapped = Math.ceil(Math.log(filterSize / bucketSize)) + 2

  return {
    filterSize,
    bucketSize,
    fingerprintSize: Math.min(uncapped, MAX_FINGERPRINT_SIZE)
  }
}
193+
194+
/**
 * Creates a `CuckooFilter` using the options that `optimize` computes for the
 * passed item count and error rate.
 */
export function createCuckooFilter (maxItems: number, errorRate: number = 0.005): Filter {
  return new CuckooFilter(optimize(maxItems, errorRate))
}

0 commit comments

Comments
 (0)