Commit 4985e69

update readme, minor fixes

1 parent dea8eca commit 4985e69

7 files changed: +97 −45 lines

badgerdb.go

Lines changed: 3 additions & 2 deletions
@@ -17,6 +17,7 @@ type BadgerStore struct {
 // default temp file path for badgerdb
 var badgerTmpFile = "/tmp/badger.db"
 
+// NewBadger instantiates a new BadgerStore.
 func NewBadger(opts ...badger.Options) *BadgerStore {
     store := &BadgerStore{
         dblock: sync.Mutex{},
@@ -86,8 +87,8 @@ func (store *BadgerStore) Put(key, value []byte) error {
     return err
 }
 
-// IsReady returns true if the store is ready to use.
-func (store *BadgerStore) IsReady() bool {
+// isReady returns true if the store is ready to use.
+func (store *BadgerStore) isReady() bool {
     return store.db != nil
 }
 

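The only functional change here is the rename of IsReady to the unexported isReady; NewBadger just gains a doc comment. For context, a minimal sketch of a Badger-backed filter, pieced together from the constructor signature above and the commented-out lines in cmd/main.go; the path and sizing values are illustrative, not part of this commit:

```go
package main

import (
    "github.com/dgraph-io/badger/v3"
    "github.com/dsa0x/gobloomgo"
)

func main() {
    // Persist the filter's key/value pairs in BadgerDB instead of holding them only in memory.
    opts := badger.DefaultOptions("/tmp/bloom.db")
    db := gobloomgo.NewBadger(opts)
    defer db.Close()

    // Illustrative sizing: 1% target false positive rate, 100000 expected items.
    bf := gobloomgo.NewBloom(0.01, 100000, db)
    bf.Add([]byte("key"), []byte("bar"))
}
```
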
bloom.go

Lines changed: 44 additions & 2 deletions
@@ -14,7 +14,7 @@ var ErrKeyNotFound = fmt.Errorf("Key not found")
 
 type BloomFilter struct {
 
-    // The desired false positive rate. e.g. 0.1 error rate implies 1 in 1000
+    // The desired false positive rate
     err_rate float64
 
     // the number of items intended to be added to the bloom filter (n)
@@ -53,11 +53,53 @@ func NewBloom(err_rate float64, capacity int, database Store) *BloomFilter {
         panic("Capacity must be greater than 0")
     }
 
+    // P = err_rate
+
+    // number of hash functions (k)
+    numHashFn := int(math.Ceil(math.Log2(1.0 / err_rate)))
+
+    //ln22 = ln2^2
+    ln22 := math.Pow(math.Ln2, 2)
+
+    // M
+    bit_width := int(math.Ceil((float64(capacity) * math.Abs(math.Log(err_rate))) /
+        ln22))
+    //m
+    bits_per_slice := bit_width / numHashFn
+
+    seeds := make([]int64, numHashFn)
+
+    for i := 0; i < len(seeds); i++ {
+        seeds[i] = int64((i + 1) << 16)
+    }
+
+    return &BloomFilter{
+        err_rate:  err_rate,
+        capacity:  capacity,
+        bit_width: bit_width,
+        bit_array: make([]bool, bit_width),
+        m:         bits_per_slice,
+        seeds:     seeds,
+        db:        database,
+    }
+}
+
+func NewBloomWithK(err_rate float64, capacity int, database Store, k int) *BloomFilter {
+    if err_rate <= 0 || err_rate >= 1 {
+        panic("Error rate must be between 0 and 1")
+    }
+    if capacity < 0 {
+        panic("Capacity must be greater than 0")
+    }
+
     // P
     err_rate /= 100.0
 
     // number of hash functions (k)
     numHashFn := int(math.Ceil(math.Log2(1.0 / err_rate)))
+    if k > 0 {
+        numHashFn = int(k)
+    }
 
     //ln22 = ln2^2
     ln22 := math.Pow(math.Ln2, 2)
@@ -135,7 +177,7 @@ func (bf *BloomFilter) Get(key []byte) []byte {
 }
 
 func (bf *BloomFilter) hasStore() bool {
-    return bf.db != nil && bf.db.IsReady()
+    return bf.db != nil && bf.db.isReady()
 }
 
 // every checks if each index in the indices array has a value of 1 in the bit array

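The constructor now derives its parameters directly from the target error rate instead of first dividing it by 100 (that scaling survives only in the new NewBloomWithK, which reuses the old body). The formulas in the hunk are the standard Bloom sizing rules: k = ceil(log2(1/P)) hash functions, total bit width M = ceil(n·|ln P| / ln²2), and m = M/k bits per slice. A standalone sketch that reproduces the arithmetic for illustrative inputs (P = 0.01, n = 100000):

```go
package main

import (
    "fmt"
    "math"
)

func main() {
    // Illustrative inputs: target false positive rate P and expected number of items n.
    P, n := 0.01, 100000.0

    k := int(math.Ceil(math.Log2(1.0 / P)))                                // number of hash functions
    M := int(math.Ceil(n * math.Abs(math.Log(P)) / math.Pow(math.Ln2, 2))) // total bit width
    m := M / k                                                             // bits per slice

    fmt.Println(k, M, m) // for these inputs: k=7, M=958506, m=136929
}
```
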
boltdb.go

Lines changed: 3 additions & 2 deletions
@@ -23,6 +23,7 @@ var (
     bucketName = "boltstore"
 )
 
+// NewBolt instantiates a new BoltStore.
 func NewBolt(filePath string, filemode os.FileMode, opts ...bolt.Options) *BoltStore {
     store := &BoltStore{
         filePath: filePath,
@@ -93,8 +94,8 @@ func (store *BoltStore) Put(key []byte, value []byte) error {
     return err
 }
 
-// IsReady returns true if the store is ready to use.
-func (store *BoltStore) IsReady() bool {
+// isReady returns true if the store is ready to use.
+func (store *BoltStore) isReady() bool {
     return store.db != nil
 }
 

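Same rename as in badgerdb.go, plus a doc comment on NewBolt. A corresponding sketch with a Bolt-backed scalable filter, again with illustrative path, file mode, and sizing (the optional bolt.Options variadic argument is simply omitted here):

```go
package main

import "github.com/dsa0x/gobloomgo"

func main() {
    // Back the filter with a BoltDB file; BoltStore satisfies the same Store interface.
    db := gobloomgo.NewBolt("/tmp/bolt.db", 0600)
    defer db.Close()

    bf := gobloomgo.NewScalableBloom(0.01, 1000, db)
    bf.Add([]byte("key"), []byte("bar"))
}
```
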
cmd/main.go

Lines changed: 6 additions & 20 deletions
@@ -2,7 +2,6 @@ package main
 
 import (
     "encoding/binary"
-    "fmt"
 
     "github.com/dsa0x/gobloomgo"
 )
@@ -18,30 +17,17 @@ func main() {
     // opts := badger.DefaultOptions("/tmp/bloom.db")
     // db := gobloomgo.NewBadger(opts)
 
-    bf := gobloomgo.NewBloom(0.1, 59000, nil)
-    // bf := gobloomgo.NewScalableBloom(0.1, 1000, nil)
+    // bf := gobloomgo.NewBloom(0.1, 50000, nil)
+    bf := gobloomgo.NewScalableBloom(0.000001, 10, nil)
 
-    // mp := map[bool]int{}
-
-    // start := time.Now()
-    // bf.Add([]byte("key"), []byte("bar"))
-    ii := 0
-    for i := 0; i < 50000; i++ {
+    for i := 0; i < 100000; i++ {
         var by [4]byte
         binary.LittleEndian.PutUint32(by[:], uint32(i))
         bf.Add(by[:], []byte("bar"))
-        ii++
-        count := bf.Count()
-
-        fmt.Println(by, bf.Find(by[:]), count == ii, "bfCo=", count, "ii=", ii)
-
     }
+    bf.Add([]byte("foo"), []byte("var"))
 
-    // fmt.Println((0 - bf.Count()), bf.Count(), ii)
-    // for i := 50000; i < 2000; i++ {
-    //     mp[bf.Find([]byte(fmt.Sprintf("foo%d", i)))] += 1
-    // }
-    // fmt.Printf("%s %v\n", bf.Get([]byte("key")), mp)
-    // fmt.Println(bf.Capacity(), time.Since(start))
+    // fmt.Printf("Count: %d, Capacity: %d, ExpCap: %.f\n", bf.Count(), bf.Capacity(), bf.ExpCapacity())
+    // fmt.Println(bf.Capacity(), bf.ExpCapacity(), bf.Count(), bf.Prob())
 
 }

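The demo now drives the scalable filter through 100000 Adds and drops the per-iteration Find/Count debugging. A sketch of how that removed check could be kept as a simple membership test, assuming Find returns a bool as the removed map[bool]int bookkeeping implies:

```go
package main

import (
    "encoding/binary"
    "fmt"

    "github.com/dsa0x/gobloomgo"
)

func main() {
    bf := gobloomgo.NewScalableBloom(0.000001, 10, nil)

    for i := 0; i < 100000; i++ {
        var by [4]byte
        binary.LittleEndian.PutUint32(by[:], uint32(i))
        bf.Add(by[:], []byte("bar"))
    }

    // Find reports whether a key is possibly present; false always means "never added".
    var by [4]byte
    binary.LittleEndian.PutUint32(by[:], 42)
    fmt.Println(bf.Find(by[:]))          // true: 42 was added above
    fmt.Println(bf.Find([]byte("nope"))) // almost certainly false at this error rate
}
```
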
readme.md

Lines changed: 6 additions & 2 deletions
@@ -6,7 +6,7 @@ Bloomdb implements a bloom filter in Go, while using boltdb and badgerdb as opti
 
 Bloomdb also implement a scalable bloom filter described in a paper written by [P. Almeida, C.Baquero, N. Preguiça, D. Hutchison](https://haslab.uminho.pt/cbm/files/dbloom.pdf).
 
-A scalable bloom filter removes the need for an apriori filter size as expected by the basic bloom filter, while preserving the desired false positive rate by scaling the filter as needed.
+A scalable bloom filter allows you to grow the filter as needed, and removes the need for an apriori filter size as expected by the basic bloom filter, while preserving the desired false positive rate by scaling the filter as needed.
 
 ### Installation
 
@@ -74,7 +74,7 @@ import (
     "fmt"
 
     "github.com/dgraph-io/badger/v3"
-    gobloomgo "github.com/dsa0x/gobloomgo"
+    "github.com/dsa0x/gobloomgo"
 )
 
 func main() {
@@ -86,3 +86,7 @@ func main() {
     fmt.Printf("%s\n", bf.Get([]byte("key")))
 }
 ```
+
+#### References
+
+1. [P. Almeida, C.Baquero, N. Preguiça, D. Hutchison](https://haslab.uminho.pt/cbm/files/dbloom.pdf)

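The readme's claim that scaling preserves the desired false positive rate is the key property from the referenced paper: each added filter tightens its own error rate by the ratio r (0.9 in NewScalableBloom below), so the compounded false positive probability over l filters stays bounded,

$$P = 1 - \prod_{i=0}^{l-1}\bigl(1 - P_0 r^i\bigr) \;\le\; P_0 \sum_{i=0}^{l-1} r^i \;<\; \frac{P_0}{1-r} = 10\,P_0 \quad (r = 0.9),$$

which is exactly the product that the new prob() method in scalable_bloom.go evaluates.
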
scalable_bloom.go

Lines changed: 34 additions & 16 deletions
@@ -1,14 +1,11 @@
 package gobloomgo
 
 import (
-    "fmt"
     "math"
 )
 
-// https://haslab.uminho.pt/cbm/files/dbloom.pdf
-
 type ScalableBloomFilter struct {
-    // The desired false positive rate. e.g. 0.1 error rate implies 1 in 1000
+    // The desired false positive rate
     err_rate float64
 
     // the number of items intended to be added to the bloom filter
@@ -27,9 +24,9 @@ type ScalableBloomFilter struct {
 type GrowthRate uint
 
 var (
-    // GrowthRateSmall represents a small expected growth rate
+    // GrowthRateSmall represents a small expected set growth
     GrowthRateSmall GrowthRate = 2
-    // GrowthRateLarge represents a large expected growth rate
+    // GrowthRateLarge represents a large expected set growth
    GrowthRateLarge GrowthRate = 4
 )
 
@@ -56,7 +53,7 @@ func NewScalableBloom(err_rate float64, initial_capacity int, database Store, gr
         err_rate:    err_rate,
         capacity:    initial_capacity,
         growth_rate: _growth_rate,
-        ratio:       0.9,
+        ratio:       0.9, // Source: [1]
         m0:          initialFilter.m,
         filters:     []*BloomFilter{initialFilter},
         db:          database,
@@ -100,10 +97,9 @@ func (sbf *ScalableBloomFilter) Top() *BloomFilter {
 // grow increases the capacity of the bloom filter by adding a new filter
 func (sbf *ScalableBloomFilter) grow() {
     err_rate := sbf.err_rate * math.Pow(sbf.ratio, float64(len(sbf.filters)))
-
-    // newCapacity = m0 * growth_rate^i * ln2
-    newCapacity := sbf.m0 * int(math.Pow(float64(sbf.growth_rate), float64(len(sbf.filters))+1.0)*math.Ln2)
-    newFilter := NewBloom(err_rate, newCapacity, sbf.db)
+    i := float64(len(sbf.filters)) - 1.0
+    newCapacity := float64(sbf.m0) * float64(math.Pow(float64(sbf.growth_rate), i)) * math.Ln2
+    newFilter := NewBloom(err_rate, int(newCapacity), sbf.db)
     sbf.filters = append(sbf.filters, newFilter)
 }
 
@@ -129,13 +125,35 @@ func (sbf *ScalableBloomFilter) filterSize() int {
 func (sbf *ScalableBloomFilter) getStore() Store {
     return sbf.db
 }
+
+// Count returns the number of items added to the bloom filter
 func (sbf *ScalableBloomFilter) Count() int {
-    sum, smm := 0, 0
-    for i, filter := range sbf.filters {
+    sum := 0
+    for _, filter := range sbf.filters {
         sum += filter.count
-        smm += filter.capacity
-        // fmt.Println(sum, smm, i)
-        fmt.Sprintln(filter.count, "i=", i)
     }
     return sum
 }
+func (sbf *ScalableBloomFilter) bitWidth() int {
+    sum := 0
+    for _, filter := range sbf.filters {
+        sum += filter.bit_width
+    }
+    return sum
+}
+
+func (sbf *ScalableBloomFilter) prob() float64 {
+    sum := 1.0
+    for i, _ := range sbf.filters {
+        sum *= 1.0 - (sbf.err_rate * math.Pow(sbf.ratio, float64(i)))
+    }
+    return 1.0 - sum
+}
+
+func (sbf *ScalableBloomFilter) expCapacity() float64 {
+    sum := 0
+    for i, _ := range sbf.filters {
+        sum += int(math.Pow(float64(sbf.growth_rate), float64(i)))
+    }
+    return float64(sum*sbf.m0) * math.Ln2
+}

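The rewritten grow() changes the capacity formula: the j-th additional filter now gets error rate P0·r^j and capacity m0·s^(j−1)·ln2, where s is the growth rate (the old code raised s to j+1 inside an int cast). A small sketch that just reproduces this arithmetic for the first few grows; m0 and s are assumed, illustrative values, while P0 and r mirror the 0.9 tightening ratio set above:

```go
package main

import (
    "fmt"
    "math"
)

func main() {
    P0, r := 0.01, 0.9   // base error rate and tightening ratio, as in NewScalableBloom
    m0, s := 1000.0, 2.0 // assumed initial bits per slice and growth rate (GrowthRateSmall)

    for j := 1; j <= 4; j++ {
        errRate := P0 * math.Pow(r, float64(j))                 // err_rate passed to NewBloom by grow()
        capacity := m0 * math.Pow(s, float64(j-1)) * math.Ln2   // newCapacity computed by grow()
        fmt.Printf("filter %d: err_rate=%.4f capacity=%d\n", j, errRate, int(capacity))
    }
}
```
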
storage.go

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ type Store interface {
     Close() error
     Get(key []byte) ([]byte, error)
     Put(key, value []byte) error
-    IsReady() bool
+    isReady() bool
 }
 
 type StoreOptions struct {

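Note the side effect of this rename: a Go interface with an unexported method can only be satisfied by types declared in the same package, so Store implementations are now effectively restricted to the gobloomgo package. A minimal in-memory sketch of such an implementation, assuming the hunk above shows the interface's full method set (MemStore is hypothetical, not part of this commit):

```go
package gobloomgo

// MemStore is a hypothetical in-memory Store used only to illustrate the interface.
type MemStore struct {
    data map[string][]byte
}

func NewMemStore() *MemStore {
    return &MemStore{data: make(map[string][]byte)}
}

func (s *MemStore) Close() error { s.data = nil; return nil }

func (s *MemStore) Get(key []byte) ([]byte, error) {
    if v, ok := s.data[string(key)]; ok {
        return v, nil
    }
    return nil, ErrKeyNotFound // defined in bloom.go
}

func (s *MemStore) Put(key, value []byte) error {
    s.data[string(key)] = value
    return nil
}

// isReady matches the renamed, unexported interface method.
func (s *MemStore) isReady() bool { return s.data != nil }
```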