Skip to content

Commit 7495613

Browse files
committed
add Put method
1 parent f4fd2d3 commit 7495613

File tree

9 files changed

+212
-112
lines changed

9 files changed

+212
-112
lines changed

bloom.go

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ type BloomFilter struct {
4545
seeds []int64
4646

4747
path string
48+
49+
// candidates cache
50+
cdCache map[string][]uint64
4851
}
4952

5053
type BloomOptions struct {
@@ -63,8 +66,14 @@ type BloomOptions struct {
6366

6467
// growth rate of the bloom filter (valid values are 2 and 4)
6568
GrowthRate int
69+
}
6670

67-
Scalable bool
71+
var DefaultBloomOptions = BloomOptions{
72+
Path: "bloom.db",
73+
Err_rate: 0.001,
74+
Capacity: 100000,
75+
GrowthRate: 2,
76+
Database: nil,
6877
}
6978

7079
// NewBloom creates a new bloom filter.
@@ -74,6 +83,9 @@ type BloomOptions struct {
7483
//
7584
// database is the persistent store to attach to the filter. can be nil.
7685
func NewBloom(opts *BloomOptions) *BloomFilter {
86+
if opts == nil {
87+
opts = &DefaultBloomOptions
88+
}
7789
if opts.Err_rate <= 0 || opts.Err_rate >= 1 {
7890
panic("Error rate must be between 0 and 1")
7991
}
@@ -140,7 +152,7 @@ func NewBloom(opts *BloomOptions) *BloomFilter {
140152
}
141153

142154
// Add adds the key to the bloom filter
143-
func (bf *BloomFilter) Add(key, val []byte) {
155+
func (bf *BloomFilter) Add(key []byte) {
144156
bf.lock.Lock()
145157
defer bf.lock.Unlock()
146158
defer func() {
@@ -165,10 +177,16 @@ func (bf *BloomFilter) Add(key, val []byte) {
165177
}
166178
bf.count++
167179

168-
if bf.hasStore() {
169-
bf.db.Put([]byte(key), val)
180+
}
181+
182+
// Put adds the key to the bloom filter, and also stores it in the persistent store
183+
func (bf *BloomFilter) Put(key, val []byte) error {
184+
if !bf.hasStore() {
185+
fmt.Errorf("BloomFilter does not have a store, use Add() to add keys")
170186
}
171187

188+
bf.Add(key)
189+
return bf.db.Put([]byte(key), val)
172190
}
173191

174192
// Contains checks if the key exists in the bloom filter
@@ -330,6 +348,32 @@ func (bf *BloomFilter) DB() interface{} {
330348
return bf.db.DB()
331349
}
332350

351+
// Clear resets all bits in the bloom filter
352+
func (bf *BloomFilter) Clear() {
353+
bf.lock.Lock()
354+
defer bf.lock.Unlock()
355+
bf.mem = make([]byte, bf.bit_width)
356+
}
357+
358+
// BloomFilterStats is a point-in-time snapshot of a BloomFilter's counters,
// as returned by (*BloomFilter).Stats.
type BloomFilterStats struct {
	// Capacity is copied from the filter's capacity field.
	Capacity int
	// Count is copied from the filter's count field, which Add increments.
	Count int
	// Size is copied from the filter's bit_width field.
	Size int
	// M is copied from the filter's m field
	// (presumably the number of bits — TODO confirm).
	M int
	// K is copied from the filter's k field
	// (presumably the number of hash functions — TODO confirm).
	K int
}
365+
366+
// Stats returns the stats of the bloom filter
367+
func (bf *BloomFilter) Stats() BloomFilterStats {
368+
return BloomFilterStats{
369+
Capacity: bf.capacity,
370+
Count: bf.count,
371+
Size: bf.bit_width,
372+
M: bf.m,
373+
K: bf.k,
374+
}
375+
}
376+
333377
// divmod returns the quotient and remainder of a/b
334378
func divmod(num, denom int64) (quot, rem int64) {
335379
quot = num / denom

bloom_test.go

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ func TestBloomFilter_Add(t *testing.T) {
4444
}()
4545

4646
t.Run("success", func(t *testing.T) {
47-
key, val := []byte("foo"), []byte("var")
48-
bf.Add(key, val)
47+
key := []byte("foo")
48+
bf.Add(key)
4949
})
5050

5151
t.Run("count should sum up to the number of entries added", func(t *testing.T) {
@@ -65,7 +65,7 @@ func TestBloomFilter_Add(t *testing.T) {
6565
for i := 0; i < count; i++ {
6666
var by [4]byte
6767
binary.LittleEndian.PutUint32(by[:], uint32(i))
68-
bf.Add(by[:], []byte("bar"))
68+
bf.Add(by[:])
6969
}
7070
if bf.Count() != count {
7171
t.Errorf("Expected count to be %d, got %d", bf.Count(), count)
@@ -95,9 +95,9 @@ func TestBloomFilter_Add(t *testing.T) {
9595
for i := 0; i < count; i++ {
9696
var by [4]byte
9797
binary.LittleEndian.PutUint32(by[:], uint32(i))
98-
bf.Add(by[:], []byte("bar"))
98+
bf.Add(by[:])
9999
}
100-
bf.Add([]byte("test"), []byte("bar"))
100+
bf.Add([]byte("test"))
101101
t.Errorf("Expected function to panic when number of entries exceed the capacity")
102102
})
103103

@@ -121,7 +121,7 @@ func TestBloomFilter_Add(t *testing.T) {
121121
os.Remove(opts.Path)
122122
}()
123123

124-
bf.Add([]byte("foo"), []byte("bar"))
124+
bf.Put([]byte("foo"), []byte("bar"))
125125
val := bf.Get([]byte("foo"))
126126
t.Errorf("Expected function to panic when there is no persistent store, got %s", val)
127127
})
@@ -169,10 +169,10 @@ func TestBloomFilter_Merge(t *testing.T) {
169169
})
170170

171171
t.Run("object added to the single filters should be found in the resulting merge", func(t *testing.T) {
172-
key, val := []byte("foo"), []byte("bar")
172+
key := []byte("foo")
173173
bf := NewBloom(opts)
174174
bf2 := NewBloom(opts)
175-
bf2.Add(key, val)
175+
bf2.Add(key)
176176
err := bf.Merge(bf2)
177177
if err != nil {
178178
t.Errorf("Expected no error, got %v", err)
@@ -202,15 +202,15 @@ func TestBloomFilter_AddToDB(t *testing.T) {
202202

203203
t.Run("success", func(t *testing.T) {
204204
key, val := []byte("foo"), []byte("var")
205-
bf.Add(key, val)
205+
bf.Put(key, val)
206206

207207
if val, err := bf.db.Get([]byte(key)); err != nil || val == nil {
208208
t.Errorf("bf.cache[%s] not found; error: %v", key, err)
209209
}
210210
})
211211
t.Run("should not find key that was not added", func(t *testing.T) {
212212
key, val := []byte("foo"), []byte("var")
213-
bf.Add(key, val)
213+
bf.Put(key, val)
214214

215215
if val, err := bf.db.Get([]byte("bar")); err != nil || val != nil {
216216
t.Errorf("expected value to be nil, got %s; error: %v", val, err)
@@ -234,15 +234,15 @@ func TestBloomFilter_AddToBadgerDB(t *testing.T) {
234234

235235
t.Run("success", func(t *testing.T) {
236236
key, val := []byte("foo"), []byte("var")
237-
bf.Add(key, val)
237+
bf.Put(key, val)
238238

239239
if val, err := bf.db.Get([]byte(key)); err != nil || val == nil {
240240
t.Errorf("bf.cache[%s] not found; error: %v", key, err)
241241
}
242242
})
243243
t.Run("should not find key that was not added", func(t *testing.T) {
244244
key, val := []byte("foo"), []byte("var")
245-
bf.Add(key, val)
245+
bf.Put(key, val)
246246

247247
if val, err := bf.db.Get([]byte("bar")); err != nil || val != nil {
248248
t.Errorf("expected value to be nil, got %s; error: %v", val, err)
@@ -266,10 +266,12 @@ func TestBloomFilter(t *testing.T) {
266266
os.Remove(opts.Path)
267267
}()
268268

269-
bf.Add([]byte("foo"), []byte("bar"))
270-
bf.Add([]byte("baz"), []byte("qux"))
269+
bf.Add([]byte("foo"))
270+
bf.Add([]byte("baz"))
271271

272272
t.Run("key may be in cache if found in bloom, def not in cache if not found", func(t *testing.T) {
273+
bf.Put([]byte("foo"), []byte("bar"))
274+
bf.Put([]byte("baz"), []byte("qux"))
273275
table := []struct {
274276
key string
275277
expected bool

cmd/main.go

Lines changed: 44 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package main
33
import (
44
"fmt"
55
"log"
6-
"os"
76
"runtime"
87
"time"
98

@@ -12,9 +11,9 @@ import (
1211
)
1312

1413
func main() {
15-
num := 20_000_000
14+
num := 20_000_00
1615
// div := num / 10
17-
// main2(num)
16+
// main5(num)
1817
// return
1918
opts := &sprout.BloomOptions{
2019
Err_rate: 0.001,
@@ -23,22 +22,21 @@ func main() {
2322
}
2423
bf := sprout.NewBloom(opts)
2524
defer bf.Close()
26-
// return
25+
26+
// reset filter
27+
bf.Clear()
28+
2729
start := time.Now()
28-
bf.Add([]byte("foo"), []byte("bar"))
30+
bf.Add([]byte("foo"))
2931

30-
for i := 0; i < num-1; i++ {
31-
bf.Add([]byte(fmt.Sprintf("%d", i)), []byte("bar"))
32-
// if i%div == 0 {
33-
// time.Sleep(time.Second * 3)
34-
// fmt.Println(i, "added")
35-
// }
32+
for i := 0; i < num-2; i++ {
33+
bf.Add([]byte(fmt.Sprintf("%d", i)))
3634
// fmt.Println(i+1, bf.Contains([]byte(fmt.Sprintf("%d", i+1))))
3735
}
3836
fmt.Println(bf.Contains([]byte("foo")))
3937
fmt.Println(bf.Contains([]byte("bar")))
38+
fmt.Printf("%+v\n", bf.Stats())
4039
fmt.Printf("Added %d elements in %v\n", bf.Capacity(), time.Since(start))
41-
PrintMemUsage()
4240
}
4341

4442
// Using sprout with a persistent storage
@@ -69,62 +67,13 @@ func main2(num int) {
6967
bf := sprout.NewScalableBloom(opts)
7068
start := time.Now()
7169
for i := 0; i < num*10; i++ {
72-
bf.Add([]byte{byte(i)}, []byte("bar"))
70+
bf.Add([]byte{byte(i)})
7371
}
74-
bf.Add([]byte("foo"), []byte("bar"))
72+
bf.Add([]byte("foo"))
7573
fmt.Println(bf.Contains([]byte("foo")))
7674
fmt.Println("Added", num*10, "elements in", time.Since(start))
7775
}
7876

79-
func main4(num int) {
80-
db, err := bolt.Open("store.db", 0600, nil)
81-
if err != nil {
82-
panic(err)
83-
}
84-
85-
err = db.Update(func(tx *bolt.Tx) error {
86-
_, err := tx.CreateBucketIfNotExists([]byte("store.name"))
87-
return err
88-
})
89-
if err != nil {
90-
panic(err)
91-
}
92-
93-
w, err := os.OpenFile("storebolt.db", os.O_RDWR|os.O_CREATE, 0600)
94-
if err != nil {
95-
panic(err)
96-
}
97-
98-
// defer os.Remove("storebolt.db")
99-
100-
start := time.Now()
101-
tx, err := db.Begin(true)
102-
if err != nil {
103-
panic(err)
104-
}
105-
defer tx.Rollback()
106-
size := tx.Size()
107-
108-
b := tx.Bucket([]byte("store.name"))
109-
110-
for i := 0; i < num; i++ {
111-
b.Put([]byte{byte(i)}, []byte("bar"))
112-
}
113-
114-
// write snapshot to pipe
115-
go func() {
116-
defer w.Close()
117-
_, err := tx.WriteTo(w)
118-
if err != nil {
119-
log.Println("Erroring writing to pipe", err)
120-
}
121-
}()
122-
if err != nil {
123-
panic(err)
124-
}
125-
fmt.Println("Added", num, "elements in", time.Since(start), "bytes=", size)
126-
}
127-
12877
func PrintMemUsage() {
12978
var m runtime.MemStats
13079
runtime.ReadMemStats(&m)
@@ -137,3 +86,35 @@ func PrintMemUsage() {
13786
// bToMb converts a byte count to whole mebibytes, truncating any remainder.
func bToMb(b uint64) uint64 {
	const bytesPerMb = 1024 * 1024
	return b / bytesPerMb
}
89+
90+
func main6() {
91+
num := 2_000_000
92+
db, err := bolt.Open("store1.db", 0644, nil)
93+
if err != nil {
94+
panic(err)
95+
}
96+
err = db.Update(func(tx *bolt.Tx) error {
97+
_, err := tx.CreateBucketIfNotExists([]byte("test"))
98+
return err
99+
})
100+
if err != nil {
101+
log.Fatal(err)
102+
}
103+
for i := 0; i < 10; i++ {
104+
fmt.Printf("Starting %d: ", i)
105+
err = db.Update(func(tx *bolt.Tx) error {
106+
b := tx.Bucket([]byte("test"))
107+
for j := 0; j < num/10; j++ {
108+
err := b.Put([]byte(fmt.Sprintf("foo-i%d-j%d", i, j)), []byte("bar"))
109+
if err != nil {
110+
return err
111+
}
112+
}
113+
fmt.Printf("%+v\n", b.Stats().KeyN)
114+
return nil
115+
})
116+
if err != nil {
117+
log.Fatal(err)
118+
}
119+
}
120+
}

cmd/main_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func Benchmark_NewBloom(b *testing.B) {
3737
bf := sprout.NewBloom(opts)
3838
n := 0
3939
for i := 0; i < b.N; i++ {
40-
bf.Add([]byte{byte(n)}, []byte("bar"))
40+
bf.Add([]byte{byte(n)})
4141
n++
4242
}
4343

@@ -59,7 +59,7 @@ func Benchmark_NewBloomFind(b *testing.B) {
5959

6060
n := 0
6161
for i := 0; i < b.N; i++ {
62-
bf.Add([]byte{byte(n)}, []byte("bar"))
62+
bf.Add([]byte{byte(n)})
6363
n++
6464
}
6565
n = 0

0 commit comments

Comments
 (0)