Skip to content

Commit eccab33

Browse files
committed
Respect the Rust tradition, and make breaking changes to the API
But there's nothing wrong with version 2, which will keep being maintained. Serialization and deserialization are now less confusing, and require fewer copies.
1 parent 5a834b4 commit eccab33

File tree

5 files changed

+382
-128
lines changed

5 files changed

+382
-128
lines changed

Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "bloomfilter"
3-
version = "2.0.0"
3+
version = "3.0.0"
44
authors = ["Frank Denis <github@pureftpd.org>"]
55
description = "Bloom filter implementation"
66
license = "ISC"
@@ -17,10 +17,9 @@ getrandom = { version = "0.2", optional = true, features = ["js"] }
1717
getrandom = { version = "0.2", optional = true }
1818

1919
[dependencies]
20-
bit-vec = "0.8.0"
2120
siphasher = "1.0.1"
2221

2322
[features]
2423
default = ["random"]
2524
random = ["getrandom"]
26-
serde = ["siphasher/serde_std", "bit-vec/serde"]
25+
serde = ["siphasher/serde_std"]

README.md

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,25 @@
11
# bloomfilter <img src="img/logo.png" align="right" width="150" />
2+
23
[![Crates.io](https://img.shields.io/crates/v/bloomfilter.svg)](https://crates.io/crates/bloomfilter)
34
[![docs.rs](https://docs.rs/bloomfilter/badge.svg)](https://docs.rs/bloomfilter)
45
[![License: ISC](https://img.shields.io/badge/License-ISC-blue.svg)](https://github.com/jedisct1/rust-bloom-filter/blob/master/LICENSE)
56
<a href="https://codecov.io/gh/jedisct1/rust-bloom-filter">
67
<img src="https://codecov.io/gh/jedisct1/rust-bloom-filter/branch/main/graph/badge.svg">
78
</a>
8-
99

1010
A simple but fast implementation of the Bloom filter in Rust. The Bloom filter is a space-efficient probabilistic data structure supporting dynamic set membership queries with false positives. It was introduced by Burton H. Bloom in 1970 [(Bloom, 1970)](https://dl.acm.org/doi/10.1145/362686.362692) and has since been increasingly used in computing applications and bioinformatics.
1111

1212
### Documentation
1313

14-
Library documentation with examples is available on [docs.rs](https://docs.rs/bloomfilter).
15-
14+
Library documentation is available on [docs.rs](https://docs.rs/bloomfilter).
1615

1716
### Usage
1817

1918
Add this to your `Cargo.toml`:
2019

2120
```toml
2221
[dependencies]
23-
bloomfilter = "2"
22+
bloomfilter = "3"
2423
```
2524

2625
Here is a simple example that creates a Bloom filter with a false positive rate of 0.001 and queries it for the presence of some numbers.
@@ -31,7 +30,7 @@ use bloomfilter::Bloom;
3130
let num_items = 100000;
3231
let fp_rate = 0.001;
3332

34-
let mut bloom = Bloom::new_for_fp_rate(num_items, fp_rate);
33+
let mut bloom = Bloom::new_for_fp_rate(num_items, fp_rate).unwrap();
3534
bloom.set(&10); // insert 10 in the bloom filter
3635
bloom.check(&10); // return true
3736
bloom.check(&20); // return false

src/bitmap.rs

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
use std::convert::{TryFrom, TryInto};
2+
use std::fmt::Debug;
3+
4+
/// Serialization format version, stored in the first byte of the bitmap header.
pub const VERSION: u8 = 1;
5+
/// Size in bytes of the header that precedes the bit storage:
/// version (1) + bit-storage length as `u64` (8) + `k_num` as `u32` (4) + seed (32).
pub const BITMAP_HEADER_SIZE: usize = 1 + 8 + 4 + 32;
6+
7+
/// A bit array stored in a single contiguous byte buffer, together with the
/// header that describes it.
#[derive(Clone, Debug)]
pub(crate) struct BitMap {
    /// The header bytes immediately followed by the bit storage bytes.
    header_and_bits: Vec<u8>,
}
11+
12+
impl BitMap {
13+
pub fn new(len_bytes: usize) -> Self {
14+
let mut header_and_bits = vec![0; BITMAP_HEADER_SIZE + len_bytes];
15+
let header = &mut header_and_bits[0..BITMAP_HEADER_SIZE];
16+
Self::set_version(header, VERSION);
17+
Self::set_len_bytes(header, len_bytes as u64);
18+
Self::set_k_num(header, 0);
19+
Self { header_and_bits }
20+
}
21+
22+
fn bits(&self) -> &[u8] {
23+
&self.header_and_bits[BITMAP_HEADER_SIZE..]
24+
}
25+
26+
fn bits_mut(&mut self) -> &mut [u8] {
27+
&mut self.header_and_bits[BITMAP_HEADER_SIZE..]
28+
}
29+
30+
pub fn header(&self) -> &[u8] {
31+
&self.header_and_bits[0..BITMAP_HEADER_SIZE]
32+
}
33+
34+
pub fn header_mut(&mut self) -> &mut [u8] {
35+
&mut self.header_and_bits[0..BITMAP_HEADER_SIZE]
36+
}
37+
38+
fn get_version(header: &[u8]) -> u8 {
39+
header[0]
40+
}
41+
42+
fn set_version(header: &mut [u8], version: u8) {
43+
header[0] = version;
44+
}
45+
46+
fn get_len_bytes(header: &[u8]) -> u64 {
47+
u64::from_le_bytes(header[1..][0..8].try_into().unwrap())
48+
}
49+
50+
fn set_len_bytes(header: &mut [u8], len_bytes: u64) {
51+
header[1..][0..8].copy_from_slice(&len_bytes.to_le_bytes());
52+
}
53+
54+
pub fn get_k_num(header: &[u8]) -> u32 {
55+
u32::from_le_bytes(header[9..][0..4].try_into().unwrap())
56+
}
57+
58+
pub fn set_k_num(header: &mut [u8], k_num: u32) {
59+
header[9..][0..4].copy_from_slice(&k_num.to_le_bytes());
60+
}
61+
62+
pub fn get_seed(header: &[u8]) -> [u8; 32] {
63+
header[13..][0..32].try_into().unwrap()
64+
}
65+
66+
pub fn set_seed(header: &mut [u8], seed: &[u8; 32]) {
67+
header[13..][0..32].copy_from_slice(seed);
68+
}
69+
70+
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self, &'static str> {
71+
if bytes.len() < BITMAP_HEADER_SIZE {
72+
return Err("Invalid size");
73+
}
74+
let header = &bytes[0..BITMAP_HEADER_SIZE];
75+
let bits = &bytes[BITMAP_HEADER_SIZE..];
76+
if Self::get_version(header) != VERSION {
77+
return Err("Version mismatch");
78+
}
79+
if Self::get_k_num(header) == 0 {
80+
return Err("Invalid number of keys");
81+
}
82+
let len_bytes_u64 = Self::get_len_bytes(header);
83+
let len_bytes: usize = len_bytes_u64.try_into().map_err(|_| "Too big")?;
84+
if bits.len() != len_bytes {
85+
return Err("Invalid size");
86+
}
87+
let res = Self {
88+
header_and_bits: bytes,
89+
};
90+
Ok(res)
91+
}
92+
93+
pub fn from_slice(bytes: &[u8]) -> Result<Self, &'static str> {
94+
if bytes.len() < BITMAP_HEADER_SIZE {
95+
return Err("Invalid size");
96+
}
97+
let header = &bytes[0..BITMAP_HEADER_SIZE];
98+
let bits = &bytes[BITMAP_HEADER_SIZE..];
99+
if Self::get_version(header) != VERSION {
100+
return Err("Version mismatch");
101+
}
102+
if Self::get_k_num(header) == 0 {
103+
return Err("Invalid number of keys");
104+
}
105+
let len_bytes_u64 = Self::get_len_bytes(header);
106+
let len_bytes: usize = len_bytes_u64.try_into().map_err(|_| "Too big")?;
107+
if bits.len() != len_bytes {
108+
return Err("Invalid size");
109+
}
110+
let res = Self {
111+
header_and_bits: bytes.to_vec(),
112+
};
113+
Ok(res)
114+
}
115+
116+
pub fn as_slice(&self) -> &[u8] {
117+
&self.header_and_bits
118+
}
119+
120+
pub fn into_bytes(self) -> Vec<u8> {
121+
self.header_and_bits
122+
}
123+
124+
pub fn to_bytes(&self) -> Vec<u8> {
125+
self.header_and_bits.clone()
126+
}
127+
128+
pub fn get(&self, bit_offset: usize) -> bool {
129+
let byte_offset = bit_offset / 8;
130+
let bit_shift = bit_offset % 8;
131+
(self.bits()[byte_offset] & (1 << bit_shift)) != 0
132+
}
133+
134+
pub fn set(&mut self, bit_offset: usize) {
135+
let byte_offset = bit_offset / 8;
136+
let bit_shift = bit_offset % 8;
137+
self.bits_mut()[byte_offset] |= 1 << bit_shift;
138+
}
139+
140+
pub fn clear(&mut self) {
141+
for byte in self.bits_mut().iter_mut() {
142+
*byte = 0;
143+
}
144+
}
145+
146+
pub fn set_all(&mut self) {
147+
for byte in self.bits_mut().iter_mut() {
148+
*byte = !0;
149+
}
150+
}
151+
152+
pub fn any(&self) -> bool {
153+
self.bits().iter().any(|&byte| byte != 0)
154+
}
155+
156+
pub fn len_bits(&self) -> u64 {
157+
u64::try_from(self.bits().len())
158+
.unwrap()
159+
.checked_mul(8)
160+
.unwrap()
161+
}
162+
163+
#[doc(hidden)]
164+
pub fn realloc_large_heap_allocated_objects(mut self, f: fn(Vec<u8>) -> Vec<u8>) -> Self {
165+
let previous_len = self.header_and_bits.len();
166+
self.header_and_bits = f(self.header_and_bits);
167+
assert_eq!(previous_len, self.header_and_bits.len());
168+
assert_eq!(Self::get_version(self.header()), VERSION);
169+
self
170+
}
171+
}

0 commit comments

Comments
 (0)