Skip to content

Commit 3471dcb

Browse files
author
Roderick Bovee
committed
Use "inline" get_range for speed-up and pin v0.1.1
1 parent 8aff551 commit 3471dcb

File tree

2 files changed

+49
-3
lines changed

2 files changed

+49
-3
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
[package]
22
name = "bfield"
3-
version = "0.1.0"
3+
version = "0.1.1"
44
authors = ["Roderick Bovee <[email protected]>"]
55

66
[dependencies]
77
bincode = "0.9.2"
8-
mmap-bitvec = { git = "ssh://[email protected]/onecodex/mmap-bitvec.git", rev = "171bf99df62ea3a1c57279d3ef2b99efdc490aba" }
8+
mmap-bitvec = { git="ssh://[email protected]/onecodex/mmap-bitvec.git", tag="v0.1.1" }
99
murmurhash3 = "0.0.5"
1010
serde = "1.0.15"
1111
serde_derive = "1.0.15"

src/bfield_member.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@ use std::cmp::Ordering;
22
#[cfg(feature = "legacy")]
33
use std::fs::File;
44
use std::io;
5+
use std::ops::Range;
56
use std::path::Path;
67

78
use bincode::{serialize, deserialize, Infinite};
89
use mmap_bitvec::{BitVector, MmapBitVec, BitVecSlice};
10+
use mmap_bitvec::bitvec::BIT_VEC_SLICE_SIZE;
911
use murmurhash3::murmurhash3_x64_128;
1012
use serde::Serialize;
1113
use serde::de::DeserializeOwned;
@@ -171,7 +173,7 @@ impl<T: Clone + DeserializeOwned + Serialize> BFieldMember<T> {
171173
let mut merged_marker = BitVecSlice::max_value();
172174
for marker_ix in 0usize..self.params.n_hashes as usize {
173175
let pos = marker_pos(hash, marker_ix, self.bitvec.size(), marker_width);
174-
let marker = self.bitvec.get_range(pos..pos + marker_width);
176+
let marker = get_range(&self.bitvec, pos..pos + marker_width);
175177
merged_marker &= marker;
176178
if merged_marker.count_ones().cmp(&k) == Ordering::Less {
177179
return 0;
@@ -224,6 +226,50 @@ fn marker_pos(hash: (u64, u64), n: usize, total_size: usize, _: usize) -> usize
224226
i64::abs(mashed_hash % (total_size as i64 - 64)) as usize
225227
}
226228

229+
/// This is totally messed up, but we get a speed bump by doing this
230+
/// instead of using the _exact same_ function on the struct.
231+
#[cfg(not(feature = "legacy"))]
232+
fn get_range(bitvec: &MmapBitVec, r: Range<usize>) -> BitVecSlice {
233+
if r.end - r.start > BIT_VEC_SLICE_SIZE as usize {
234+
panic!(format!("Range too large (>{})", BIT_VEC_SLICE_SIZE))
235+
} else if r.end > bitvec.size {
236+
panic!("Range ends outside of BitVec")
237+
}
238+
let byte_idx_st = (r.start >> 3) as usize;
239+
let byte_idx_en = ((r.end - 1) >> 3) as usize;
240+
let new_size: u8 = (r.end - r.start) as u8;
241+
242+
let mut v;
243+
let ptr: *const u8 = bitvec.mmap.as_ptr();
244+
245+
// read the last byte first
246+
unsafe {
247+
v = BitVecSlice::from(*ptr.offset(byte_idx_en as isize));
248+
}
249+
// align the end of the data with the end of the u64/u128
250+
v >>= 7 - ((r.end - 1) & 7);
251+
252+
let bit_offset = new_size + (r.start & 7) as u8;
253+
// copy over byte by byte
254+
// it would be faster to coerce into a u8 and a u64 (in case it spans 9 bytes) and then
255+
// copy over, but this doesn't work if the start is <8 bytes from the end, so we're doing
256+
// this for now and we can add a special case for that later
257+
for (new_idx, old_idx) in (byte_idx_st..byte_idx_en).enumerate() {
258+
unsafe {
259+
v |= BitVecSlice::from(*ptr.offset(old_idx as isize)) <<
260+
(bit_offset - 8u8 * (new_idx as u8 + 1));
261+
}
262+
}
263+
264+
// mask out the high bits in case we copied extra
265+
v & BitVecSlice::max_value() >> (BIT_VEC_SLICE_SIZE - new_size)
266+
}
267+
268+
#[cfg(feature = "legacy")]
269+
fn get_range(bitvec: &MmapBitVec, r: Range<usize>) -> BitVecSlice {
270+
bitvec.get_range(r)
271+
}
272+
227273
#[test]
228274
fn test_bfield() {
229275
let mut bfield: BFieldMember<usize> = BFieldMember::in_memory(1024, 3, 64, 4).unwrap();

0 commit comments

Comments
 (0)