@@ -20,9 +20,10 @@ use std::hash::Hasher;
2020
2121use crate :: codec:: SketchBytes ;
2222use crate :: codec:: SketchSlice ;
23+ use crate :: codec:: assert:: ensure_preamble_longs_in_range;
24+ use crate :: codec:: assert:: ensure_serial_version_is;
25+ use crate :: codec:: assert:: insufficient_data;
2326use crate :: codec:: family:: Family ;
24- use crate :: codec:: utility:: ensure_preamble_longs_in_range;
25- use crate :: codec:: utility:: ensure_serial_version_is;
2627use crate :: error:: Error ;
2728use crate :: hash:: XxHash64 ;
2829
@@ -33,9 +34,9 @@ const EMPTY_FLAG_MASK: u8 = 1 << 2;
3334/// A Bloom filter for probabilistic set membership testing.
3435///
3536/// Provides fast membership queries with:
36- /// - No false negatives (inserted items always return `true`)
37- /// - Tunable false positive rate
38- /// - Constant space usage
37+ /// * No false negatives (inserted items always return `true`)
38+ /// * Tunable false positive rate
39+ /// * Constant space usage
3940///
4041/// Use [`super::BloomFilterBuilder`] to construct instances.
4142#[ derive( Debug , Clone , PartialEq ) ]
@@ -54,8 +55,8 @@ impl BloomFilter {
5455 /// Tests whether an item is possibly in the set.
5556 ///
5657 /// Returns:
57- /// - `true`: Item was **possibly** inserted (or false positive)
58- /// - `false`: Item was **definitely not** inserted
58+ /// * `true`: Item was **possibly** inserted (or false positive)
59+ /// * `false`: Item was **definitely not** inserted
5960 ///
6061 /// # Examples
6162 ///
@@ -290,8 +291,8 @@ impl BloomFilter {
290291 ///
291292 /// Uses the approximation: `load_factor^k`
292293 /// where:
293- /// - load_factor = fraction of bits set (bits_used / capacity)
294- /// - k = num_hashes
294+ /// * load_factor = fraction of bits set (bits_used / capacity)
295+ /// * k = num_hashes
295296 ///
296297 /// This assumes uniform bit distribution and is more accurate than
297298 /// trying to estimate insertion count from the load factor.
@@ -307,9 +308,9 @@ impl BloomFilter {
307308 /// Checks if two filters are compatible for merging.
308309 ///
309310 /// Filters are compatible if they have the same:
310- /// - Capacity (number of bits)
311- /// - Number of hash functions
312- /// - Seed
311+ /// * Capacity (number of bits)
312+ /// * Number of hash functions
313+ /// * Seed
313314 pub fn is_compatible ( & self , other : & Self ) -> bool {
314315 self . bit_array . len ( ) == other. bit_array . len ( )
315316 && self . num_hashes == other. num_hashes
@@ -379,9 +380,9 @@ impl BloomFilter {
379380 /// # Errors
380381 ///
381382 /// Returns an error if:
382- /// - The data is truncated or corrupted
383- /// - The family ID doesn't match (not a Bloom filter)
384- /// - The serial version is unsupported
383+ /// * The data is truncated or corrupted
384+ /// * The family ID doesn't match (not a Bloom filter)
385+ /// * The serial version is unsupported
385386 ///
386387 /// # Examples
387388 ///
@@ -399,18 +400,14 @@ impl BloomFilter {
399400 // Read preamble
400401 let preamble_longs = cursor
401402 . read_u8 ( )
402- . map_err ( |_| Error :: insufficient_data ( "preamble_longs" ) ) ?;
403+ . map_err ( insufficient_data ( "preamble_longs" ) ) ?;
403404 let serial_version = cursor
404405 . read_u8 ( )
405- . map_err ( |_| Error :: insufficient_data ( "serial_version" ) ) ?;
406- let family_id = cursor
407- . read_u8 ( )
408- . map_err ( |_| Error :: insufficient_data ( "family_id" ) ) ?;
406+ . map_err ( insufficient_data ( "serial_version" ) ) ?;
407+ let family_id = cursor. read_u8 ( ) . map_err ( insufficient_data ( "family_id" ) ) ?;
409408
410409 // Byte 3: flags byte (directly after family_id)
411- let flags = cursor
412- . read_u8 ( )
413- . map_err ( |_| Error :: insufficient_data ( "flags" ) ) ?;
410+ let flags = cursor. read_u8 ( ) . map_err ( insufficient_data ( "flags" ) ) ?;
414411
415412 // Validate
416413 Family :: BLOOMFILTER . validate_id ( family_id) ?;
@@ -425,7 +422,7 @@ impl BloomFilter {
425422 // Bytes 4-5: num_hashes (u16)
426423 let num_hashes = cursor
427424 . read_u16_le ( )
428- . map_err ( |_| Error :: insufficient_data ( "num_hashes" ) ) ?;
425+ . map_err ( insufficient_data ( "num_hashes" ) ) ?;
429426 if num_hashes == 0 || num_hashes > i16:: MAX as u16 {
430427 return Err ( Error :: deserial ( format ! (
431428 "invalid num_hashes: expected [1, {}], got {}" ,
@@ -436,18 +433,14 @@ impl BloomFilter {
436433 // Bytes 6-7: unused (u16)
437434 let _unused = cursor
438435 . read_u16_le ( )
439- . map_err ( |_| Error :: insufficient_data ( "unused_header" ) ) ?;
440- let seed = cursor
441- . read_u64_le ( )
442- . map_err ( |_| Error :: insufficient_data ( "seed" ) ) ?;
436+ . map_err ( insufficient_data ( "unused_header" ) ) ?;
437+ let seed = cursor. read_u64_le ( ) . map_err ( insufficient_data ( "seed" ) ) ?;
443438
444439 // Bit array capacity is stored as number of 64-bit words (int32) + unused padding (uint32).
445440 let num_longs = cursor
446441 . read_i32_le ( )
447- . map_err ( |_| Error :: insufficient_data ( "num_longs" ) ) ?;
448- let _unused = cursor
449- . read_u32_le ( )
450- . map_err ( |_| Error :: insufficient_data ( "unused" ) ) ?;
442+ . map_err ( insufficient_data ( "num_longs" ) ) ?;
443+ let _unused = cursor. read_u32_le ( ) . map_err ( insufficient_data ( "unused" ) ) ?;
451444
452445 if num_longs <= 0 {
453446 return Err ( Error :: deserial ( format ! (
@@ -465,12 +458,12 @@ impl BloomFilter {
465458 } else {
466459 let raw_num_bits_set = cursor
467460 . read_u64_le ( )
468- . map_err ( |_| Error :: insufficient_data ( "num_bits_set" ) ) ?;
461+ . map_err ( insufficient_data ( "num_bits_set" ) ) ?;
469462
470463 for word in & mut bit_array {
471464 * word = cursor
472465 . read_u64_le ( )
473- . map_err ( |_| Error :: insufficient_data ( "bit_array" ) ) ?;
466+ . map_err ( insufficient_data ( "bit_array" ) ) ?;
474467 }
475468
476469 // Handle "dirty" state: 0xFFFFFFFFFFFFFFFF indicates bits need recounting
@@ -501,8 +494,8 @@ impl BloomFilter {
501494 /// Computes the two base hash values using XXHash64.
502495 ///
503496 /// Uses a two-hash approach:
504- /// - h0 = XXHash64(item, seed)
505- /// - h1 = XXHash64(item, h0)
497+ /// * h0 = XXHash64(item, seed)
498+ /// * h1 = XXHash64(item, h0)
506499 fn compute_hash < T : Hash > ( & self , item : & T ) -> ( u64 , u64 ) {
507500 // First hash with the configured seed
508501 let mut hasher = XxHash64 :: with_seed ( self . seed ) ;
0 commit comments