Skip to content

Commit c41fe90

Browse files
authored
chore: simplify codec checks code (#102)
Signed-off-by: tison <wander4096@gmail.com>
1 parent d6cc4bb commit c41fe90

File tree

20 files changed

+400
-257
lines changed

20 files changed

+400
-257
lines changed

datasketches/src/bloom/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
//!
3131
//! # Usage
3232
//!
33-
//! ```rust
33+
//! ```
3434
//! use datasketches::bloom::BloomFilter;
3535
//! use datasketches::bloom::BloomFilterBuilder;
3636
//!
@@ -60,7 +60,7 @@
6060
//!
6161
//! Automatically calculates optimal size and hash functions:
6262
//!
63-
//! ```rust
63+
//! ```
6464
//! # use datasketches::bloom::BloomFilterBuilder;
6565
//! let filter = BloomFilterBuilder::with_accuracy(
6666
//! 10_000, // Expected max items
@@ -74,7 +74,7 @@
7474
//!
7575
//! Specify requested bit count and hash functions (rounded up to a multiple of 64 bits):
7676
//!
77-
//! ```rust
77+
//! ```
7878
//! # use datasketches::bloom::BloomFilterBuilder;
7979
//! let filter = BloomFilterBuilder::with_size(
8080
//! 95_851, // Number of bits
@@ -87,7 +87,7 @@
8787
//!
8888
//! Bloom filters support efficient set operations:
8989
//!
90-
//! ```rust
90+
//! ```
9191
//! # use datasketches::bloom::BloomFilterBuilder;
9292
//! let mut filter1 = BloomFilterBuilder::with_accuracy(100, 0.01).build();
9393
//! let mut filter2 = BloomFilterBuilder::with_accuracy(100, 0.01).build();

datasketches/src/bloom/sketch.rs

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ use std::hash::Hasher;
2020

2121
use crate::codec::SketchBytes;
2222
use crate::codec::SketchSlice;
23+
use crate::codec::assert::ensure_preamble_longs_in_range;
24+
use crate::codec::assert::ensure_serial_version_is;
25+
use crate::codec::assert::insufficient_data;
2326
use crate::codec::family::Family;
24-
use crate::codec::utility::ensure_preamble_longs_in_range;
25-
use crate::codec::utility::ensure_serial_version_is;
2627
use crate::error::Error;
2728
use crate::hash::XxHash64;
2829

@@ -399,18 +400,14 @@ impl BloomFilter {
399400
// Read preamble
400401
let preamble_longs = cursor
401402
.read_u8()
402-
.map_err(|_| Error::insufficient_data("preamble_longs"))?;
403+
.map_err(insufficient_data("preamble_longs"))?;
403404
let serial_version = cursor
404405
.read_u8()
405-
.map_err(|_| Error::insufficient_data("serial_version"))?;
406-
let family_id = cursor
407-
.read_u8()
408-
.map_err(|_| Error::insufficient_data("family_id"))?;
406+
.map_err(insufficient_data("serial_version"))?;
407+
let family_id = cursor.read_u8().map_err(insufficient_data("family_id"))?;
409408

410409
// Byte 3: flags byte (directly after family_id)
411-
let flags = cursor
412-
.read_u8()
413-
.map_err(|_| Error::insufficient_data("flags"))?;
410+
let flags = cursor.read_u8().map_err(insufficient_data("flags"))?;
414411

415412
// Validate
416413
Family::BLOOMFILTER.validate_id(family_id)?;
@@ -425,7 +422,7 @@ impl BloomFilter {
425422
// Bytes 4-5: num_hashes (u16)
426423
let num_hashes = cursor
427424
.read_u16_le()
428-
.map_err(|_| Error::insufficient_data("num_hashes"))?;
425+
.map_err(insufficient_data("num_hashes"))?;
429426
if num_hashes == 0 || num_hashes > i16::MAX as u16 {
430427
return Err(Error::deserial(format!(
431428
"invalid num_hashes: expected [1, {}], got {}",
@@ -436,18 +433,14 @@ impl BloomFilter {
436433
// Bytes 6-7: unused (u16)
437434
let _unused = cursor
438435
.read_u16_le()
439-
.map_err(|_| Error::insufficient_data("unused_header"))?;
440-
let seed = cursor
441-
.read_u64_le()
442-
.map_err(|_| Error::insufficient_data("seed"))?;
436+
.map_err(insufficient_data("unused_header"))?;
437+
let seed = cursor.read_u64_le().map_err(insufficient_data("seed"))?;
443438

444439
// Bit array capacity is stored as number of 64-bit words (int32) + unused padding (uint32).
445440
let num_longs = cursor
446441
.read_i32_le()
447-
.map_err(|_| Error::insufficient_data("num_longs"))?;
448-
let _unused = cursor
449-
.read_u32_le()
450-
.map_err(|_| Error::insufficient_data("unused"))?;
442+
.map_err(insufficient_data("num_longs"))?;
443+
let _unused = cursor.read_u32_le().map_err(insufficient_data("unused"))?;
451444

452445
if num_longs <= 0 {
453446
return Err(Error::deserial(format!(
@@ -465,12 +458,12 @@ impl BloomFilter {
465458
} else {
466459
let raw_num_bits_set = cursor
467460
.read_u64_le()
468-
.map_err(|_| Error::insufficient_data("num_bits_set"))?;
461+
.map_err(insufficient_data("num_bits_set"))?;
469462

470463
for word in &mut bit_array {
471464
*word = cursor
472465
.read_u64_le()
473-
.map_err(|_| Error::insufficient_data("bit_array"))?;
466+
.map_err(insufficient_data("bit_array"))?;
474467
}
475468

476469
// Handle "dirty" state: 0xFFFFFFFFFFFFFFFF indicates bits need recounting
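datasketches/src/codec/assert.rs (renamed from datasketches/src/codec/utility.rs; path inferred from the import and `mod` changes in this commit)

Lines changed: 4 additions & 0 deletions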
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ use std::ops::RangeBounds;
2020

2121
use crate::error::Error;
2222

23+
pub(crate) fn insufficient_data(tag: &'static str) -> impl FnOnce(std::io::Error) -> Error {
24+
move |_| Error::insufficient_data(tag)
25+
}
26+
2327
pub(crate) fn ensure_serial_version_is(expected: u8, actual: u8) -> Result<(), Error> {
2428
if expected == actual {
2529
Ok(())

datasketches/src/codec/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,5 @@ pub use self::decode::SketchSlice;
2424
pub use self::encode::SketchBytes;
2525

2626
// private to datasketches crate
27+
pub(crate) mod assert;
2728
pub(crate) mod family;
28-
pub(crate) mod utility;

datasketches/src/countmin/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
//!
2323
//! # Usage
2424
//!
25-
//! ```rust
25+
//! ```
2626
//! # use datasketches::countmin::CountMinSketch;
2727
//! let mut sketch = CountMinSketch::<i64>::new(5, 256);
2828
//! sketch.update("apple");
@@ -32,7 +32,7 @@
3232
//!
3333
//! # Configuration Helpers
3434
//!
35-
//! ```rust
35+
//! ```
3636
//! # use datasketches::countmin::CountMinSketch;
3737
//! let buckets = CountMinSketch::<i64>::suggest_num_buckets(0.01);
3838
//! let hashes = CountMinSketch::<i64>::suggest_num_hashes(0.99);

datasketches/src/countmin/sketch.rs

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ use std::hash::Hasher;
2020

2121
use crate::codec::SketchBytes;
2222
use crate::codec::SketchSlice;
23+
use crate::codec::assert::ensure_preamble_longs_in;
24+
use crate::codec::assert::ensure_serial_version_is;
25+
use crate::codec::assert::insufficient_data;
2326
use crate::codec::family::Family;
24-
use crate::codec::utility::ensure_preamble_longs_in;
25-
use crate::codec::utility::ensure_serial_version_is;
2627
use crate::countmin::CountMinValue;
2728
use crate::countmin::UnsignedCountMinValue;
2829
use crate::countmin::serialization::FLAGS_IS_EMPTY;
@@ -61,7 +62,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
6162
///
6263
/// # Examples
6364
///
64-
/// ```rust
65+
/// ```
6566
/// # use datasketches::countmin::CountMinSketch;
6667
/// let sketch = CountMinSketch::<i64>::new(4, 128);
6768
/// assert_eq!(sketch.num_buckets(), 128);
@@ -82,7 +83,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
8283
///
8384
/// # Examples
8485
///
85-
/// ```rust
86+
/// ```
8687
/// # use datasketches::countmin::CountMinSketch;
8788
/// let sketch = CountMinSketch::<i64>::with_seed(4, 64, 42);
8889
/// assert_eq!(sketch.seed(), 42);
@@ -153,7 +154,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
153154
///
154155
/// # Examples
155156
///
156-
/// ```rust
157+
/// ```
157158
/// # use datasketches::countmin::CountMinSketch;
158159
/// let mut sketch = CountMinSketch::<i64>::new(4, 128);
159160
/// sketch.update("apple");
@@ -167,7 +168,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
167168
///
168169
/// # Examples
169170
///
170-
/// ```rust
171+
/// ```
171172
/// # use datasketches::countmin::CountMinSketch;
172173
/// let mut sketch = CountMinSketch::<i64>::new(4, 128);
173174
/// sketch.update_with_weight("banana", 3);
@@ -191,7 +192,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
191192
///
192193
/// # Examples
193194
///
194-
/// ```rust
195+
/// ```
195196
/// # use datasketches::countmin::CountMinSketch;
196197
/// let mut sketch = CountMinSketch::<i64>::new(4, 128);
197198
/// sketch.update_with_weight("pear", 2);
@@ -231,7 +232,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
231232
///
232233
/// # Examples
233234
///
234-
/// ```rust
235+
/// ```
235236
/// # use datasketches::countmin::CountMinSketch;
236237
/// let mut left = CountMinSketch::<i64>::new(4, 128);
237238
/// let mut right = CountMinSketch::<i64>::new(4, 128);
@@ -261,7 +262,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
261262
///
262263
/// # Examples
263264
///
264-
/// ```rust
265+
/// ```
265266
/// # use datasketches::countmin::CountMinSketch;
266267
/// # let mut sketch = CountMinSketch::<i64>::new(4, 128);
267268
/// # sketch.update("apple");
@@ -306,7 +307,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
306307
///
307308
/// # Examples
308309
///
309-
/// ```rust
310+
/// ```
310311
/// # use datasketches::countmin::CountMinSketch;
311312
/// # let mut sketch = CountMinSketch::<i64>::new(4, 64);
312313
/// # sketch.update("apple");
@@ -322,7 +323,7 @@ impl<T: CountMinValue> CountMinSketch<T> {
322323
///
323324
/// # Examples
324325
///
325-
/// ```rust
326+
/// ```
326327
/// # use datasketches::countmin::CountMinSketch;
327328
/// # let mut sketch = CountMinSketch::<i64>::with_seed(4, 64, 7);
328329
/// # sketch.update("apple");
@@ -331,34 +332,40 @@ impl<T: CountMinValue> CountMinSketch<T> {
331332
/// assert!(decoded.estimate("apple") >= 1);
332333
/// ```
333334
pub fn deserialize_with_seed(bytes: &[u8], seed: u64) -> Result<Self, Error> {
334-
fn make_error(tag: &'static str) -> impl FnOnce(std::io::Error) -> Error {
335-
move |_| Error::insufficient_data(tag)
336-
}
337-
338335
fn read_value<T: CountMinValue>(
339336
cursor: &mut SketchSlice<'_>,
340337
tag: &'static str,
341338
) -> Result<T, Error> {
342339
let mut bs = [0u8; 8];
343-
cursor.read_exact(&mut bs).map_err(make_error(tag))?;
340+
cursor.read_exact(&mut bs).map_err(insufficient_data(tag))?;
344341
T::try_from_bytes(bs)
345342
}
346343

347344
let mut cursor = SketchSlice::new(bytes);
348-
let preamble_longs = cursor.read_u8().map_err(make_error("preamble_longs"))?;
349-
let serial_version = cursor.read_u8().map_err(make_error("serial_version"))?;
350-
let family_id = cursor.read_u8().map_err(make_error("family_id"))?;
351-
let flags = cursor.read_u8().map_err(make_error("flags"))?;
352-
cursor.read_u32_le().map_err(make_error("<unused>"))?;
345+
let preamble_longs = cursor
346+
.read_u8()
347+
.map_err(insufficient_data("preamble_longs"))?;
348+
let serial_version = cursor
349+
.read_u8()
350+
.map_err(insufficient_data("serial_version"))?;
351+
let family_id = cursor.read_u8().map_err(insufficient_data("family_id"))?;
352+
let flags = cursor.read_u8().map_err(insufficient_data("flags"))?;
353+
cursor
354+
.read_u32_le()
355+
.map_err(insufficient_data("<unused>"))?;
353356

354357
Family::COUNTMIN.validate_id(family_id)?;
355358
ensure_serial_version_is(SERIAL_VERSION, serial_version)?;
356359
ensure_preamble_longs_in(&[PREAMBLE_LONGS_SHORT], preamble_longs)?;
357360

358-
let num_buckets = cursor.read_u32_le().map_err(make_error("num_buckets"))?;
359-
let num_hashes = cursor.read_u8().map_err(make_error("num_hashes"))?;
360-
let seed_hash = cursor.read_u16_le().map_err(make_error("seed_hash"))?;
361-
cursor.read_u8().map_err(make_error("unused8"))?;
361+
let num_buckets = cursor
362+
.read_u32_le()
363+
.map_err(insufficient_data("num_buckets"))?;
364+
let num_hashes = cursor.read_u8().map_err(insufficient_data("num_hashes"))?;
365+
let seed_hash = cursor
366+
.read_u16_le()
367+
.map_err(insufficient_data("seed_hash"))?;
368+
cursor.read_u8().map_err(insufficient_data("unused8"))?;
362369

363370
let expected_seed_hash = compute_seed_hash(seed);
364371
if seed_hash != expected_seed_hash {
@@ -410,7 +417,7 @@ impl<T: UnsignedCountMinValue> CountMinSketch<T> {
410417
///
411418
/// # Examples
412419
///
413-
/// ```rust
420+
/// ```
414421
/// # use datasketches::countmin::CountMinSketch;
415422
/// let mut sketch = CountMinSketch::<u64>::new(4, 128);
416423
/// sketch.update_with_weight("apple", 3);
@@ -431,7 +438,7 @@ impl<T: UnsignedCountMinValue> CountMinSketch<T> {
431438
///
432439
/// # Examples
433440
///
434-
/// ```rust
441+
/// ```
435442
/// # use datasketches::countmin::CountMinSketch;
436443
/// let mut sketch = CountMinSketch::<u64>::new(4, 128);
437444
/// sketch.update_with_weight("apple", 3);

0 commit comments

Comments
 (0)