Skip to content

Commit 5a1753c

Browse files
authored
Merge pull request #409 from filecoin-project/feat/bitfield-refactor
bitfield improvements
2 parents 8b11754 + 6f7fce7 commit 5a1753c

File tree

8 files changed

+309
-119
lines changed

8 files changed

+309
-119
lines changed

ipld/bitfield/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@ description = "Bitfield logic for use in Filecoin actors"
44
version = "0.3.0"
55
license = "MIT OR Apache-2.0"
66
authors = ["ChainSafe Systems <[email protected]>", "Protocol Labs", "Filecoin Core Devs"]
7-
edition = "2018"
7+
edition = "2021"
88
repository = "https://github.com/filecoin-project/ref-fvm"
99

1010
[dependencies]
1111
unsigned-varint = "0.7.1"
1212
serde = { version = "1.0", features = ["derive"] }
1313
serde_bytes = { package = "cs_serde_bytes", version = "0.12" }
1414
fvm_shared = { version = "0.3.1", path = "../../shared" }
15+
thiserror = "1.0.30"
1516

1617
[dev-dependencies]
1718
rand_xorshift = "0.2.0"

ipld/bitfield/src/iter/mod.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ where
133133
I: Iterator<Item = Range<u64>>,
134134
{
135135
/// Creates a new `Ranges` instance.
136+
///
137+
/// WARNING: This is asserting that the underlying iterator obeys the `RangeIterator`
138+
/// constraints. Using this incorrectly could lead to panics, etc.
136139
pub fn new<II>(iter: II) -> Self
137140
where
138141
II: IntoIterator<IntoIter = I, Item = Range<u64>>,
@@ -155,16 +158,21 @@ where
155158
impl<I> RangeIterator for Ranges<I> where I: Iterator<Item = Range<u64>> {}
156159

157160
/// Returns a `RangeIterator` which ranges contain the values from the provided iterator.
158-
/// The values need to be in ascending order — if not, the returned iterator may not satisfy
159-
/// all `RangeIterator` requirements.
160-
pub fn ranges_from_bits(bits: impl IntoIterator<Item = u64>) -> impl RangeIterator {
161+
/// The values need to be in ascending order and may not include u64::MAX. Otherwise, the iterator
162+
/// will panic.
163+
pub(crate) fn ranges_from_bits(bits: impl IntoIterator<Item = u64>) -> impl RangeIterator {
161164
let mut iter = bits.into_iter().peekable();
162165

163166
Ranges::new(iter::from_fn(move || {
164167
let start = iter.next()?;
165-
let mut end = start + 1;
166-
while iter.peek() == Some(&end) {
167-
end += 1;
168+
let mut end = start.checked_add(1).expect("bitfield overflow");
169+
while let Some(&next) = iter.peek() {
170+
if next < end {
171+
panic!("out of order bitfield")
172+
} else if next > end {
173+
break;
174+
}
175+
end = end.checked_add(1).expect("bitfield overflow");
168176
iter.next();
169177
}
170178
Some(start..end)

ipld/bitfield/src/lib.rs

Lines changed: 118 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,35 @@
11
// Copyright 2019-2022 ChainSafe Systems
22
// SPDX-License-Identifier: Apache-2.0, MIT
33

4+
// disable this lint because it can actually cause performance regressions, and usually leads to
5+
// hard to read code.
6+
#![allow(clippy::comparison_chain)]
7+
48
pub mod iter;
59
mod range;
610
mod rleplus;
711
mod unvalidated;
812

913
use std::collections::BTreeSet;
10-
use std::iter::FromIterator;
1114
use std::ops::{
1215
BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Range, Sub, SubAssign,
1316
};
1417

1518
use iter::{ranges_from_bits, RangeIterator};
1619
pub(crate) use range::RangeSize;
20+
pub use rleplus::Error;
21+
use thiserror::Error;
1722
pub use unvalidated::{UnvalidatedBitField, Validate};
1823

19-
type Result<T> = std::result::Result<T, &'static str>;
24+
#[derive(Clone, Error, Debug)]
25+
#[error("bitfields may not include u64::MAX")]
26+
pub struct OutOfRangeError;
27+
28+
impl From<OutOfRangeError> for Error {
29+
fn from(_: OutOfRangeError) -> Self {
30+
Error::RLEOverflow
31+
}
32+
}
2033

2134
/// A bit field with buffered insertion/removal that serializes to/from RLE+. Similar to
2235
/// `HashSet<u64>`, but more memory-efficient when long runs of 1s and 0s are present.
@@ -36,22 +49,79 @@ impl PartialEq for BitField {
3649
}
3750
}
3851

39-
impl FromIterator<u64> for BitField {
40-
fn from_iter<I: IntoIterator<Item = u64>>(iter: I) -> Self {
41-
let mut vec: Vec<_> = iter.into_iter().collect();
42-
if vec.is_empty() {
43-
Self::new()
52+
/// Possibly a valid bitfield, or an out of bounds error. Ideally we'd just use a result, but we
53+
/// can't implement [`FromIterator`] on a [`Result`] due to coherence.
54+
///
55+
/// You probably want to call [`BitField::try_from_bits`] instead of using this directly.
56+
#[doc(hidden)]
57+
pub enum MaybeBitField {
58+
/// A valid bitfield.
59+
Ok(BitField),
60+
/// Out of bounds.
61+
OutOfBounds,
62+
}
63+
64+
impl MaybeBitField {
65+
pub fn unwrap(self) -> BitField {
66+
use MaybeBitField::*;
67+
match self {
68+
Ok(bf) => bf,
69+
OutOfBounds => panic!("bitfield bit out of bounds"),
70+
}
71+
}
72+
73+
pub fn expect(self, message: &str) -> BitField {
74+
use MaybeBitField::*;
75+
match self {
76+
Ok(bf) => bf,
77+
OutOfBounds => panic!("{}", message),
78+
}
79+
}
80+
}
81+
82+
impl TryFrom<MaybeBitField> for BitField {
83+
type Error = OutOfRangeError;
84+
85+
fn try_from(value: MaybeBitField) -> Result<Self, Self::Error> {
86+
match value {
87+
MaybeBitField::Ok(bf) => Ok(bf),
88+
MaybeBitField::OutOfBounds => Err(OutOfRangeError),
89+
}
90+
}
91+
}
92+
93+
impl FromIterator<bool> for MaybeBitField {
94+
fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> MaybeBitField {
95+
let mut iter = iter.into_iter().fuse();
96+
let bits = (0u64..u64::MAX)
97+
.zip(&mut iter)
98+
.filter(|&(_, b)| b)
99+
.map(|(i, _)| i);
100+
let bf = BitField::from_ranges(ranges_from_bits(bits));
101+
102+
// Now, if we have remaining bits, raise an error. Otherwise, we're good.
103+
if iter.next().is_some() {
104+
MaybeBitField::OutOfBounds
44105
} else {
45-
vec.sort_unstable();
46-
Self::from_ranges(ranges_from_bits(vec))
106+
MaybeBitField::Ok(bf)
47107
}
48108
}
49109
}
50110

51-
impl FromIterator<bool> for BitField {
52-
fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
53-
let bits = (0u64..).zip(iter).filter(|&(_, b)| b).map(|(i, _)| i);
54-
Self::from_ranges(ranges_from_bits(bits))
111+
impl FromIterator<u64> for MaybeBitField {
112+
fn from_iter<T: IntoIterator<Item = u64>>(iter: T) -> MaybeBitField {
113+
let mut vec: Vec<_> = iter.into_iter().collect();
114+
if vec.is_empty() {
115+
MaybeBitField::Ok(BitField::new())
116+
} else {
117+
vec.sort_unstable();
118+
vec.dedup();
119+
if vec.last() == Some(&u64::MAX) {
120+
MaybeBitField::OutOfBounds
121+
} else {
122+
MaybeBitField::Ok(BitField::from_ranges(ranges_from_bits(vec)))
123+
}
124+
}
55125
}
56126
}
57127

@@ -69,10 +139,33 @@ impl BitField {
69139
}
70140
}
71141

72-
/// Adds the bit at a given index to the bit field.
142+
/// Tries to create a new bitfield from a bit iterator. It fails if the resulting bitfield would
143+
/// contain values not in the range `0..u64::MAX` (non-inclusive).
144+
pub fn try_from_bits<I>(iter: I) -> Result<Self, OutOfRangeError>
145+
where
146+
I: IntoIterator,
147+
MaybeBitField: FromIterator<I::Item>,
148+
{
149+
iter.into_iter().collect::<MaybeBitField>().try_into()
150+
}
151+
152+
/// Adds the bit at a given index to the bit field, panicing if it's out of range.
153+
///
154+
/// # Panics
155+
///
156+
/// Panics if `bit` is `u64::MAX`.
73157
pub fn set(&mut self, bit: u64) {
158+
self.try_set(bit).unwrap()
159+
}
160+
161+
/// Adds the bit at a given index to the bit field, returning an error if it's out of range.
162+
pub fn try_set(&mut self, bit: u64) -> Result<(), OutOfRangeError> {
163+
if bit == u64::MAX {
164+
return Err(OutOfRangeError);
165+
}
74166
self.unset.remove(&bit);
75167
self.set.insert(bit);
168+
Ok(())
76169
}
77170

78171
/// Removes the bit at a given index from the bit field.
@@ -114,8 +207,7 @@ impl BitField {
114207
self.set.iter().min().copied(),
115208
self.ranges
116209
.iter()
117-
.filter_map(|r| r.clone().find(|i| !self.unset.contains(i)))
118-
.next(),
210+
.find_map(|r| r.clone().find(|i| !self.unset.contains(i))),
119211
) {
120212
(None, None) => None,
121213
(Some(v), None) | (None, Some(v)) => Some(v),
@@ -165,12 +257,12 @@ impl BitField {
165257
}
166258

167259
/// Returns an iterator over the indices of the bit field's set bits if the number
168-
/// of set bits in the bit field does not exceed `max`. Returns an error otherwise.
169-
pub fn bounded_iter(&self, max: u64) -> Result<impl Iterator<Item = u64> + '_> {
260+
/// of set bits in the bit field does not exceed `max`. Returns `None` otherwise.
261+
pub fn bounded_iter(&self, max: u64) -> Option<impl Iterator<Item = u64> + '_> {
170262
if self.len() <= max {
171-
Ok(self.iter())
263+
Some(self.iter())
172264
} else {
173-
Err("Bits set exceeds max in retrieval")
265+
None
174266
}
175267
}
176268

@@ -197,15 +289,15 @@ impl BitField {
197289
}
198290

199291
/// Returns a slice of the bit field with the start index of set bits
200-
/// and number of bits to include in the slice. Returns an error if the
201-
/// bit field contains fewer than `start + len` set bits.
202-
pub fn slice(&self, start: u64, len: u64) -> Result<Self> {
292+
/// and number of bits to include in the slice. Returns `None` if the bit
293+
/// field contains fewer than `start + len` set bits.
294+
pub fn slice(&self, start: u64, len: u64) -> Option<Self> {
203295
let slice = BitField::from_ranges(self.ranges().skip_bits(start).take_bits(len));
204296

205297
if slice.len() == len {
206-
Ok(slice)
298+
Some(slice)
207299
} else {
208-
Err("Not enough bits")
300+
None
209301
}
210302
}
211303

@@ -330,7 +422,7 @@ macro_rules! bitfield {
330422
std::iter::once($head != 0_u32).chain(bitfield!(@iter $($tail),*))
331423
};
332424
($($val:literal),* $(,)?) => {
333-
bitfield!(@iter $($val),*).collect::<$crate::BitField>()
425+
bitfield!(@iter $($val),*).collect::<$crate::MaybeBitField>().unwrap()
334426
};
335427
}
336428

ipld/bitfield/src/rleplus/error.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
use thiserror::Error;
2+
3+
#[derive(PartialEq, Eq, Clone, Debug, Error)]
4+
pub enum Error {
5+
#[error("bitfield not minimally encoded")]
6+
NotMinimal,
7+
#[error("bitfield specifies an unsupported version")]
8+
UnsupportedVersion,
9+
#[error("bitfield overflows 2^63-1")]
10+
RLEOverflow,
11+
#[error("invalid varint")]
12+
InvalidVarint,
13+
}

0 commit comments

Comments
 (0)