diff --git a/firewood/src/iter.rs b/firewood/src/iter.rs index 0caa03b923..cdf2d9661e 100644 --- a/firewood/src/iter.rs +++ b/firewood/src/iter.rs @@ -1,9 +1,12 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. +mod filtered_key_range; +pub(crate) mod returnable; + use crate::merkle::{Key, Value}; -use crate::v2::api::{KeyType, KeyValuePair}; +pub use self::filtered_key_range::{FilteredKeyRangeExt, FilteredKeyRangeIter}; use firewood_storage::{ BranchNode, Child, FileIoError, NibblesIterator, Node, PathBuf, PathComponent, PathIterItem, SharedNode, TriePathFromUnpackedBytes, TrieReader, @@ -298,12 +301,6 @@ impl<'a, T: TrieReader> MerkleKeyValueIter<'a, T> { iter: MerkleNodeIter::new(merkle, key.as_ref().into()), } } - - /// Returns a new iterator that will emit key-value pairs up to and - /// including `last_key`. - pub fn stop_after_key(self, last_key: Option) -> FilteredKeyRangeIter { - FilteredKeyRangeIter::new(self, last_key) - } } impl Iterator for MerkleKeyValueIter<'_, T> { @@ -568,61 +565,6 @@ fn key_from_nibble_iter>(mut nibbles: Iter) -> Key { data.into_boxed_slice() } -/// An iterator over key-value pairs that stops after a specified final key. -#[derive(Debug)] -#[must_use = "iterators are lazy and do nothing unless consumed"] -pub enum FilteredKeyRangeIter { - Unfiltered { iter: I }, - Filtered { iter: I, last_key: K }, - Exhausted, -} - -impl, T: KeyValuePair, K: KeyType> FilteredKeyRangeIter { - /// Creates a new [`FilteredKeyRangeIter`] that will iterate over `iter` - /// stopping early if `last_key` is `Some` and a key greater than it is - /// encountered. - pub fn new(iter: I, last_key: Option) -> Self { - match last_key { - Some(k) => FilteredKeyRangeIter::Filtered { iter, last_key: k }, - None => FilteredKeyRangeIter::Unfiltered { iter }, - } - } -} - -impl, T: KeyValuePair, K: KeyType> Iterator for FilteredKeyRangeIter { - type Item = Result<(T::Key, T::Value), T::Error>; - - fn next(&mut self) -> Option { - match self { - FilteredKeyRangeIter::Unfiltered { iter } => iter.next().map(T::try_into_tuple), - FilteredKeyRangeIter::Filtered { iter, last_key } => { - match iter.next().map(T::try_into_tuple) { - Some(Ok((key, value))) if key.as_ref() <= last_key.as_ref() => { - Some(Ok((key, value))) - } - Some(Err(e)) => Some(Err(e)), - _ => { - *self = FilteredKeyRangeIter::Exhausted; - None - } - } - } - FilteredKeyRangeIter::Exhausted => None, - } - } - - fn size_hint(&self) -> (usize, Option) { - match self { - FilteredKeyRangeIter::Unfiltered { iter } => iter.size_hint(), - FilteredKeyRangeIter::Filtered { iter, .. } => { - let (_, upper) = iter.size_hint(); - (0, upper) - } - FilteredKeyRangeIter::Exhausted => (0, Some(0)), - } - } -} - #[cfg(test)] #[expect(clippy::indexing_slicing, clippy::unwrap_used)] mod tests { diff --git a/firewood/src/iter/filtered_key_range.rs b/firewood/src/iter/filtered_key_range.rs new file mode 100644 index 0000000000..0765b81c0f --- /dev/null +++ b/firewood/src/iter/filtered_key_range.rs @@ -0,0 +1,69 @@ +// Copyright (C) 2025, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + +use crate::v2::api::{KeyType, KeyValuePair}; + +pub trait FilteredKeyRangeExt: Iterator + Sized { + /// Returns a new iterator that will emit key-value pairs up to and + /// including `last_key`. + fn stop_after_key(self, last_key: Option) -> FilteredKeyRangeIter { + FilteredKeyRangeIter::new(self, last_key) + } +} + +impl> FilteredKeyRangeExt for I {} + +/// An iterator over key-value pairs that stops after a specified final key. +#[derive(Debug)] +#[must_use = "iterators are lazy and do nothing unless consumed"] +pub enum FilteredKeyRangeIter { + Unfiltered { iter: I }, + Filtered { iter: I, last_key: K }, + Exhausted, +} + +impl, T: KeyValuePair, K: KeyType> FilteredKeyRangeIter { + /// Creates a new [`FilteredKeyRangeIter`] that will iterate over `iter` + /// stopping early if `last_key` is `Some` and a key greater than it is + /// encountered. + pub fn new(iter: I, last_key: Option) -> Self { + match last_key { + Some(k) => FilteredKeyRangeIter::Filtered { iter, last_key: k }, + None => FilteredKeyRangeIter::Unfiltered { iter }, + } + } +} + +impl, T: KeyValuePair, K: KeyType> Iterator for FilteredKeyRangeIter { + type Item = Result<(T::Key, T::Value), T::Error>; + + fn next(&mut self) -> Option { + match self { + FilteredKeyRangeIter::Unfiltered { iter } => iter.next().map(T::try_into_tuple), + FilteredKeyRangeIter::Filtered { iter, last_key } => { + match iter.next().map(T::try_into_tuple) { + Some(Ok((key, value))) if key.as_ref() <= last_key.as_ref() => { + Some(Ok((key, value))) + } + Some(Err(e)) => Some(Err(e)), + _ => { + *self = FilteredKeyRangeIter::Exhausted; + None + } + } + } + FilteredKeyRangeIter::Exhausted => None, + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + FilteredKeyRangeIter::Unfiltered { iter } => iter.size_hint(), + FilteredKeyRangeIter::Filtered { iter, .. } => { + let (_, upper) = iter.size_hint(); + (0, upper) + } + FilteredKeyRangeIter::Exhausted => (0, Some(0)), + } + } +} diff --git a/firewood/src/iter/returnable.rs b/firewood/src/iter/returnable.rs new file mode 100644 index 0000000000..4be31b10c1 --- /dev/null +++ b/firewood/src/iter/returnable.rs @@ -0,0 +1,68 @@ +// Copyright (C) 2025, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + +pub(crate) trait ReturnableIteratorExt: Iterator + Sized { + /// Wraps this iterator in a [`ReturnableIterator`]. + fn returnable(self) -> ReturnableIterator { + ReturnableIterator::new(self) + } +} + +impl ReturnableIteratorExt for I {} + +/// Similar to a peekable iterator. In addition to being able to peek at the +/// next item without consuming it, it also allows "returning" an item back to +/// the iterator to be yielded on the next call to [`next()`]. +/// +/// [`next()`]: Iterator::next +pub(crate) struct ReturnableIterator { + iter: I, + next: Option, +} + +impl ReturnableIterator { + pub(crate) const fn new(iter: I) -> Self { + Self { iter, next: None } + } + + /// Peeks at the next item without consuming it. The next call to + /// [`next()`] will still return this item. + /// + /// [`next()`]: Iterator::next + pub(crate) fn peek(&mut self) -> Option<&mut I::Item> { + if self.next.is_none() { + self.next = self.iter.next(); + } + + self.next.as_mut() + } + + /// Puts an item back to be returned on the next call to [`next()`]. This + /// makes it easy to "un-read" a single item from the iterator without + /// needing to implement complex buffering logic. + /// + /// NOTE: This will replace and return any item that was already in the + /// return slot. + /// + /// [`next()`]: Iterator::next + pub(crate) const fn return_item(&mut self, head: I::Item) -> Option { + self.next.replace(head) + } +} + +impl Iterator for ReturnableIterator { + type Item = I::Item; + + fn next(&mut self) -> Option { + self.next.take().or_else(|| self.iter.next()) + } + + fn size_hint(&self) -> (usize, Option) { + let (lower, upper) = self.iter.size_hint(); + let head_count = usize::from(self.next.is_some()); + ( + lower.saturating_add(head_count), + upper.and_then(|u| u.checked_add(head_count)), + ) + } +} diff --git a/firewood/src/merkle/merge.rs b/firewood/src/merkle/merge.rs index 5af8367efa..a987efd655 100644 --- a/firewood/src/merkle/merge.rs +++ b/firewood/src/merkle/merge.rs @@ -5,7 +5,10 @@ use firewood_storage::{FileIoError, TrieReader}; use crate::{ db::BatchOp, - iter::{FilteredKeyRangeIter, MerkleKeyValueIter}, + iter::{ + FilteredKeyRangeExt, FilteredKeyRangeIter, MerkleKeyValueIter, + returnable::{ReturnableIterator, ReturnableIteratorExt}, + }, merkle::Key, v2::api::{BatchIter, KeyType, KeyValuePair}, }; @@ -39,12 +42,12 @@ where last_key: Option, kvp_iter: impl IntoIterator, ) -> Self { - let base_iter = merkle - .key_value_iter_from_key(first_key.as_ref().map(AsRef::as_ref).unwrap_or_default()) - .stop_after_key(last_key); Self { - trie: ReturnableIterator::new(base_iter), - kvp: ReturnableIterator::new(FilteredKeyRangeIter::new(kvp_iter.into_iter(), None)), + trie: merkle + .key_value_iter_from_key(first_key.as_ref().map(AsRef::as_ref).unwrap_or_default()) + .stop_after_key(last_key) + .returnable(), + kvp: kvp_iter.into_iter().stop_after_key(None).returnable(), } } } @@ -68,14 +71,14 @@ where (Some(Err(err)), kvp) => { if let Some(kvp) = kvp { - self.kvp.set_next(kvp); + self.kvp.return_item(kvp); } Some(Err(err)) } (trie, Some(Err(err))) => { if let Some(trie) = trie { - self.trie.set_next(trie); + self.trie.return_item(trie); } Some(Err(err.into())) @@ -96,7 +99,7 @@ where match <[u8] as Ord>::cmp(&base_key, kvp_key.as_ref()) { std::cmp::Ordering::Less => { // retain the kvp iterator's current item. - self.kvp.set_next(Ok((kvp_key, kvp_value))); + self.kvp.return_item(Ok((kvp_key, kvp_value))); // trie key is less than next kvp key, so it must be deleted. Some(Ok(BatchOp::Delete { @@ -119,7 +122,7 @@ where } std::cmp::Ordering::Greater => { // retain the trie iterator's current item. - self.trie.set_next(Ok((base_key, node_value))); + self.trie.return_item(Ok((base_key, node_value))); // trie key is greater than next kvp key, so we need to insert it. Some(Ok(BatchOp::Put { key: EitherKey::Right(kvp_key), @@ -133,40 +136,6 @@ where } } -/// Similar to a peekable iterator. Instead of peeking at the next item, it allows -/// you to put it back to be returned on the next call to `next()`. -struct ReturnableIterator { - iter: I, - next: Option, -} - -impl ReturnableIterator { - const fn new(iter: I) -> Self { - Self { iter, next: None } - } - - const fn set_next(&mut self, head: I::Item) -> Option { - self.next.replace(head) - } -} - -impl Iterator for ReturnableIterator { - type Item = I::Item; - - fn next(&mut self) -> Option { - self.next.take().or_else(|| self.iter.next()) - } - - fn size_hint(&self) -> (usize, Option) { - let (lower, upper) = self.iter.size_hint(); - let head_count = usize::from(self.next.is_some()); - ( - lower.saturating_add(head_count), - upper.and_then(|u| u.checked_add(head_count)), - ) - } -} - #[derive(Debug)] pub(super) enum EitherKey { Left(L), diff --git a/firewood/src/merkle/mod.rs b/firewood/src/merkle/mod.rs index f1f486e95b..61ecb93260 100644 --- a/firewood/src/merkle/mod.rs +++ b/firewood/src/merkle/mod.rs @@ -8,7 +8,8 @@ mod merge; /// Parallel merkle pub mod parallel; -use crate::iter::{MerkleKeyValueIter, PathIterator}; +use crate::iter::FilteredKeyRangeExt; +use crate::iter::{MerkleKeyValueIter, PathIterator, returnable::ReturnableIteratorExt}; use crate::v2::api::{ self, BatchIter, FrozenProof, FrozenRangeProof, KeyType, KeyValuePair, ValueType, }; @@ -406,7 +407,8 @@ impl Merkle { let mut iter = self .key_value_iter_from_key(start_key.unwrap_or_default()) - .stop_after_key(end_key); + .stop_after_key(end_key) + .returnable(); // don't consume the iterator so we can determine if we hit the // limit or exhausted the iterator later @@ -422,7 +424,7 @@ impl Merkle { let end_proof = if let Some(limit) = limit && limit.get() <= key_values.len() - && iter.next().is_some() + && iter.peek().is_some() { // limit was provided, we hit it, and there is at least one more key // end proof is for the last key provided