From 0979a8de222264e83273ca1dc97b4abbd5335081 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 4 Sep 2025 20:05:58 +0800 Subject: [PATCH 1/5] feat: cacheless hamt iteration --- Cargo.lock | 1 + fvm/src/state_tree.rs | 11 ++ ipld/hamt/Cargo.toml | 1 + ipld/hamt/src/hamt.rs | 32 +++++ ipld/hamt/src/hash_algorithm.rs | 4 +- ipld/hamt/src/lib.rs | 2 +- ipld/hamt/src/node.rs | 64 ++++++++++ ipld/hamt/src/pointer.rs | 17 +++ ipld/hamt/tests/hamt_tests.rs | 215 ++++++++++++++++++++++++++------ 9 files changed, 305 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7783e3466a..f9cb43d9b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2527,6 +2527,7 @@ dependencies = [ "fvm_ipld_encoding 0.5.3", "hex", "ipld-core", + "itertools 0.14.0", "multihash-codetable", "once_cell", "quickcheck", diff --git a/fvm/src/state_tree.rs b/fvm/src/state_tree.rs index 5e7b59500c..9cd9a129ab 100644 --- a/fvm/src/state_tree.rs +++ b/fvm/src/state_tree.rs @@ -374,4 +374,15 @@ where })?; Ok(()) } + + pub fn for_each_cacheless(&self, mut f: F) -> anyhow::Result<()> + where + F: FnMut(Address, &ActorState) -> anyhow::Result<()>, + { + self.hamt.for_each_cacheless(|k, v| { + let addr = Address::from_bytes(&k.0)?; + f(addr, v) + })?; + Ok(()) + } } diff --git a/ipld/hamt/Cargo.toml b/ipld/hamt/Cargo.toml index a8a32f8b50..43e5b662f5 100644 --- a/ipld/hamt/Cargo.toml +++ b/ipld/hamt/Cargo.toml @@ -32,6 +32,7 @@ unsigned-varint = { workspace = true } quickcheck = { workspace = true } quickcheck_macros = { workspace = true } rand = { workspace = true } +itertools = { workspace = true } [[bench]] name = "hamt_beckmark" diff --git a/ipld/hamt/src/hamt.rs b/ipld/hamt/src/hamt.rs index 73292c08ce..3b7dabdfe5 100644 --- a/ipld/hamt/src/hamt.rs +++ b/ipld/hamt/src/hamt.rs @@ -382,6 +382,38 @@ where Ok(()) } + /// Iterates over each KV in the Hamt and runs a function on the values. This is a + /// non-caching version of [`Self::for_each`]. It can potentially be more efficient, especially memory-wise, + /// for large HAMTs or when the iteration occurs only once. + /// + /// # Examples + /// + /// ``` + /// use fvm_ipld_hamt::Hamt; + /// + /// let store = fvm_ipld_blockstore::MemoryBlockstore::default(); + /// + /// let mut map: Hamt<_, _, usize> = Hamt::new(store); + /// map.set(1, 1).unwrap(); + /// map.set(4, 2).unwrap(); + /// + /// let mut total = 0; + /// map.for_each_cacheless(|_, v: &u64| { + /// total += v; + /// Ok(()) + /// }).unwrap(); + /// assert_eq!(total, 3); + /// ``` + pub fn for_each_cacheless(&self, mut f: F) -> Result<(), Error> + where + K: Clone, + V: DeserializeOwned + Clone, + F: FnMut(&K, &V) -> anyhow::Result<()>, + { + self.root + .for_each_cacheless(&self.store, &self.conf, &mut f) + } + /// Iterates over each KV in the Hamt and runs a function on the values. If starting key is /// provided, iteration will start from that key. If max is provided, iteration will stop after /// max number of items have been traversed. The number of items that were traversed is diff --git a/ipld/hamt/src/hash_algorithm.rs b/ipld/hamt/src/hash_algorithm.rs index 83a26665d0..7bdf8d3f2a 100644 --- a/ipld/hamt/src/hash_algorithm.rs +++ b/ipld/hamt/src/hash_algorithm.rs @@ -72,9 +72,9 @@ pub enum Identity {} #[cfg(feature = "identity")] impl HashAlgorithm for Identity { - fn hash(key: &X) -> HashedKey + fn hash(key: &X) -> HashedKey where - X: Hash, + X: Hash + ?Sized, { let mut ident_hasher = IdentityHasher::default(); key.hash(&mut ident_hasher); diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index b8425d551f..17d47c888d 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -75,7 +75,7 @@ impl Default for Config { type HashedKey = [u8; 32]; -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] struct KeyValuePair(K, V); impl KeyValuePair { diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index 6c7a34090f..3f8a0755fd 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -29,6 +29,20 @@ pub(crate) struct Node { hash: PhantomData, } +impl Clone for Node +where + K: Clone, + V: Clone, +{ + fn clone(&self) -> Self { + Self { + bitfield: self.bitfield, + pointers: self.pointers.clone(), + hash: Default::default(), + } + } +} + impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { (self.bitfield == other.bitfield) && (self.pointers == other.pointers) @@ -206,6 +220,56 @@ where self.pointers.is_empty() } + /// Non-caching iteration over the values in the node. + pub(super) fn for_each_cacheless( + &self, + bs: &S, + conf: &Config, + f: &mut F, + ) -> Result<(), Error> + where + F: FnMut(&K, &V) -> anyhow::Result<()>, + S: Blockstore, + K: Clone, + V: Clone, + { + enum StackItem<'a, T> { + Borrowed(&'a T), + Owned(T), + } + + impl AsRef for StackItem<'_, T> { + fn as_ref(&self) -> &T { + match self { + Self::Borrowed(i) => i, + Self::Owned(i) => i, + } + } + } + + let mut stack = vec![StackItem::Borrowed(&self.pointers)]; + loop { + let Some(stack_last_item) = stack.pop() else { + // Terminate here since both `current` and `stack` reach the end + return Ok(()); + }; + for pointer in stack_last_item.as_ref() { + match pointer { + Pointer::Link { cid, cache: _ } => { + let node = Node::load(conf, bs, cid, stack.len() as u32)?; + stack.push(StackItem::Owned(node.pointers)) + } + Pointer::Dirty(node) => stack.push(StackItem::Owned(node.pointers.clone())), + Pointer::Values(kvs) => { + for kv in kvs.iter() { + f(kv.key(), kv.value())?; + } + } + } + } + } + } + /// Search for a key. fn search( &self, diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index b33e6c5834..fd6e4d6d01 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -46,6 +46,23 @@ pub(crate) enum Pointer { Dirty(Box>), } +impl Clone for Pointer +where + K: Clone, + V: Clone, +{ + fn clone(&self) -> Self { + match self { + Self::Values(v) => Self::Values(v.clone()), + Self::Link { cid, cache: _ } => Self::Link { + cid: *cid, + cache: Default::default(), + }, + Self::Dirty(n) => Self::Dirty(n.clone()), + } + } +} + impl PartialEq for Pointer { fn eq(&self, other: &Self) -> bool { match (self, other) { diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index a761502d4b..950fb4e0c2 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0, MIT use std::collections::{HashMap, HashSet}; -use std::fmt::Display; +use std::fmt::{Debug, Display}; use cid::Cid; use fvm_ipld_blockstore::tracking::{BSStats, TrackingBlockstore}; @@ -14,6 +14,7 @@ use fvm_ipld_encoding::strict_bytes::ByteBuf; #[cfg(feature = "identity")] use fvm_ipld_hamt::Identity; use fvm_ipld_hamt::{BytesKey, Config, Error, Hamt, Hash}; +use itertools::Itertools as _; use multihash_codetable::Code; use quickcheck::Arbitrary; use rand::SeedableRng; @@ -84,44 +85,61 @@ impl HamtFactory { } } -/// Check hard-coded CIDs during testing. -struct CidChecker { +type CidChecker = Checker; + +type BSStatsChecker = Checker; + +/// Check hard-coded values during testing. +struct Checker { checked: usize, - cids: Option>, + expected_values: Option>, } -impl CidChecker { - pub fn new(cids: Vec<&'static str>) -> Self { +impl Checker +where + T: Debug + PartialEq, +{ + pub fn new(expected_values: impl IntoIterator) -> Self + where + T: TryFrom, + >::Error: Debug, + { Self { - cids: Some(cids), + expected_values: Some( + expected_values + .into_iter() + .map(T::try_from) + .try_collect() + .unwrap(), + ), checked: 0, } } pub fn empty() -> Self { Self { - cids: None, + expected_values: None, checked: 0, } } - pub fn check_next(&mut self, cid: Cid) { - if let Some(cids) = &self.cids { - assert_ne!(self.checked, cids.len()); - assert_eq!(cid.to_string().as_str(), cids[self.checked]); + pub fn check_next(&mut self, actual_value: &T) { + if let Some(expected_values) = &self.expected_values { + assert_ne!(self.checked, expected_values.len()); + assert_eq!(actual_value, &expected_values[self.checked]); self.checked += 1; } } } -impl Drop for CidChecker { +impl Drop for Checker { fn drop(&mut self) { if std::thread::panicking() { // Already failed, don't double-panic. return; } - if let Some(cids) = &self.cids { - assert_eq!(self.checked, cids.len()) + if let Some(expected_values) = &self.expected_values { + assert_eq!(self.checked, expected_values.len()) } } } @@ -302,7 +320,7 @@ fn test_set_if_absent(factory: HamtFactory, stats: Option, mut cids: Ci .unwrap() ); - cids.check_next(c); + cids.check_next(&c); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); @@ -324,12 +342,12 @@ fn set_with_no_effect_does_not_put( } let c = begn.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); begn.set(tstring("favorite-animal"), tstring("bright green bear")) .unwrap(); let c2 = begn.flush().unwrap(); - cids.check_next(c2); + cids.check_next(&c2); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); } @@ -337,7 +355,7 @@ fn set_with_no_effect_does_not_put( begn.set(tstring("favorite-animal"), tstring("bright green bear")) .unwrap(); let c3 = begn.flush().unwrap(); - cids.check_next(c3); + cids.check_next(&c3); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); @@ -356,7 +374,7 @@ fn delete(factory: HamtFactory, stats: Option, mut cids: CidChecker) { assert!(hamt.contains_key(&tstring("foo")).unwrap()); let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); let mut h2: Hamt<_, BytesKey> = factory.load(&c, &store).unwrap(); assert!(h2.get(&b"foo".to_vec()).unwrap().is_some()); @@ -368,7 +386,7 @@ fn delete(factory: HamtFactory, stats: Option, mut cids: CidChecker) { assert!(h2.delete(&b"nonexistent".to_vec()).unwrap().is_none()); let c2 = h2.flush().unwrap(); - cids.check_next(c2); + cids.check_next(&c2); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); } @@ -384,14 +402,14 @@ fn delete_case(factory: HamtFactory, stats: Option, mut cids: CidChecke .unwrap(); let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); let mut h2: Hamt<_, ByteBuf> = factory.load(&c, &store).unwrap(); assert!(h2.delete(&[0].to_vec()).unwrap().is_some()); assert_eq!(h2.get(&[0].to_vec()).unwrap(), None); let c2 = h2.flush().unwrap(); - cids.check_next(c2); + cids.check_next(&c2); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); } @@ -407,7 +425,7 @@ fn reload_empty(factory: HamtFactory, stats: Option, mut cids: CidCheck let h2: Hamt<_, ()> = factory.load(&c, &store).unwrap(); let c2 = store.put_cbor(&h2, Code::Blake2b256).unwrap(); assert_eq!(c, c2); - cids.check_next(c); + cids.check_next(&c); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); } @@ -430,14 +448,14 @@ fn set_delete_many( } let c1 = hamt.flush().unwrap(); - cids.check_next(c1); + cids.check_next(&c1); for i in size_factor..(size_factor * 2) { hamt.set(tstring(i), tstring(i)).unwrap(); } let cid_all = hamt.flush().unwrap(); - cids.check_next(cid_all); + cids.check_next(&cid_all); for i in size_factor..(size_factor * 2) { assert!(hamt.delete(&tstring(i)).unwrap().is_some()); @@ -449,7 +467,7 @@ fn set_delete_many( } let cid_d = hamt.flush().unwrap(); - cids.check_next(cid_d); + cids.check_next(&cid_d); // Assert that we can empty it. for i in 0..size_factor { @@ -460,7 +478,7 @@ fn set_delete_many( assert_eq!(hamt.iter().count(), 0); let cid_d = hamt.flush().unwrap(); - cids.check_next(cid_d); + cids.check_next(&cid_d); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); } @@ -496,7 +514,7 @@ fn for_each( assert_eq!(count, size_factor); let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); let mut hamt: Hamt<_, BytesKey> = factory.load_with_bit_width(&c, &store, 5).unwrap(); @@ -522,7 +540,7 @@ fn for_each( { let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); } // Iterate with a few modified nodes. @@ -573,13 +591,116 @@ fn for_each( assert_eq!(count, expected_count); let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); } } +fn for_each_cacheless( + size_factor: usize, + factory: HamtFactory, + mut stats: Option, + mut cids: CidChecker, +) { + let mem = MemoryBlockstore::default(); + let store = TrackingBlockstore::new(&mem); + + let mut hamt: Hamt<_, BytesKey> = factory.new_with_bit_width(&store, 5); + + for i in 0..size_factor { + hamt.set(tstring(i), tstring(i)).unwrap(); + } + + // Iterating through hamt with dirty caches. + let mut count = 0; + hamt.for_each_cacheless(|k, v| { + assert_eq!(k, v); + count += 1; + Ok(()) + }) + .unwrap(); + assert_eq!(count, size_factor); + + let c = hamt.flush().unwrap(); + cids.check_next(&c); + if let Some(stats) = &mut stats { + stats.check_next(&*store.stats.borrow()); + } + + let mut hamt: Hamt<_, BytesKey> = factory.load_with_bit_width(&c, &store, 5).unwrap(); + + // Iterating through hamt with no cache. + let mut count = 0; + hamt.for_each_cacheless(|k, v| { + assert_eq!(k, v); + count += 1; + Ok(()) + }) + .unwrap(); + assert_eq!(count, size_factor); + + let c = hamt.flush().unwrap(); + cids.check_next(&c); + if let Some(stats) = &mut stats { + stats.check_next(&*store.stats.borrow()); + } + + // Iterate with a few modified nodes. + if size_factor > 10 { + hamt.set(tstring(10), tstring("modified-10")).unwrap(); + } + if size_factor > 80 { + hamt.set(tstring(80), tstring("modified-80")).unwrap(); + hamt.set(tstring(81), tstring("modified-81")).unwrap(); + } + if size_factor > 30 { + assert!(hamt.delete(&tstring(30)).unwrap().is_some()); + } + + // Delete a non-existent value + assert!(hamt.delete(&tstring(size_factor + 100)).unwrap().is_none()); + + // Iterate and verify modifications + let mut count = 0; + hamt.for_each_cacheless(|k, v| { + if size_factor > 30 { + // Should not see deleted key + assert_ne!(k, &tstring(30)); + } + + if size_factor > 10 && k == &tstring(10) { + assert_eq!(v, &tstring("modified-10")); + } else if size_factor > 80 && k == &tstring(80) { + assert_eq!(v, &tstring("modified-80")); + } else if size_factor > 80 && k == &tstring(81) { + assert_eq!(v, &tstring("modified-81")); + } else if k != &tstring(30) { + // Normal key-value equality except for modified keys + assert_eq!(k, v); + } + + count += 1; + Ok(()) + }) + .unwrap(); + + // Verify count matches expectation: original size - deleted + new entries + let expected_count = if size_factor > 30 { + size_factor - 1 + } else { + size_factor + }; + assert_eq!(count, expected_count); + + let c = hamt.flush().unwrap(); + cids.check_next(&c); + if let Some(stats) = &mut stats { + stats.check_next(&*store.stats.borrow()); + } +} + fn for_each_ranged( size_factor: usize, factory: HamtFactory, @@ -698,7 +819,7 @@ fn for_each_ranged( } let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); // Chain paginated requests over a HAMT with committed nodes let mut hamt: Hamt<_, usize> = factory.load_with_bit_width(&c, &store, 5).unwrap(); @@ -749,7 +870,7 @@ fn for_each_ranged( } let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); // Test modifications and deletions in ranged iteration if size_factor > 10 { @@ -782,7 +903,7 @@ fn for_each_ranged( assert_eq!(kvs_after_mod.len(), expected_count); let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); @@ -828,7 +949,7 @@ fn clear(factory: HamtFactory, mut cids: CidChecker) { assert_eq!(hamt.get(&3).unwrap(), Some(&"c".to_string())); let c = hamt.flush().unwrap(); - cids.check_next(c); + cids.check_next(&c); } #[cfg(feature = "identity")] @@ -973,7 +1094,7 @@ fn clean_child_ordering(factory: HamtFactory, stats: Option, mut cids: } let root = h.flush().unwrap(); - cids.check_next(root); + cids.check_next(&root); let mut h: Hamt<_, u8> = factory.load_with_bit_width(&root, &store, 5).unwrap(); h.delete(&make_key(104)).unwrap(); @@ -981,7 +1102,7 @@ fn clean_child_ordering(factory: HamtFactory, stats: Option, mut cids: let root = h.flush().unwrap(); let _: Hamt<_, u8> = factory.load_with_bit_width(&root, &store, 5).unwrap(); - cids.check_next(root); + cids.check_next(&root); if let Some(stats) = stats { assert_eq!(*store.stats.borrow(), stats); @@ -1181,7 +1302,7 @@ mod test_default { use fvm_ipld_hamt::{Config, Hamtv0}; use quickcheck_macros::quickcheck; - use crate::{CidChecker, HamtFactory, LimitedKeyOps, UniqueKeyValuePairs}; + use crate::{BSStatsChecker, CidChecker, HamtFactory, LimitedKeyOps, UniqueKeyValuePairs}; #[test] fn test_basics() { @@ -1285,7 +1406,7 @@ mod test_default { #[test] fn for_each_ranged() { #[rustfmt::skip] - let stats = BSStats {r: 30, w: 33, br: 2895, bw: 4321}; + let stats = BSStats {r: 30, w: 33, br: 2895, bw: 4321}; let cids = CidChecker::new(vec![ "bafy2bzacedy4ypl2vedhdqep3llnwko6vrtfiys5flciz2f3c55pl4whlhlqm", "bafy2bzacedy4ypl2vedhdqep3llnwko6vrtfiys5flciz2f3c55pl4whlhlqm", @@ -1294,6 +1415,22 @@ mod test_default { super::for_each_ranged(200, HamtFactory::default(), Some(stats), cids); } + #[test] + fn for_each_cacheless() { + #[rustfmt::skip] + let stats = BSStatsChecker::new(vec![ + BSStats {r: 0, w: 30, br: 0, bw: 3209}, + BSStats {r: 30, w: 30, br: 3209, bw: 3209}, + BSStats {r: 60, w: 33, br: 5158, bw: 4697}, + ]); + let cids = CidChecker::new(vec![ + "bafy2bzaceczhz54xmmz3xqnbmvxfbaty3qprr6dq7xh5vzwqbirlsnbd36z7a", + "bafy2bzaceczhz54xmmz3xqnbmvxfbaty3qprr6dq7xh5vzwqbirlsnbd36z7a", + "bafy2bzacebln5j7tdfavh2qqhio6mgaoq6mm2jbmcex2ngcmi3uqlx5k3mov4", + ]); + super::for_each_cacheless(200, HamtFactory::default(), Some(stats), cids); + } + #[test] fn clean_child_ordering() { #[rustfmt::skip] From 3cd52f74d2fffa7d76ed3050d72d691e8f4909c3 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 4 Sep 2025 20:27:21 +0800 Subject: [PATCH 2/5] update bench --- ipld/hamt/benches/hamt_benchmark.rs | 62 ++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/ipld/hamt/benches/hamt_benchmark.rs b/ipld/hamt/benches/hamt_benchmark.rs index e115c4f1c8..d36d122ec8 100644 --- a/ipld/hamt/benches/hamt_benchmark.rs +++ b/ipld/hamt/benches/hamt_benchmark.rs @@ -5,9 +5,11 @@ extern crate serde; use criterion::{Criterion, black_box, criterion_group, criterion_main}; +use fvm_ipld_blockstore::{Blockstore, MemoryBlockstore}; use fvm_ipld_encoding::tuple::*; use fvm_ipld_hamt::Hamt; +const BIT_WIDTH: u32 = 5; const ITEM_COUNT: u8 = 40; // Struct to simulate a reasonable amount of data per value into the amt @@ -37,8 +39,8 @@ impl BenchData { fn insert(c: &mut Criterion) { c.bench_function("HAMT bulk insert (no flush)", |b| { b.iter(|| { - let db = fvm_ipld_blockstore::MemoryBlockstore::default(); - let mut a = Hamt::<_, _>::new_with_bit_width(&db, 5); + let db = MemoryBlockstore::default(); + let mut a = Hamt::<_, _>::new_with_bit_width(&db, BIT_WIDTH); for i in 0..black_box(ITEM_COUNT) { a.set(black_box(vec![i; 20].into()), black_box(BenchData::new(i))) @@ -51,12 +53,12 @@ fn insert(c: &mut Criterion) { fn insert_load_flush(c: &mut Criterion) { c.bench_function("HAMT bulk insert with flushing and loading", |b| { b.iter(|| { - let db = fvm_ipld_blockstore::MemoryBlockstore::default(); - let mut empt = Hamt::<_, ()>::new_with_bit_width(&db, 5); + let db = MemoryBlockstore::default(); + let mut empt = Hamt::<_, ()>::new_with_bit_width(&db, BIT_WIDTH); let mut cid = empt.flush().unwrap(); for i in 0..black_box(ITEM_COUNT) { - let mut a = Hamt::<_, _>::load_with_bit_width(&cid, &db, 5).unwrap(); + let mut a = Hamt::<_, _>::load_with_bit_width(&cid, &db, BIT_WIDTH).unwrap(); a.set(black_box(vec![i; 20].into()), black_box(BenchData::new(i))) .unwrap(); cid = a.flush().unwrap(); @@ -66,16 +68,13 @@ fn insert_load_flush(c: &mut Criterion) { } fn delete(c: &mut Criterion) { - let db = fvm_ipld_blockstore::MemoryBlockstore::default(); - let mut a = Hamt::<_, _>::new_with_bit_width(&db, 5); - for i in 0..black_box(ITEM_COUNT) { - a.set(vec![i; 20].into(), BenchData::new(i)).unwrap(); - } + let db = MemoryBlockstore::default(); + let mut a = setup_hamt(&db); let cid = a.flush().unwrap(); c.bench_function("HAMT deleting all nodes", |b| { b.iter(|| { - let mut a = Hamt::<_, BenchData>::load_with_bit_width(&cid, &db, 5).unwrap(); + let mut a = Hamt::<_, BenchData>::load_with_bit_width(&cid, &db, BIT_WIDTH).unwrap(); for i in 0..black_box(ITEM_COUNT) { a.delete(black_box([i; 20].as_ref())).unwrap(); } @@ -84,20 +83,47 @@ fn delete(c: &mut Criterion) { } fn for_each(c: &mut Criterion) { - let db = fvm_ipld_blockstore::MemoryBlockstore::default(); - let mut a = Hamt::<_, _>::new_with_bit_width(&db, 5); - for i in 0..black_box(ITEM_COUNT) { - a.set(vec![i; 20].into(), BenchData::new(i)).unwrap(); - } + let db = MemoryBlockstore::default(); + let mut a = setup_hamt(&db); let cid = a.flush().unwrap(); c.bench_function("HAMT for_each function", |b| { b.iter(|| { - let a = Hamt::<_, _>::load_with_bit_width(&cid, &db, 5).unwrap(); + let a = Hamt::<_, _>::load_with_bit_width(&cid, &db, BIT_WIDTH).unwrap(); black_box(a).for_each(|_k, _v: &BenchData| Ok(())).unwrap(); }) }); } -criterion_group!(benches, insert, insert_load_flush, delete, for_each); +fn for_each_cacheless(c: &mut Criterion) { + let db = MemoryBlockstore::default(); + let mut a = setup_hamt(&db); + let cid = a.flush().unwrap(); + + c.bench_function("HAMT for_each_cacheless function", |b| { + b.iter(|| { + let a = Hamt::<_, _>::load_with_bit_width(&cid, &db, BIT_WIDTH).unwrap(); + black_box(a) + .for_each_cacheless(|_k, _v: &BenchData| Ok(())) + .unwrap(); + }) + }); +} + +fn setup_hamt(db: &BS) -> Hamt<&BS, BenchData> { + let mut a = Hamt::<_, _>::new_with_bit_width(db, BIT_WIDTH); + for i in 0..ITEM_COUNT { + a.set(vec![i; 20].into(), BenchData::new(i)).unwrap(); + } + a +} + +criterion_group!( + benches, + insert, + insert_load_flush, + delete, + for_each, + for_each_cacheless +); criterion_main!(benches); From f60806eade4e5f860bdf6125f176e724421f6143 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 4 Sep 2025 20:58:25 +0800 Subject: [PATCH 3/5] refactor --- ipld/hamt/src/node.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index 3f8a0755fd..db3555e152 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -248,12 +248,8 @@ where } let mut stack = vec![StackItem::Borrowed(&self.pointers)]; - loop { - let Some(stack_last_item) = stack.pop() else { - // Terminate here since both `current` and `stack` reach the end - return Ok(()); - }; - for pointer in stack_last_item.as_ref() { + while let Some(pointers) = stack.pop() { + for pointer in pointers.as_ref() { match pointer { Pointer::Link { cid, cache: _ } => { let node = Node::load(conf, bs, cid, stack.len() as u32)?; @@ -268,6 +264,7 @@ where } } } + Ok(()) } /// Search for a key. From 9debdbcb3abb8c0b404662d40c1a88847266b0a7 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 4 Sep 2025 21:20:36 +0800 Subject: [PATCH 4/5] parity test --- ipld/hamt/src/node.rs | 2 +- ipld/hamt/tests/hamt_tests.rs | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index db3555e152..ff6f71a5db 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -252,7 +252,7 @@ where for pointer in pointers.as_ref() { match pointer { Pointer::Link { cid, cache: _ } => { - let node = Node::load(conf, bs, cid, stack.len() as u32)?; + let node = Node::load(conf, bs, cid, (stack.len() + 1) as u32)?; stack.push(StackItem::Owned(node.pointers)) } Pointer::Dirty(node) => stack.push(StackItem::Owned(node.pointers.clone())), diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index 950fb4e0c2..6e21b7c4ec 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -608,20 +608,39 @@ fn for_each_cacheless( let store = TrackingBlockstore::new(&mem); let mut hamt: Hamt<_, BytesKey> = factory.new_with_bit_width(&store, 5); + let store2 = MemoryBlockstore::default(); + let mut hamt2: Hamt<_, BytesKey> = factory.new_with_bit_width(&store2, 5); for i in 0..size_factor { hamt.set(tstring(i), tstring(i)).unwrap(); + hamt2.set(tstring(i), tstring(i)).unwrap(); } // Iterating through hamt with dirty caches. let mut count = 0; + let mut keys = Vec::with_capacity(size_factor); hamt.for_each_cacheless(|k, v| { assert_eq!(k, v); + keys.push(k.0.clone()); count += 1; Ok(()) }) .unwrap(); assert_eq!(count, size_factor); + keys.sort(); + + // Ensure parity between `for_each_cacheless` and `for_each` + let mut expected_keys = Vec::with_capacity(size_factor); + hamt2 + .for_each(|k, v| { + assert_eq!(k, v); + expected_keys.push(k.0.clone()); + Ok(()) + }) + .unwrap(); + expected_keys.sort(); + + assert_eq!(keys, expected_keys); let c = hamt.flush().unwrap(); cids.check_next(&c); From 313aec82f9a4df3ad4f01a2214bac444b64964ac Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 4 Sep 2025 22:12:27 +0800 Subject: [PATCH 5/5] fix iteration order --- ipld/hamt/src/node.rs | 71 ++++++++++++++++++++++++++--------- ipld/hamt/tests/hamt_tests.rs | 9 ++++- 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index ff6f71a5db..690a7fcbfe 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -233,38 +233,73 @@ where K: Clone, V: Clone, { - enum StackItem<'a, T> { + enum IterItem<'a, T> { Borrowed(&'a T), Owned(T), } - impl AsRef for StackItem<'_, T> { - fn as_ref(&self) -> &T { + enum StackItem<'a, T> { + Iter(std::slice::Iter<'a, T>), + IntoIter(std::vec::IntoIter), + } + + impl<'a, V> From> for StackItem<'a, V> { + fn from(value: std::slice::Iter<'a, V>) -> Self { + Self::Iter(value) + } + } + + impl From> for StackItem<'_, V> { + fn from(value: std::vec::IntoIter) -> Self { + Self::IntoIter(value) + } + } + + impl<'a, V> Iterator for StackItem<'a, V> { + type Item = IterItem<'a, V>; + + fn next(&mut self) -> Option { match self { - Self::Borrowed(i) => i, - Self::Owned(i) => i, + Self::Iter(it) => it.next().map(IterItem::Borrowed), + Self::IntoIter(it) => it.next().map(IterItem::Owned), } } } - let mut stack = vec![StackItem::Borrowed(&self.pointers)]; - while let Some(pointers) = stack.pop() { - for pointer in pointers.as_ref() { - match pointer { - Pointer::Link { cid, cache: _ } => { - let node = Node::load(conf, bs, cid, (stack.len() + 1) as u32)?; - stack.push(StackItem::Owned(node.pointers)) + let mut stack: Vec> = vec![self.pointers.iter().into()]; + loop { + let Some(pointers) = stack.last_mut() else { + return Ok(()); + }; + let Some(pointer) = pointers.next() else { + stack.pop(); + continue; + }; + match pointer { + IterItem::Borrowed(Pointer::Link { cid, cache: _ }) => { + let node = Node::load(conf, bs, cid, stack.len() as u32)?; + stack.push(node.pointers.into_iter().into()) + } + IterItem::Owned(Pointer::Link { cid, cache: _ }) => { + let node = Node::load(conf, bs, &cid, stack.len() as u32)?; + stack.push(node.pointers.into_iter().into()) + } + IterItem::Borrowed(Pointer::Dirty(node)) => stack.push(node.pointers.iter().into()), + IterItem::Owned(Pointer::Dirty(node)) => { + stack.push(node.pointers.clone().into_iter().into()) + } + IterItem::Borrowed(Pointer::Values(kvs)) => { + for kv in kvs.iter() { + f(kv.key(), kv.value())?; } - Pointer::Dirty(node) => stack.push(StackItem::Owned(node.pointers.clone())), - Pointer::Values(kvs) => { - for kv in kvs.iter() { - f(kv.key(), kv.value())?; - } + } + IterItem::Owned(Pointer::Values(kvs)) => { + for kv in kvs.iter() { + f(kv.key(), kv.value())?; } } } } - Ok(()) } /// Search for a key. diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index 6e21b7c4ec..3d1512ebc2 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -627,7 +627,6 @@ fn for_each_cacheless( }) .unwrap(); assert_eq!(count, size_factor); - keys.sort(); // Ensure parity between `for_each_cacheless` and `for_each` let mut expected_keys = Vec::with_capacity(size_factor); @@ -638,7 +637,6 @@ fn for_each_cacheless( Ok(()) }) .unwrap(); - expected_keys.sort(); assert_eq!(keys, expected_keys); @@ -1577,6 +1575,13 @@ macro_rules! test_hamt_mod { } } + #[test] + fn for_each_cacheless() { + for s in super::SIZE_FACTORS { + super::for_each_cacheless(*s, $factory, None, CidChecker::empty()) + } + } + #[test] fn for_each_ranged() { for s in super::SIZE_FACTORS {