Skip to content

Commit 621dfb2

Browse files
authored
feat: implement a persistent backend for the Large SMT Forest (#876)
This commit implements a persistent backend for the SMT forest that both allows it to start up rapidly from an on-disk state, and allows the offloading of many portions of the forest from resident memory. It stores the full tree data for each lineage in the forest in a RocksDB instance.

As it stands, the backend has not undergone any particular optimization work; instead, it relies predominantly on the access-pattern optimizations developed for `LargeSmt`. Comparative performance analysis has been performed against Large SMT, and we have found that in like-for-like scenarios, the forest ranges from 1.5x to 2x slower than `LargeSmt`. This is perfectly in line with estimations, as in the worst case the forest has to perform 2x the amount of I/O due to its lack of an in-memory prefix.

The commit also includes basic benchmarks for the large SMT forest. While they do not cover every single piece of functionality, they currently cover the following, specifically for the persistent backend:

- `forest.open(...)`: The time it takes to get a single opening from some arbitrary tree in the forest, both for the current tree and from the history.
- `forest.add_lineage(...)`: The time it takes to add a new lineage to the forest.
- `forest.update_tree(...)`: The time it takes to update an existing lineage in the forest.
- `forest.update_forest(...)`: The time it takes to update multiple lineages in the forest in a single batch.

There may be further opportunities for optimization, based around tailoring the database parameters better for the forest, but the current performance is well within the expected bounds.
1 parent bc86bb3 commit 621dfb2

File tree

28 files changed

+4067
-166
lines changed

28 files changed

+4067
-166
lines changed

miden-crypto/Cargo.toml

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,14 @@ required-features = ["std"]
4646

4747
[[bench]]
4848
harness = false
49-
name = "large-smt"
49+
name = "large_smt"
5050
required-features = ["rocksdb"]
5151

52+
[[bench]]
53+
harness = false
54+
name = "large_smt_forest"
55+
required-features = ["persistent-forest"]
56+
5257
[[bench]]
5358
harness = false
5459
name = "store"
@@ -75,15 +80,16 @@ name = "transpose"
7580
required-features = ["std"]
7681

7782
[features]
78-
concurrent = ["dep:rayon", "p3-maybe-rayon/parallel", "p3-miden-prover/parallel", "p3-util/parallel", "std"]
79-
default = ["concurrent", "std"]
80-
executable = ["concurrent", "dep:clap", "dep:rand-utils"]
81-
fuzzing = []
82-
internal = ["concurrent"]
83-
rocksdb = ["concurrent", "dep:rocksdb"]
84-
serde = ["dep:serde", "serde?/alloc"]
85-
std = ["blake3/std", "dep:cc", "miden-serde-utils/std", "rand/std", "rand/thread_rng", "serde?/std"]
86-
testing = ["dep:proptest", "miden-field/testing"]
83+
concurrent = ["dep:rayon", "p3-maybe-rayon/parallel", "p3-miden-prover/parallel", "p3-util/parallel", "std"]
84+
default = ["concurrent", "std"]
85+
executable = ["concurrent", "dep:clap", "dep:rand-utils"]
86+
fuzzing = []
87+
internal = ["concurrent"]
88+
persistent-forest = ["rocksdb", "serde"]
89+
rocksdb = ["concurrent", "dep:rocksdb"]
90+
serde = ["dep:serde", "serde?/alloc"]
91+
std = ["blake3/std", "dep:cc", "miden-serde-utils/std", "rand/std", "rand/thread_rng", "serde?/std"]
92+
testing = ["dep:proptest", "miden-field/testing"]
8793

8894
[dependencies]
8995
# Miden dependencies
Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use std::hint;
1+
use std::{hint, iter::empty};
22

3-
use criterion::{Criterion, criterion_group, criterion_main};
3+
use criterion::{BatchSize, Criterion, criterion_group, criterion_main};
44
use miden_crypto::{
55
Felt, Word,
66
merkle::{
@@ -13,11 +13,10 @@ use miden_crypto::{
1313
};
1414

1515
mod common;
16-
use common::*;
1716

18-
use crate::{
19-
common::data::{generate_smt_entries_sequential, generate_test_keys_sequential},
17+
use crate::common::{
2018
config::{DEFAULT_MEASUREMENT_TIME, DEFAULT_SAMPLE_SIZE},
19+
data::{generate_smt_entries_sequential, generate_test_keys_sequential},
2120
};
2221

2322
// SUBTREE SERIALIZATION BENCHMARKS
@@ -138,6 +137,28 @@ benchmark_with_setup_data! {
138137
},
139138
}
140139

140+
benchmark_with_setup_data! {
141+
large_smt_open_in_large_tree,
142+
DEFAULT_MEASUREMENT_TIME,
143+
DEFAULT_SAMPLE_SIZE,
144+
"rocksdb_smt_open_in_large_tree",
145+
|| {
146+
let entries = generate_smt_entries_sequential(10_000);
147+
let keys = generate_test_keys_sequential(10);
148+
let temp_dir = tempfile::TempDir::new().unwrap();
149+
let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap();
150+
let smt = LargeSmt::with_entries(storage, entries).unwrap();
151+
(smt, keys, temp_dir)
152+
},
153+
|b: &mut criterion::Bencher, (smt, keys, _temp_dir): &(LargeSmt<RocksDbStorage>, Vec<Word>, tempfile::TempDir)| {
154+
b.iter(|| {
155+
for key in keys {
156+
hint::black_box(smt.open(key));
157+
}
158+
})
159+
},
160+
}
161+
141162
benchmark_with_setup_data! {
142163
large_smt_compute_mutations,
143164
DEFAULT_MEASUREMENT_TIME,
@@ -241,6 +262,52 @@ benchmark_batch! {
241262
|size| Some(criterion::Throughput::Elements(size as u64))
242263
}
243264

265+
benchmark_batch! {
266+
large_smt_insert_batch_to_empty_tree,
267+
&[100, 1_000, 10_000],
268+
|b: &mut criterion::Bencher, insert_count: usize| {
269+
b.iter_batched(
270+
|| {
271+
let temp_dir = tempfile::TempDir::new().unwrap();
272+
let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap();
273+
let smt = LargeSmt::with_entries(storage, empty()).unwrap();
274+
let batch = generate_smt_entries_sequential(insert_count);
275+
276+
(temp_dir, smt, batch)
277+
},
278+
|(_temp_dir, mut smt, batch)| {
279+
smt.insert_batch(batch).unwrap();
280+
},
281+
BatchSize::LargeInput
282+
)
283+
},
284+
|size| Some(criterion::Throughput::Elements(size as u64))
285+
}
286+
287+
benchmark_batch! {
288+
large_smt_insert_batch_to_populated_tree,
289+
&[100, 1_000, 10_000],
290+
|b: &mut criterion::Bencher, insert_count: usize| {
291+
let initial_entries = generate_smt_entries_sequential(10_000);
292+
293+
b.iter_batched(
294+
|| {
295+
let temp_dir = tempfile::TempDir::new().unwrap();
296+
let storage = RocksDbStorage::open(RocksDbConfig::new(temp_dir.path())).unwrap();
297+
let smt = LargeSmt::with_entries(storage, initial_entries.clone()).unwrap();
298+
let batch = generate_smt_entries_sequential(insert_count);
299+
300+
(temp_dir, smt, batch)
301+
},
302+
|(_temp_dir, mut smt, batch)| {
303+
smt.insert_batch(batch).unwrap();
304+
},
305+
BatchSize::LargeInput
306+
)
307+
},
308+
|size| Some(criterion::Throughput::Elements(size as u64))
309+
}
310+
244311
// MEMORY STORAGE BENCHMARKS
245312
// ================================================================================================
246313

@@ -333,10 +400,13 @@ benchmark_batch! {
333400
criterion_group!(
334401
large_smt_benchmark_group,
335402
large_smt_open,
403+
large_smt_open_in_large_tree,
336404
large_smt_compute_mutations,
337405
large_smt_apply_mutations,
338406
large_smt_apply_mutations_with_reversion,
339407
large_smt_insert_batch,
408+
large_smt_insert_batch_to_empty_tree,
409+
large_smt_insert_batch_to_populated_tree,
340410
);
341411

342412
criterion_group!(
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
//! This module contains the benchmarks for the large SMT forest, focusing on the performance of key
2+
//! operations.
3+
4+
mod common;
5+
6+
use std::hint;
7+
8+
use criterion::{BatchSize, Criterion, criterion_group, criterion_main};
9+
use miden_crypto::{
10+
merkle::smt::{
11+
Backend, ForestPersistentBackend, LargeSmtForest, LineageId, PersistentBackendConfig,
12+
SmtForestUpdateBatch, SmtUpdateBatch, TreeId,
13+
},
14+
rand::test_utils::rand_value,
15+
};
16+
use miden_field::Word;
17+
18+
use crate::common::{
19+
config::{DEFAULT_MEASUREMENT_TIME, DEFAULT_SAMPLE_SIZE},
20+
data::{generate_smt_entries_sequential, generate_test_keys_sequential},
21+
};
22+
23+
// CONSTANTS
24+
// ================================================================================================
25+
26+
/// The number of entries to modify in an arbitrary batch of updates.
27+
const BATCH_SIZE: usize = 10_000;
28+
29+
/// The number of trees we update in a single whole-forest batch.
30+
const TREES_PER_BATCH: usize = 50;
31+
32+
// SETUP FUNCTIONALITY
33+
// ================================================================================================
34+
35+
/// The setup for a benchmark over the smt forest.
36+
#[derive(Debug)]
37+
struct ForestSetup<B: Backend> {
38+
pub forest: LargeSmtForest<B>,
39+
_file: Option<tempfile::TempDir>,
40+
}
41+
impl ForestSetup<ForestPersistentBackend> {
42+
/// Sets up a new persistent forest as a benchmark setup.
43+
fn new_persistent() -> Self {
44+
let dir = tempfile::tempdir().unwrap();
45+
let backend =
46+
ForestPersistentBackend::load(PersistentBackendConfig::new(dir.path()).unwrap())
47+
.unwrap();
48+
let forest = LargeSmtForest::new(backend).unwrap();
49+
let _file = Some(dir);
50+
51+
Self { forest, _file }
52+
}
53+
}
54+
55+
/// Generates a tree update batch containing `count` entries which may be additions or removals.
56+
fn generate_tree_update_batch(count: usize) -> SmtUpdateBatch {
57+
let entries = generate_smt_entries_sequential(count);
58+
SmtUpdateBatch::from(entries.into_iter())
59+
}
60+
61+
/// Generates a forest update batch containing `count` entries which may be additions or removals
62+
/// and which are allocated equally over the `lineages` in the forest.
63+
fn generate_forest_update_batch(lineages: &[LineageId], count: usize) -> SmtForestUpdateBatch {
64+
let mut updates = SmtForestUpdateBatch::empty();
65+
for lineage in lineages {
66+
*updates.operations(*lineage) = generate_tree_update_batch(count / lineages.len());
67+
}
68+
updates
69+
}
70+
71+
/// Generates `count` lineage identifiers.
72+
fn generate_lineages(count: usize) -> Vec<LineageId> {
73+
let mut lineages = Vec::new();
74+
for _ in 0..count {
75+
lineages.push(LineageId::new(rand_value()));
76+
}
77+
lineages
78+
}
79+
80+
// FOREST WITH PERSISTENT BACKEND
81+
// ================================================================================================
82+
83+
// Roughly equivalent to large_smt::large_smt_open in functionality.
84+
benchmark_with_setup_data! {
85+
large_smt_forest_persistent_open_full_tree,
86+
DEFAULT_MEASUREMENT_TIME,
87+
DEFAULT_SAMPLE_SIZE,
88+
"large_smt_forest_persistent_open_full_tree",
89+
|| {
90+
let mut setup = ForestSetup::new_persistent();
91+
let batch = generate_tree_update_batch(BATCH_SIZE);
92+
let lineage = LineageId::new([0x42; 32]);
93+
let version = 0;
94+
setup.forest.add_lineage(lineage, version, batch).unwrap();
95+
let keys = generate_test_keys_sequential(10);
96+
let tree = TreeId::new(lineage, version);
97+
(setup, keys, tree)
98+
},
99+
|b: &mut criterion::Bencher, (setup, keys, tree): &(ForestSetup<_>, Vec<Word>, TreeId)| {
100+
b.iter(|| {
101+
for key in keys {
102+
hint::black_box(setup.forest.open(*tree, *key).unwrap());
103+
}
104+
})
105+
}
106+
}
107+
108+
// Doesn't have a direct analogue in large SMT, but should be roughly equivalent in performance to
109+
// large_smt_forest_persistent_open_full_tree above, as the historical portion should not dominate.
110+
benchmark_with_setup_data! {
111+
large_smt_forest_persistent_open_historical_tree,
112+
DEFAULT_MEASUREMENT_TIME,
113+
DEFAULT_SAMPLE_SIZE,
114+
"large_smt_forest_persistent_open_historical_tree",
115+
|| {
116+
let mut setup = ForestSetup::new_persistent();
117+
let initial_batch = generate_tree_update_batch(BATCH_SIZE);
118+
let lineage = LineageId::new([0x42; 32]);
119+
let version = 0;
120+
setup.forest.add_lineage(lineage, version, initial_batch).unwrap();
121+
let update_batch = generate_tree_update_batch(BATCH_SIZE);
122+
setup.forest.update_tree(lineage, 1, update_batch).unwrap();
123+
124+
let keys = generate_test_keys_sequential(10);
125+
let tree = TreeId::new(lineage, version);
126+
(setup, keys, tree)
127+
},
128+
|b: &mut criterion::Bencher, (setup, keys, tree): &(ForestSetup<_>, Vec<Word>, TreeId)| {
129+
b.iter(|| {
130+
for key in keys {
131+
hint::black_box(setup.forest.open(*tree, *key).unwrap());
132+
}
133+
})
134+
},
135+
}
136+
137+
// Roughly equivalent to large_smt::large_smt_insert_batch_to_empty_tree in functionality.
138+
benchmark_batch! {
139+
large_smt_forest_persistent_add_lineage,
140+
&[100, 1_000, 10_000],
141+
|b: &mut criterion::Bencher, entry_count: usize| {
142+
let lineage = LineageId::new([0x42; 32]);
143+
let version = 0;
144+
145+
b.iter_batched(
146+
|| {
147+
let batch = generate_tree_update_batch(entry_count);
148+
let setup = ForestSetup::new_persistent();
149+
(setup, batch)
150+
},
151+
|(mut setup, batch)| {
152+
setup.forest.add_lineage(lineage, version, batch).unwrap()
153+
},
154+
BatchSize::LargeInput
155+
)
156+
},
157+
|size| Some(criterion::Throughput::Elements(size as u64))
158+
}
159+
160+
// Roughly equivalent to large_smt::large_smt_insert_batch_to_populated_tree in functionality.
161+
benchmark_batch! {
162+
large_smt_forest_persistent_update_tree,
163+
&[100, 1_000, 10_000],
164+
|b: &mut criterion::Bencher, entry_count: usize| {
165+
let initial_batch = generate_tree_update_batch(BATCH_SIZE);
166+
let lineage = LineageId::new([0x42; 32]);
167+
let version = 0;
168+
169+
b.iter_batched(
170+
|| {
171+
let mut setup = ForestSetup::new_persistent();
172+
setup.forest.add_lineage(lineage, version, initial_batch.clone()).unwrap();
173+
let batch = generate_tree_update_batch(entry_count);
174+
(setup, batch)
175+
},
176+
|(mut setup, batch)| {
177+
setup.forest.update_tree(lineage, version + 1, batch).unwrap();
178+
},
179+
BatchSize::LargeInput
180+
)
181+
},
182+
|size| Some(criterion::Throughput::Elements(size as u64))
183+
}
184+
185+
// Has no direct equivalent in the large SMT, but should take roughly the same time as
186+
// large_smt_forest_persistent_update_tree above, as we try to do as much work in parallel as
187+
// possible.
188+
benchmark_batch! {
189+
large_smt_forest_persistent_update_forest,
190+
&[100, 1_000, 10_000],
191+
|b: &mut criterion::Bencher, entry_count: usize| {
192+
let initial_batch = generate_tree_update_batch(100);
193+
let lineages = generate_lineages(TREES_PER_BATCH);
194+
195+
b.iter_batched(
196+
|| {
197+
let mut setup = ForestSetup::new_persistent();
198+
let version = 0;
199+
for lineage in &lineages {
200+
setup.forest.add_lineage(*lineage, version, initial_batch.clone()).unwrap();
201+
}
202+
203+
let batch = generate_forest_update_batch(&lineages, entry_count);
204+
205+
(setup, batch)
206+
},
207+
|(mut setup, batch)| {
208+
hint::black_box(setup.forest.update_forest(1, batch).unwrap())
209+
},
210+
BatchSize::LargeInput
211+
)
212+
},
213+
|size| Some(criterion::Throughput::Elements(size as u64))
214+
}
215+
216+
// BENCHMARK RUNS
217+
// ================================================================================================
218+
219+
criterion_group!(
220+
large_smt_forest_persistent_group,
221+
large_smt_forest_persistent_open_full_tree,
222+
large_smt_forest_persistent_open_historical_tree,
223+
large_smt_forest_persistent_add_lineage,
224+
large_smt_forest_persistent_update_tree,
225+
large_smt_forest_persistent_update_forest,
226+
);
227+
228+
criterion_main!(large_smt_forest_persistent_group);

miden-crypto/proptest-regressions/merkle/smt/large_forest/property_tests.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)