Skip to content

Commit eaec760

Browse files
committed
Add prefix filter support
1 parent ee8c68e commit eaec760

21 files changed

Lines changed: 6327 additions & 125 deletions

File tree

benches/run_reader.rs

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
2+
use lsm_tree::prefix::FixedPrefixExtractor;
3+
use lsm_tree::{AbstractTree, Config};
4+
use std::sync::Arc;
5+
use std::time::Instant;
6+
use tempfile::TempDir;
7+
8+
fn create_tree_with_segments(
9+
segment_count: usize,
10+
with_prefix_extractor: bool,
11+
) -> (TempDir, lsm_tree::Tree) {
12+
let tempdir = tempfile::tempdir().unwrap();
13+
14+
let mut config = Config::new(&tempdir);
15+
if with_prefix_extractor {
16+
config = config.prefix_extractor(Arc::new(FixedPrefixExtractor::new(8)));
17+
}
18+
19+
let tree = config.open().unwrap();
20+
21+
// Create segments with distinct prefixes
22+
for segment_idx in 0..segment_count {
23+
let prefix = format!("seg{:04}", segment_idx);
24+
25+
// Add 100 keys per segment
26+
for key_idx in 0..100 {
27+
let key = format!("{}_{:04}", prefix, key_idx);
28+
tree.insert(key.as_bytes(), vec![0u8; 100], 0);
29+
}
30+
31+
// Flush to create a segment
32+
tree.flush_active_memtable(0).unwrap();
33+
}
34+
35+
(tempdir, tree)
36+
}
37+
38+
fn benchmark_range_query(c: &mut Criterion) {
39+
let mut group = c.benchmark_group("range_query");
40+
41+
// Test different segment counts
42+
for segment_count in [10, 100, 500, 1000] {
43+
// Benchmark without prefix extractor
44+
group.bench_with_input(
45+
BenchmarkId::new("no_prefix", segment_count),
46+
&segment_count,
47+
|b, &count| {
48+
let (_tempdir, tree) = create_tree_with_segments(count, false);
49+
50+
b.iter(|| {
51+
// Query for a range that doesn't exist
52+
let start: &[u8] = b"zzz_0000";
53+
let end: &[u8] = b"zzz_9999";
54+
let iter = tree.range(start..=end, 0, None);
55+
// Force evaluation by counting
56+
let count = iter.count();
57+
black_box(count);
58+
});
59+
},
60+
);
61+
62+
// Benchmark with prefix extractor
63+
group.bench_with_input(
64+
BenchmarkId::new("with_prefix", segment_count),
65+
&segment_count,
66+
|b, &count| {
67+
let (_tempdir, tree) = create_tree_with_segments(count, true);
68+
69+
b.iter(|| {
70+
// Query for a range that doesn't exist (will check filters)
71+
let start: &[u8] = b"zzz_0000";
72+
let end: &[u8] = b"zzz_9999";
73+
let iter = tree.range(start..=end, 0, None);
74+
// Force evaluation by counting
75+
let count = iter.count();
76+
black_box(count);
77+
});
78+
},
79+
);
80+
81+
// Benchmark with prefix extractor - existing prefix
82+
group.bench_with_input(
83+
BenchmarkId::new("with_prefix_exists", segment_count),
84+
&segment_count,
85+
|b, &count| {
86+
let (_tempdir, tree) = create_tree_with_segments(count, true);
87+
88+
b.iter(|| {
89+
// Query for a range that exists in the middle
90+
let mid = count / 2;
91+
let prefix = format!("seg{:04}", mid);
92+
let start_str = format!("{}_0000", prefix);
93+
let end_str = format!("{}_0099", prefix);
94+
let start: &[u8] = start_str.as_bytes();
95+
let end: &[u8] = end_str.as_bytes();
96+
let iter = tree.range(start..=end, 0, None);
97+
// Force evaluation by counting
98+
let count = iter.count();
99+
black_box(count);
100+
});
101+
},
102+
);
103+
}
104+
105+
group.finish();
106+
}
107+
108+
fn benchmark_timing_comparison(_c: &mut Criterion) {
109+
println!("\n=== RunReader Performance Benchmark ===");
110+
println!("Testing impact of prefix filter checks on large runs\n");
111+
112+
for segment_count in [100, 500, 1000] {
113+
println!("\n--- Testing with {} segments ---", segment_count);
114+
115+
// Test without prefix extractor
116+
let (_tempdir_no_prefix, tree_no_prefix) = create_tree_with_segments(segment_count, false);
117+
118+
let start = Instant::now();
119+
for _ in 0..100 {
120+
let start_key: &[u8] = b"zzz_0000";
121+
let end_key: &[u8] = b"zzz_9999";
122+
let iter = tree_no_prefix.range(start_key..=end_key, 0, None);
123+
let _ = iter.count();
124+
}
125+
let no_prefix_time = start.elapsed();
126+
let avg_no_prefix = no_prefix_time.as_nanos() / 100;
127+
128+
println!(" Without prefix extractor: {:>8} ns/query", avg_no_prefix);
129+
130+
// Test with prefix extractor
131+
let (_tempdir_with_prefix, tree_with_prefix) =
132+
create_tree_with_segments(segment_count, true);
133+
134+
let start = Instant::now();
135+
for _ in 0..100 {
136+
let start_key: &[u8] = b"zzz_0000";
137+
let end_key: &[u8] = b"zzz_9999";
138+
let iter = tree_with_prefix.range(start_key..=end_key, 0, None);
139+
let _ = iter.count();
140+
}
141+
let with_prefix_time = start.elapsed();
142+
let avg_with_prefix = with_prefix_time.as_nanos() / 100;
143+
144+
println!(
145+
" With prefix extractor: {:>8} ns/query",
146+
avg_with_prefix
147+
);
148+
149+
if avg_with_prefix > avg_no_prefix {
150+
let overhead = avg_with_prefix - avg_no_prefix;
151+
println!(
152+
" Overhead: {} ns ({:.1}%)",
153+
overhead,
154+
(overhead as f64 / avg_no_prefix as f64) * 100.0
155+
);
156+
} else {
157+
let savings = avg_no_prefix - avg_with_prefix;
158+
println!(
159+
" Savings: {} ns ({:.1}%)",
160+
savings,
161+
(savings as f64 / avg_no_prefix as f64) * 100.0
162+
);
163+
}
164+
165+
// Check CPU cost per segment
166+
if segment_count > 0 {
167+
let per_segment_overhead = if avg_with_prefix > avg_no_prefix {
168+
(avg_with_prefix - avg_no_prefix) / segment_count as u128
169+
} else {
170+
0
171+
};
172+
println!(" Per-segment overhead: ~{} ns", per_segment_overhead);
173+
}
174+
}
175+
176+
println!("\n=== Summary ===");
177+
println!("MAX_UPFRONT_CHECKS optimization limits overhead to checking at most 10 segments.");
178+
println!(
179+
"For runs with >10 segments, remaining segments are filtered lazily during iteration.\n"
180+
);
181+
}
182+
183+
fn run_timing_benchmark() {
184+
println!("\n=== RunReader Performance Benchmark ===");
185+
println!("Testing impact of prefix filter checks on large runs\n");
186+
187+
for segment_count in [100, 500, 1000] {
188+
println!("\n--- Testing with {} segments ---", segment_count);
189+
190+
// Test without prefix extractor
191+
let (_tempdir_no_prefix, tree_no_prefix) = create_tree_with_segments(segment_count, false);
192+
193+
let start = Instant::now();
194+
for _ in 0..100 {
195+
let start_key: &[u8] = b"zzz_0000";
196+
let end_key: &[u8] = b"zzz_9999";
197+
let iter = tree_no_prefix.range(start_key..=end_key, 0, None);
198+
let _ = iter.count();
199+
}
200+
let no_prefix_time = start.elapsed();
201+
let avg_no_prefix = no_prefix_time.as_nanos() / 100;
202+
203+
println!(" Without prefix extractor: {:>8} ns/query", avg_no_prefix);
204+
205+
// Test with prefix extractor
206+
let (_tempdir_with_prefix, tree_with_prefix) =
207+
create_tree_with_segments(segment_count, true);
208+
209+
let start = Instant::now();
210+
for _ in 0..100 {
211+
let start_key: &[u8] = b"zzz_0000";
212+
let end_key: &[u8] = b"zzz_9999";
213+
let iter = tree_with_prefix.range(start_key..=end_key, 0, None);
214+
let _ = iter.count();
215+
}
216+
let with_prefix_time = start.elapsed();
217+
let avg_with_prefix = with_prefix_time.as_nanos() / 100;
218+
219+
println!(
220+
" With prefix extractor: {:>8} ns/query",
221+
avg_with_prefix
222+
);
223+
224+
if avg_with_prefix > avg_no_prefix {
225+
let overhead = avg_with_prefix - avg_no_prefix;
226+
println!(
227+
" Overhead: {} ns ({:.1}%)",
228+
overhead,
229+
(overhead as f64 / avg_no_prefix as f64) * 100.0
230+
);
231+
} else {
232+
let savings = avg_no_prefix - avg_with_prefix;
233+
println!(
234+
" Savings: {} ns ({:.1}%)",
235+
savings,
236+
(savings as f64 / avg_no_prefix as f64) * 100.0
237+
);
238+
}
239+
240+
// Check CPU cost per segment
241+
if segment_count > 0 {
242+
let per_segment_overhead = if avg_with_prefix > avg_no_prefix {
243+
(avg_with_prefix - avg_no_prefix) / segment_count as u128
244+
} else {
245+
0
246+
};
247+
println!(" Per-segment overhead: ~{} ns", per_segment_overhead);
248+
}
249+
}
250+
251+
println!("\n=== Summary ===");
252+
println!("MAX_UPFRONT_CHECKS optimization limits overhead to checking at most 10 segments.");
253+
println!(
254+
"For runs with >10 segments, remaining segments are filtered lazily during iteration.\n"
255+
);
256+
}
257+
258+
/// Runs the criterion range-query benchmarks and then the printed timing comparison.
///
/// NOTE(review): the `criterion_group!` invocation visible in this file registers
/// `benchmark_range_query` directly, so nothing shown here calls this function —
/// confirm whether it was meant to be the registered target.
fn benchmark_all(c: &mut Criterion) {
259+
// Run standard benchmarks
260+
benchmark_range_query(c);
261+
262+
// Run the detailed timing comparison
263+
run_timing_benchmark();
264+
}
265+
266+
// Benchmark registration: the `benches` group contains only `benchmark_range_query`,
// and `criterion_main!` is given that single group.
criterion_group!(benches, benchmark_range_query);
267+
criterion_main!(benches);

src/blob_tree/mod.rs

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,20 @@ impl AbstractTree for BlobTree {
219219
let range = prefix_to_range(prefix.as_ref());
220220

221221
Box::new(
222-
crate::Tree::create_internal_range(super_version.clone(), &range, seqno, index).map(
223-
move |kv| {
224-
IterGuardImpl::Blob(Guard {
225-
tree: tree.clone(),
226-
version: super_version.version.clone(),
227-
kv,
228-
})
229-
},
230-
),
222+
crate::Tree::create_internal_range(
223+
super_version.clone(),
224+
&range,
225+
seqno,
226+
index,
227+
self.index.config.prefix_extractor.clone(),
228+
)
229+
.map(move |kv| {
230+
IterGuardImpl::Blob(Guard {
231+
tree: tree.clone(),
232+
version: super_version.version.clone(),
233+
kv,
234+
})
235+
}),
231236
)
232237
}
233238

@@ -241,15 +246,20 @@ impl AbstractTree for BlobTree {
241246
let tree = self.clone();
242247

243248
Box::new(
244-
crate::Tree::create_internal_range(super_version.clone(), &range, seqno, index).map(
245-
move |kv| {
246-
IterGuardImpl::Blob(Guard {
247-
tree: tree.clone(),
248-
version: super_version.version.clone(),
249-
kv,
250-
})
251-
},
252-
),
249+
crate::Tree::create_internal_range(
250+
super_version.clone(),
251+
&range,
252+
seqno,
253+
index,
254+
self.index.config.prefix_extractor.clone(),
255+
)
256+
.map(move |kv| {
257+
IterGuardImpl::Blob(Guard {
258+
tree: tree.clone(),
259+
version: super_version.version.clone(),
260+
kv,
261+
})
262+
}),
253263
)
254264
}
255265

@@ -384,7 +394,11 @@ impl AbstractTree for BlobTree {
384394
Bloom(policy) => policy,
385395
None => BloomConstructionPolicy::BitsPerKey(0.0),
386396
}
387-
});
397+
})
398+
// Ensure tables built during blob tree flush carry the configured extractor.
399+
// This lets writers register prefixes and persist the extractor name in metadata
400+
// for compatibility checks at read time.
401+
.use_prefix_extractor(self.index.config.prefix_extractor.clone());
388402

389403
if index_partitioning {
390404
table_writer = table_writer.use_partitioned_index();
@@ -600,7 +614,12 @@ impl AbstractTree for BlobTree {
600614
.expect("lock is poisoned")
601615
.get_version_for_snapshot(seqno);
602616

603-
let Some(item) = crate::Tree::get_internal_entry_from_version(&super_version, key, seqno)?
617+
let Some(item) = crate::Tree::get_internal_entry_from_version(
618+
&super_version,
619+
key,
620+
seqno,
621+
&self.index.config,
622+
)?
604623
else {
605624
return Ok(None);
606625
};

src/compaction/flavour.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ pub(super) fn prepare_table_writer(
113113
None => BloomConstructionPolicy::BitsPerKey(0.0),
114114
}
115115
}
116-
}))
116+
})
117+
.use_prefix_extractor(opts.config.prefix_extractor.clone()))
117118
}
118119

119120
// TODO: find a better name

0 commit comments

Comments
 (0)