Skip to content

Commit 44164a6

Browse files
authored
Use sketches instead of time-based decay histogram (#2900)
1 parent 1d98ff5 commit 44164a6

File tree

4 files changed

+32
-31
lines changed

4 files changed

+32
-31
lines changed

Cargo.lock

Lines changed: 12 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,12 @@ duckdb = { path = "duckdb-vortex/duckdb-rs/crates/duckdb", features = [
8989
] }
9090
dyn-hash = "0.2.0"
9191
enum-iterator = "2.0.0"
92-
exponential-decay-histogram = "=0.1.13"
9392
fastlanes = "0.1.8"
9493
flatbuffers = "25"
9594
flexbuffers = "25"
9695
flume = "0.11"
9796
fsst-rs = "0.5.2"
9897
futures = { version = "0.3.31", default-features = false }
99-
futures-executor = "0.3.31"
10098
futures-util = "0.3.31"
10199
getrandom = "0.3"
102100
goldenfile = "1"
@@ -116,7 +114,6 @@ moka = { version = "0.12.10", default-features = false }
116114
num-traits = "0.2.19"
117115
num_enum = "0.7.2"
118116
object_store = "0.11.0"
119-
oneshot = "0.1.10"
120117
opentelemetry = "0.29.0"
121118
opentelemetry-otlp = "0.29.0"
122119
opentelemetry_sdk = "0.29.0"
@@ -159,6 +156,7 @@ serde = "1.0.203"
159156
serde_json = "1.0.116"
160157
serde_test = "1.0.176"
161158
simplelog = "0.12"
159+
sketches-ddsketch = "0.3.0"
162160
static_assertions = "1.1"
163161
tabled = { version = "0.18.0", default-features = false }
164162
tar = "0.4"

vortex-layout/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ async-trait = { workspace = true }
1919
bit-vec = { workspace = true }
2020
bytes = { workspace = true }
2121
dashmap = { workspace = true }
22-
exponential-decay-histogram = { workspace = true }
2322
flatbuffers = { workspace = true }
2423
flume = { workspace = true }
2524
futures = { workspace = true, features = ["alloc", "executor"] }
2625
getrandom_v03 = { workspace = true }
2726
itertools = { workspace = true }
2827
log = { workspace = true }
28+
sketches-ddsketch = { workspace = true }
2929
tokio = { workspace = true, features = ["rt"], optional = true }
3030
tracing = { workspace = true, optional = true }
3131
tracing-futures = { workspace = true, features = [

vortex-layout/src/layouts/filter.rs

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ use std::sync::{Arc, RwLock};
44

55
use async_trait::async_trait;
66
use bit_vec::BitVec;
7-
use exponential_decay_histogram::ExponentialDecayHistogram;
87
use itertools::Itertools;
8+
use sketches_ddsketch::DDSketch;
99
use vortex_array::aliases::hash_map::HashMap;
10-
use vortex_error::{VortexExpect, VortexResult, vortex_panic};
10+
use vortex_error::{VortexExpect, VortexResult, vortex_err, vortex_panic};
1111
use vortex_expr::ExprRef;
1212
use vortex_expr::forms::cnf::cnf;
1313
use vortex_mask::Mask;
@@ -18,8 +18,6 @@ use crate::{
1818

1919
/// The selectivity histogram quantile to use for reordering conjuncts, where 0 means no rows match.
2020
const DEFAULT_SELECTIVITY_QUANTILE: f64 = 0.1;
21-
/// The multiplier used to convert selectivity to i64 for the histogram.
22-
const SELECTIVITY_MULTIPLIER: f64 = 1_000_000.0;
2321

2422
/// A [`LayoutReader`] that splits boolean expressions into individual conjunctions, tracks
2523
/// statistics about selectivity, and uses this information to reorder the evaluation of the
@@ -116,7 +114,7 @@ pub struct FilterExpr {
116114
/// The conjuncts involved in the filter expression.
117115
conjuncts: Vec<ExprRef>,
118116
/// A histogram of the selectivity of each conjunct.
119-
conjunct_selectivity: Vec<RwLock<ExponentialDecayHistogram>>,
117+
conjunct_selectivity: Vec<RwLock<DDSketch>>,
120118
/// The preferred ordering of conjuncts.
121119
ordering: RwLock<Vec<usize>>,
122120
/// The quantile to use from the selectivity histogram of each conjunct.
@@ -129,11 +127,9 @@ impl FilterExpr {
129127
let num_conjuncts = conjuncts.len();
130128
Self {
131129
conjuncts,
132-
conjunct_selectivity: iter::repeat_with(|| {
133-
RwLock::new(ExponentialDecayHistogram::new())
134-
})
135-
.take(num_conjuncts)
136-
.collect(),
130+
conjunct_selectivity: iter::repeat_with(|| RwLock::new(DDSketch::default()))
131+
.take(num_conjuncts)
132+
.collect(),
137133
// The initial ordering is naive, we could order this by how well we expect each
138134
// comparison operator to perform. e.g. == might be more selective than <=? Not obvious.
139135
ordering: RwLock::new((0..num_conjuncts).collect()),
@@ -160,9 +156,7 @@ impl FilterExpr {
160156
.write()
161157
.vortex_expect("poisoned lock");
162158

163-
// Since our histogram only supports i64, we map our f64 into a 0-1m range.
164-
let selectivity = (selectivity * SELECTIVITY_MULTIPLIER).round() as i64;
165-
histogram.update(selectivity);
159+
histogram.add(selectivity);
166160
}
167161

168162
let all_selectivity = self
@@ -172,8 +166,11 @@ impl FilterExpr {
172166
histogram
173167
.read()
174168
.vortex_expect("poisoned lock")
175-
.snapshot()
176-
.value(self.selectivity_quantile)
169+
.quantile(self.selectivity_quantile)
170+
.map_err(|e| vortex_err!("{e}")) // Only errors when the quantile is out of range
171+
.vortex_expect("quantile out of range")
172+
// If the sketch is empty, its selectivity is 0.
173+
.unwrap_or_default()
177174
})
178175
.collect::<Vec<_>>();
179176

@@ -186,17 +183,17 @@ impl FilterExpr {
186183

187184
// Re-sort our conjuncts based on the new statistics.
188185
let mut ordering = self.ordering.write().vortex_expect("lock poisoned");
189-
ordering.sort_unstable_by_key(|&idx| all_selectivity[idx]);
186+
ordering.sort_unstable_by(|&l_idx, &r_idx| {
187+
all_selectivity[l_idx]
188+
.partial_cmp(&all_selectivity[r_idx])
189+
.vortex_expect("Can't compare selectivity values")
190+
});
190191

191192
log::debug!(
192193
"Reordered conjuncts based on new selectivity {:?}",
193194
ordering
194195
.iter()
195-
.map(|&idx| format!(
196-
"({}) => {}",
197-
self.conjuncts[idx],
198-
all_selectivity[idx] as f64 / SELECTIVITY_MULTIPLIER
199-
))
196+
.map(|&idx| format!("({}) => {}", self.conjuncts[idx], all_selectivity[idx]))
200197
.join(", ")
201198
);
202199
}

0 commit comments

Comments
 (0)