Skip to content

Commit 5ed4553

Browse files
committed
implement agg cost computation
1 parent 22656e6 commit 5ed4553

File tree

4 files changed

+67
-1
lines changed

4 files changed

+67
-1
lines changed

optd-cost-model/src/common/predicates/attr_ref_pred.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ use super::id_pred::IdPred;
1111
/// Currently, [`AttributeRefPred`] only holds base table attributes, i.e. attributes
1212
/// that already exist in the table. More complex structures may be introduced in the
1313
/// future to represent derived attributes (e.g. t.v1 + t.v2).
14+
///
15+
/// TODO: Support derived columns in `AttributeRefPred`.
1416
#[derive(Clone, Debug)]
1517
pub struct AttributeRefPred(pub ArcPredicateNode);
1618

optd-cost-model/src/cost/agg.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
use optd_persistent::CostModelStorageLayer;
2+
3+
use crate::{
4+
common::{
5+
nodes::{ArcPredicateNode, PredicateType, ReprPredicateNode},
6+
predicates::{attr_ref_pred::AttributeRefPred, list_pred::ListPred},
7+
types::TableId,
8+
},
9+
cost_model::CostModelImpl,
10+
stats::DEFAULT_NUM_DISTINCT,
11+
CostModelError, CostModelResult, EstimatedStatistic,
12+
};
13+
14+
impl<S: CostModelStorageLayer> CostModelImpl<S> {
15+
pub fn get_agg_row_cnt(
16+
&self,
17+
group_by: ArcPredicateNode,
18+
) -> CostModelResult<EstimatedStatistic> {
19+
let group_by = ListPred::from_pred_node(group_by).unwrap();
20+
if group_by.is_empty() {
21+
Ok(EstimatedStatistic(1))
22+
} else {
23+
// Multiply the n-distinct of all the group by columns.
24+
// TODO: improve with multi-dimensional n-distinct
25+
let row_cnt = group_by.0.children.iter().try_fold(1, |acc, node| {
26+
match node.typ {
27+
PredicateType::AttributeRef => {
28+
let attr_ref =
29+
AttributeRefPred::from_pred_node(node.clone()).ok_or_else(|| {
30+
CostModelError::InvalidPredicate(
31+
"Expected AttributeRef predicate".to_string(),
32+
)
33+
})?;
34+
if attr_ref.is_derived() {
35+
Ok(acc * DEFAULT_NUM_DISTINCT)
36+
} else {
37+
let table_id = attr_ref.table_id();
38+
let attr_idx = attr_ref.attr_index();
39+
let stats_option =
40+
self.get_attribute_comb_stats(TableId(table_id), &vec![attr_idx])?;
41+
42+
let ndistinct = match stats_option {
43+
Some(stats) => stats.ndistinct,
44+
None => {
45+
// The column type is not supported or stats are missing.
46+
DEFAULT_NUM_DISTINCT
47+
}
48+
};
49+
Ok(acc * ndistinct)
50+
}
51+
}
52+
_ => {
53+
// TODO: Consider the case where `GROUP BY 1`.
54+
panic!("GROUP BY must have attribute ref predicate")
55+
}
56+
}
57+
})?;
58+
Ok(EstimatedStatistic(row_cnt))
59+
}
60+
}
61+
}

optd-cost-model/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ pub type CostModelResult<T> = Result<T, CostModelError>;
3838

3939
/// Errors that cost-model operations report through `CostModelResult`.
#[derive(Debug)]
4040
pub enum CostModelError {
41-
// TODO: Add more error types
4241
/// Wraps a `BackendError`; presumably raised by the storage/ORM layer — TODO confirm.
ORMError(BackendError),
42+
/// A predicate did not have the expected form; the `String` describes the problem.
InvalidPredicate(String),
4343
}
4444

4545
pub trait CostModel: 'static + Send + Sync {

optd-cost-model/src/stats/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ use crate::common::values::Value;
88
use counter::Counter;
99
use serde::{Deserialize, Serialize};
1010

11+
// Fallback number of distinct values (n-distinct) assumed for derived columns
// and for columns that have no statistics available.
12+
pub const DEFAULT_NUM_DISTINCT: u64 = 200;
13+
1114
pub type AttributeCombValue = Vec<Option<Value>>;
1215

1316
#[derive(Serialize, Deserialize, Debug)]

0 commit comments

Comments
 (0)