Skip to content

Commit 814c3d6

Browse files
committed
Resolve conflict with main
1 parent 5c5a40f commit 814c3d6

File tree

14 files changed

+249
-144
lines changed

14 files changed

+249
-144
lines changed

Cargo.lock

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

optd-cost-model/src/cost/agg.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ use crate::{
88
},
99
cost_model::CostModelImpl,
1010
stats::DEFAULT_NUM_DISTINCT,
11-
CostModelError, CostModelResult, EstimatedStatistic,
11+
CostModelError, CostModelResult, EstimatedStatistic, SemanticError,
1212
};
1313

1414
impl<S: CostModelStorageLayer> CostModelImpl<S> {
15-
pub fn get_agg_row_cnt(
15+
pub async fn get_agg_row_cnt(
1616
&self,
1717
group_by: ArcPredicateNode,
1818
) -> CostModelResult<EstimatedStatistic> {
@@ -22,22 +22,24 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
2222
} else {
2323
// Multiply the n-distinct of all the group by columns.
2424
// TODO: improve with multi-dimensional n-distinct
25-
let row_cnt = group_by.0.children.iter().try_fold(1, |acc, node| {
25+
let mut row_cnt = 1;
26+
27+
for node in &group_by.0.children {
2628
match node.typ {
2729
PredicateType::AttributeRef => {
2830
let attr_ref =
2931
AttributeRefPred::from_pred_node(node.clone()).ok_or_else(|| {
30-
CostModelError::InvalidPredicate(
32+
SemanticError::InvalidPredicate(
3133
"Expected AttributeRef predicate".to_string(),
3234
)
3335
})?;
3436
if attr_ref.is_derived() {
35-
Ok(acc * DEFAULT_NUM_DISTINCT)
37+
row_cnt *= DEFAULT_NUM_DISTINCT;
3638
} else {
3739
let table_id = attr_ref.table_id();
3840
let attr_idx = attr_ref.attr_index();
3941
let stats_option =
40-
self.get_attribute_comb_stats(table_id, &[attr_idx])?;
42+
self.get_attribute_comb_stats(table_id, &[attr_idx]).await?;
4143

4244
let ndistinct = match stats_option {
4345
Some(stats) => stats.ndistinct,
@@ -46,15 +48,15 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
4648
DEFAULT_NUM_DISTINCT
4749
}
4850
};
49-
Ok(acc * ndistinct)
51+
row_cnt *= ndistinct;
5052
}
5153
}
5254
_ => {
5355
// TODO: Consider the case where `GROUP BY 1`.
54-
panic!("GROUP BY must have attribute ref predicate")
56+
panic!("GROUP BY must have attribute ref predicate");
5557
}
5658
}
57-
})?;
59+
}
5860
Ok(EstimatedStatistic(row_cnt))
5961
}
6062
}

optd-cost-model/src/cost/filter/attribute.rs

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
1919
/// Also, get_attribute_equality_selectivity is a subroutine when computing range
2020
/// selectivity, which is another reason for separating these into two functions
2121
/// `is_eq` is true when the comparison is `==` and false when it is `!=`
22-
pub(crate) fn get_attribute_equality_selectivity(
22+
pub(crate) async fn get_attribute_equality_selectivity(
2323
&self,
2424
table_id: TableId,
2525
attr_base_index: usize,
@@ -28,8 +28,9 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
2828
) -> CostModelResult<f64> {
2929
// TODO: The attribute could be a derived attribute
3030
let ret_sel = {
31-
if let Some(attribute_stats) =
32-
self.get_attribute_comb_stats(table_id, &[attr_base_index])?
31+
if let Some(attribute_stats) = self
32+
.get_attribute_comb_stats(table_id, &[attr_base_index])
33+
.await?
3334
{
3435
let eq_freq =
3536
if let Some(freq) = attribute_stats.mcvs.freq(&vec![Some(value.clone())]) {
@@ -91,7 +92,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
9192
}
9293

9394
/// Compute the frequency of values in an attribute less than the given value.
94-
fn get_attribute_lt_value_freq(
95+
async fn get_attribute_lt_value_freq(
9596
&self,
9697
attribute_stats: &AttributeCombValueStats,
9798
table_id: TableId,
@@ -102,7 +103,9 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
102103
// into total_leq_cdf this logic just so happens to be the exact same logic as
103104
// get_attribute_equality_selectivity implements
104105
let ret_freq = Self::get_attribute_leq_value_freq(attribute_stats, value)
105-
- self.get_attribute_equality_selectivity(table_id, attr_base_index, value, true)?;
106+
- self
107+
.get_attribute_equality_selectivity(table_id, attr_base_index, value, true)
108+
.await?;
106109
assert!(
107110
(0.0..=1.0).contains(&ret_freq),
108111
"ret_freq ({}) should be in [0, 1]",
@@ -116,25 +119,29 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
116119
/// Range predicates are handled entirely differently from equality predicates so this is its
117120
/// own function. If it is unable to find the statistics, it returns DEFAULT_INEQ_SEL.
118121
/// The selectivity is computed as quantile of the right bound minus quantile of the left bound.
119-
pub(crate) fn get_attribute_range_selectivity(
122+
pub(crate) async fn get_attribute_range_selectivity(
120123
&self,
121124
table_id: TableId,
122125
attr_base_index: usize,
123126
start: Bound<&Value>,
124127
end: Bound<&Value>,
125128
) -> CostModelResult<f64> {
126129
// TODO: Consider the case where the attribute is a derived attribute
127-
if let Some(attribute_stats) =
128-
self.get_attribute_comb_stats(table_id, &[attr_base_index])?
130+
if let Some(attribute_stats) = self
131+
.get_attribute_comb_stats(table_id, &[attr_base_index])
132+
.await?
129133
{
130134
let left_quantile = match start {
131135
Bound::Unbounded => 0.0,
132-
Bound::Included(value) => self.get_attribute_lt_value_freq(
133-
&attribute_stats,
134-
table_id,
135-
attr_base_index,
136-
value,
137-
)?,
136+
Bound::Included(value) => {
137+
self.get_attribute_lt_value_freq(
138+
&attribute_stats,
139+
table_id,
140+
attr_base_index,
141+
value,
142+
)
143+
.await?
144+
}
138145
Bound::Excluded(value) => {
139146
Self::get_attribute_leq_value_freq(&attribute_stats, value)
140147
}
@@ -144,12 +151,15 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
144151
Bound::Included(value) => {
145152
Self::get_attribute_leq_value_freq(&attribute_stats, value)
146153
}
147-
Bound::Excluded(value) => self.get_attribute_lt_value_freq(
148-
&attribute_stats,
149-
table_id,
150-
attr_base_index,
151-
value,
152-
)?,
154+
Bound::Excluded(value) => {
155+
self.get_attribute_lt_value_freq(
156+
&attribute_stats,
157+
table_id,
158+
attr_base_index,
159+
value,
160+
)
161+
.await?
162+
}
153163
};
154164
assert!(
155165
left_quantile <= right_quantile,

optd-cost-model/src/cost/filter/comp_op.rs

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ use crate::{
1616
// compute the selectivity.
1717
stats::{DEFAULT_EQ_SEL, DEFAULT_INEQ_SEL, UNIMPLEMENTED_SEL},
1818
CostModelResult,
19+
SemanticError,
1920
};
2021

2122
impl<S: CostModelStorageLayer> CostModelImpl<S> {
2223
/// Comparison operators are the base case for recursion in get_filter_selectivity()
23-
pub(crate) fn get_comp_op_selectivity(
24+
pub(crate) async fn get_comp_op_selectivity(
2425
&self,
2526
comp_bin_op_typ: BinOpType,
2627
left: ArcPredicateNode,
@@ -30,8 +31,11 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
3031

3132
// I intentionally performed moves on left and right. This way, we don't accidentally use
3233
// them after this block
33-
let (attr_ref_exprs, values, non_attr_ref_exprs, is_left_attr_ref) =
34-
self.get_semantic_nodes(left, right)?;
34+
let semantic_res = self.get_semantic_nodes(left, right).await;
35+
if semantic_res.is_err() {
36+
return Ok(Self::get_default_comparison_op_selectivity(comp_bin_op_typ));
37+
}
38+
let (attr_ref_exprs, values, non_attr_ref_exprs, is_left_attr_ref) = semantic_res.unwrap();
3539

3640
// Handle the different cases of semantic nodes.
3741
if attr_ref_exprs.is_empty() {
@@ -51,13 +55,17 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
5155
match comp_bin_op_typ {
5256
BinOpType::Eq => {
5357
self.get_attribute_equality_selectivity(table_id, attr_ref_idx, value, true)
58+
.await
59+
}
60+
BinOpType::Neq => {
61+
self.get_attribute_equality_selectivity(
62+
table_id,
63+
attr_ref_idx,
64+
value,
65+
false,
66+
)
67+
.await
5468
}
55-
BinOpType::Neq => self.get_attribute_equality_selectivity(
56-
table_id,
57-
attr_ref_idx,
58-
value,
59-
false,
60-
),
6169
BinOpType::Lt | BinOpType::Leq | BinOpType::Gt | BinOpType::Geq => {
6270
let start = match (comp_bin_op_typ, is_left_attr_ref) {
6371
(BinOpType::Lt, true) | (BinOpType::Geq, false) => Bound::Unbounded,
@@ -74,6 +82,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
7482
_ => unreachable!("all comparison BinOpTypes were enumerated. this should be unreachable"),
7583
};
7684
self.get_attribute_range_selectivity(table_id, attr_ref_idx, start, end)
85+
.await
7786
}
7887
_ => unreachable!(
7988
"all comparison BinOpTypes were enumerated. this should be unreachable"
@@ -109,7 +118,7 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
109118
/// This is convenient to avoid repeating the same logic just with "left" and "right" swapped.
110119
/// The last return value is true when the input node (left) is an AttributeRefPred.
111120
#[allow(clippy::type_complexity)]
112-
fn get_semantic_nodes(
121+
async fn get_semantic_nodes(
113122
&self,
114123
left: ArcPredicateNode,
115124
right: ArcPredicateNode,
@@ -175,11 +184,16 @@ impl<S: CostModelStorageLayer> CostModelImpl<S> {
175184
// The "invert" cast is to invert the cast so that we're casting the
176185
// non_cast_node to the attribute's original type.
177186
// TODO(migration): double check
178-
let invert_cast_data_type = &(self
187+
// TODO: Consider the case where the attribute info is None.
188+
let attribute_info = self
179189
.storage_manager
180-
.get_attribute_info(table_id, attr_ref_idx as i32)?
181-
.typ
182-
.into_data_type());
190+
.get_attribute_info(table_id, attr_ref_idx as i32)
191+
.await?
192+
.ok_or({
193+
SemanticError::AttributeNotFound(table_id, attr_ref_idx as i32)
194+
})?;
195+
196+
let invert_cast_data_type = &attribute_info.typ.into_data_type();
183197

184198
match non_cast_node.typ {
185199
PredicateType::AttributeRef => {

0 commit comments

Comments
 (0)