Skip to content

Commit 1569fc5

Browse files
committed
Modify the tests of filter and agg
1 parent 8c4191f commit 1569fc5

File tree

13 files changed

+581
-491
lines changed

13 files changed

+581
-491
lines changed

optd-cost-model/src/cost/agg.rs

Lines changed: 62 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ use crate::{
22
common::{
33
nodes::{ArcPredicateNode, PredicateType, ReprPredicateNode},
44
predicates::{attr_index_pred::AttrIndexPred, list_pred::ListPred},
5-
types::TableId,
5+
properties::attr_ref::{AttrRef, BaseTableAttrRef},
6+
types::GroupId,
67
},
78
cost_model::CostModelImpl,
89
stats::DEFAULT_NUM_DISTINCT,
@@ -13,6 +14,7 @@ use crate::{
1314
impl<S: CostModelStorageManager> CostModelImpl<S> {
1415
pub async fn get_agg_row_cnt(
1516
&self,
17+
group_id: GroupId,
1618
group_by: ArcPredicateNode,
1719
) -> CostModelResult<EstimatedStatistic> {
1820
let group_by = ListPred::from_pred_node(group_by).unwrap();
@@ -32,12 +34,9 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
3234
"Expected AttributeRef predicate".to_string(),
3335
)
3436
})?;
35-
let is_derived = todo!();
36-
if is_derived {
37-
row_cnt *= DEFAULT_NUM_DISTINCT;
38-
} else {
39-
let table_id = todo!();
40-
let attr_idx = attr_ref.attr_index();
37+
if let AttrRef::BaseTableAttrRef(BaseTableAttrRef { table_id, attr_idx }) =
38+
self.memo.get_attribute_ref(group_id, attr_ref.attr_index())
39+
{
4140
// TODO: Only query ndistinct instead of all kinds of stats.
4241
let stats_option =
4342
self.get_attribute_comb_stats(table_id, &[attr_idx]).await?;
@@ -50,6 +49,9 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
5049
}
5150
};
5251
row_cnt *= ndistinct;
52+
} else {
53+
// TODO: Handle derived attributes.
54+
row_cnt *= DEFAULT_NUM_DISTINCT;
5355
}
5456
}
5557
_ => {
@@ -65,7 +67,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
6567

6668
#[cfg(test)]
6769
mod tests {
68-
use std::collections::HashMap;
70+
use std::{collections::HashMap, ops::Deref};
6971

7072
use crate::{
7173
common::{
@@ -75,48 +77,59 @@ mod tests {
7577
values::Value,
7678
},
7779
cost_model::tests::{
78-
attr_index, cnst, create_mock_cost_model, empty_list, empty_per_attr_stats, list,
79-
TestPerAttributeStats,
80+
attr_index, cnst, create_mock_cost_model, create_mock_cost_model_with_attr_types,
81+
empty_list, empty_per_attr_stats, list, TestPerAttributeStats, TEST_ATTR1_BASE_INDEX,
82+
TEST_ATTR2_BASE_INDEX, TEST_ATTR3_BASE_INDEX, TEST_GROUP1_ID, TEST_TABLE1_ID,
8083
},
8184
stats::{utilities::simple_map::SimpleMap, MostCommonValues, DEFAULT_NUM_DISTINCT},
8285
EstimatedStatistic,
8386
};
8487

8588
#[tokio::test]
8689
async fn test_agg_no_stats() {
87-
let table_id = TableId(0);
88-
let cost_model = create_mock_cost_model(vec![table_id], vec![], vec![None]);
90+
let cost_model = create_mock_cost_model_with_attr_types(
91+
vec![TEST_TABLE1_ID],
92+
vec![],
93+
vec![HashMap::from([
94+
(TEST_ATTR1_BASE_INDEX, ConstantType::Int32),
95+
(TEST_ATTR2_BASE_INDEX, ConstantType::Int32),
96+
])],
97+
vec![None],
98+
);
8999

90100
// Group by empty list should return 1.
91101
let group_bys = empty_list();
92102
assert_eq!(
93-
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
103+
cost_model
104+
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
105+
.await
106+
.unwrap(),
94107
EstimatedStatistic(1.0)
95108
);
96109

97110
// Group by single column should return the default value since there are no stats.
98111
let group_bys = list(vec![attr_index(0)]);
99112
assert_eq!(
100-
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
113+
cost_model
114+
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
115+
.await
116+
.unwrap(),
101117
EstimatedStatistic(DEFAULT_NUM_DISTINCT as f64)
102118
);
103119

104120
// Group by two columns should return the default value squared since there are no stats.
105121
let group_bys = list(vec![attr_index(0), attr_index(1)]);
106122
assert_eq!(
107-
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
123+
cost_model
124+
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
125+
.await
126+
.unwrap(),
108127
EstimatedStatistic((DEFAULT_NUM_DISTINCT * DEFAULT_NUM_DISTINCT) as f64)
109128
);
110129
}
111130

112131
#[tokio::test]
113132
async fn test_agg_with_stats() {
114-
let table_id = TableId(0);
115-
let group_id = GroupId(0);
116-
let attr1_base_idx = 0;
117-
let attr2_base_idx = 1;
118-
let attr3_base_idx = 2;
119-
120133
let attr1_ndistinct = 12;
121134
let attr2_ndistinct = 645;
122135
let attr1_stats = TestPerAttributeStats::new(
@@ -132,47 +145,58 @@ mod tests {
132145
0.0,
133146
);
134147

135-
let cost_model = create_mock_cost_model(
136-
vec![table_id],
148+
let cost_model = create_mock_cost_model_with_attr_types(
149+
vec![TEST_TABLE1_ID],
150+
vec![HashMap::from([
151+
(TEST_ATTR1_BASE_INDEX, attr1_stats),
152+
(TEST_ATTR2_BASE_INDEX, attr2_stats),
153+
])],
137154
vec![HashMap::from([
138-
(attr1_base_idx, attr1_stats),
139-
(attr2_base_idx, attr2_stats),
155+
(TEST_ATTR1_BASE_INDEX, ConstantType::Int32),
156+
(TEST_ATTR2_BASE_INDEX, ConstantType::Int32),
157+
(TEST_ATTR3_BASE_INDEX, ConstantType::Int32),
140158
])],
141159
vec![None],
142-
// attr_infos,
143160
);
144161

145162
// Group by empty list should return 1.
146163
let group_bys = empty_list();
147164
assert_eq!(
148-
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
165+
cost_model
166+
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
167+
.await
168+
.unwrap(),
149169
EstimatedStatistic(1.0)
150170
);
151171

152172
// Group by single column should return the n-distinct of the column.
153-
let group_bys = list(vec![attr_index(attr1_base_idx)]); // TODO: Fix this
173+
let group_bys = list(vec![attr_index(0)]);
154174
assert_eq!(
155-
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
175+
cost_model
176+
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
177+
.await
178+
.unwrap(),
156179
EstimatedStatistic(attr1_ndistinct as f64)
157180
);
158181

159182
// Group by two columns should return the product of the n-distinct of the columns.
160-
let group_bys = list(vec![attr_index(attr1_base_idx), attr_index(attr2_base_idx)]); // TODO: Fix this
183+
let group_bys = list(vec![attr_index(0), attr_index(1)]);
161184
assert_eq!(
162-
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
185+
cost_model
186+
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
187+
.await
188+
.unwrap(),
163189
EstimatedStatistic((attr1_ndistinct * attr2_ndistinct) as f64)
164190
);
165191

166192
// Group by multiple columns should return the product of the n-distinct of the columns. If one of the columns
167193
// does not have stats, it should use the default value instead.
168-
let group_bys = list(vec![
169-
// TODO: Fix this
170-
attr_index(attr1_base_idx),
171-
attr_index(attr2_base_idx),
172-
attr_index(attr3_base_idx),
173-
]);
194+
let group_bys = list(vec![attr_index(0), attr_index(1), attr_index(2)]);
174195
assert_eq!(
175-
cost_model.get_agg_row_cnt(group_bys).await.unwrap(),
196+
cost_model
197+
.get_agg_row_cnt(TEST_GROUP1_ID, group_bys)
198+
.await
199+
.unwrap(),
176200
EstimatedStatistic((attr1_ndistinct * attr2_ndistinct * DEFAULT_NUM_DISTINCT) as f64)
177201
);
178202
}

optd-cost-model/src/cost/filter/attribute.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
1616
/// Also, get_attribute_equality_selectivity is a subroutine when computing range
1717
/// selectivity, which is another reason for separating these into two functions
1818
/// is_eq means whether it's == or !=
19+
///
20+
/// Currently, we only support calculating the equality selectivity for an existing attribute,
21+
/// not a derived attribute.
22+
/// TODO: Support derived attributes.
1923
pub(crate) async fn get_attribute_equality_selectivity(
2024
&self,
2125
table_id: TableId,
2226
attr_base_index: u64,
2327
value: &Value,
2428
is_eq: bool,
2529
) -> CostModelResult<f64> {
26-
// TODO: The attribute could be a derived attribute
2730
let ret_sel = {
2831
if let Some(attribute_stats) = self
2932
.get_attribute_comb_stats(table_id, &[attr_base_index])
@@ -89,6 +92,10 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
8992
}
9093

9194
/// Compute the frequency of values in an attribute less than the given value.
95+
///
96+
/// Currently, we only support calculating this frequency for an existing attribute,
97+
/// not a derived attribute.
98+
/// TODO: Support derived attributes.
9299
async fn get_attribute_lt_value_freq(
93100
&self,
94101
attribute_stats: &AttributeCombValueStats,
@@ -116,6 +123,10 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
116123
/// Range predicates are handled entirely differently from equality predicates so this is its
117124
/// own function. If it is unable to find the statistics, it returns DEFAULT_INEQ_SEL.
118125
/// The selectivity is computed as quantile of the right bound minus quantile of the left bound.
126+
///
127+
/// Currently, we only support calculating the range selectivity for an existing attribute,
128+
/// not a derived attribute.
129+
/// TODO: Support derived attributes.
119130
pub(crate) async fn get_attribute_range_selectivity(
120131
&self,
121132
table_id: TableId,

0 commit comments

Comments
 (0)