Skip to content

Commit e9cd234

Browse files
authored
feat(cost-model): implement cost model derive statistics (#43)
1 parent 7be153a commit e9cd234

File tree

20 files changed

+709
-674
lines changed

20 files changed

+709
-674
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

optd-cost-model/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ itertools = "0.13"
1818
assert_approx_eq = "1.1.0"
1919
trait-variant = "0.1.2"
2020
tokio = { version = "1.0.1", features = ["macros", "rt-multi-thread"] }
21+
async-trait = "0.1"
2122

2223
[dev-dependencies]
2324
crossbeam = "0.8"

optd-cost-model/src/cost/agg.rs

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ use crate::{
88
cost_model::CostModelImpl,
99
stats::DEFAULT_NUM_DISTINCT,
1010
storage::CostModelStorageManager,
11-
CostModelError, CostModelResult, EstimatedStatistic, SemanticError,
11+
CostModelResult, EstimatedStatistic, SemanticError,
1212
};
1313

1414
impl<S: CostModelStorageManager> CostModelImpl<S> {
@@ -67,21 +67,16 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
6767

6868
#[cfg(test)]
6969
mod tests {
70-
use std::{collections::HashMap, ops::Deref};
70+
use std::collections::HashMap;
7171

7272
use crate::{
73-
common::{
74-
predicates::constant_pred::ConstantType,
75-
properties::Attribute,
76-
types::{GroupId, TableId},
77-
values::Value,
78-
},
79-
cost_model::tests::{
80-
attr_index, cnst, create_mock_cost_model, create_mock_cost_model_with_attr_types,
81-
empty_list, empty_per_attr_stats, list, TestPerAttributeStats, TEST_ATTR1_BASE_INDEX,
82-
TEST_ATTR2_BASE_INDEX, TEST_ATTR3_BASE_INDEX, TEST_GROUP1_ID, TEST_TABLE1_ID,
83-
},
73+
common::predicates::constant_pred::ConstantType,
8474
stats::{utilities::simple_map::SimpleMap, MostCommonValues, DEFAULT_NUM_DISTINCT},
75+
test_utils::tests::{
76+
attr_index, create_mock_cost_model_with_attr_types, empty_list, list,
77+
TestPerAttributeStats, TEST_ATTR1_BASE_INDEX, TEST_ATTR2_BASE_INDEX,
78+
TEST_ATTR3_BASE_INDEX, TEST_GROUP1_ID, TEST_TABLE1_ID,
79+
},
8580
EstimatedStatistic,
8681
};
8782

optd-cost-model/src/cost/filter/comp_op.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ use crate::{
1414
cost_model::CostModelImpl,
1515
stats::{DEFAULT_EQ_SEL, DEFAULT_INEQ_SEL, UNIMPLEMENTED_SEL},
1616
storage::CostModelStorageManager,
17-
CostModelResult, SemanticError,
17+
CostModelResult,
1818
};
1919

2020
impl<S: CostModelStorageManager> CostModelImpl<S> {

optd-cost-model/src/cost/filter/core.rs

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -98,16 +98,11 @@ mod tests {
9898
bin_op_pred::BinOpType, constant_pred::ConstantType, log_op_pred::LogOpType,
9999
un_op_pred::UnOpType,
100100
},
101-
properties::Attribute,
102101
types::TableId,
103102
values::Value,
104103
},
105-
cost_model::tests::*,
106-
memo_ext::tests::MemoGroupInfo,
107-
stats::{
108-
utilities::{counter::Counter, simple_map::SimpleMap},
109-
Distribution, MostCommonValues, DEFAULT_EQ_SEL,
110-
},
104+
stats::{utilities::simple_map::SimpleMap, Distribution, MostCommonValues, DEFAULT_EQ_SEL},
105+
test_utils::tests::*,
111106
};
112107
use arrow_schema::DataType;
113108

@@ -834,7 +829,7 @@ mod tests {
834829
0,
835830
0.0,
836831
);
837-
let table_id = TableId(0);
832+
838833
let cost_model = create_mock_cost_model_with_attr_types(
839834
vec![TEST_TABLE1_ID],
840835
vec![HashMap::from([(

optd-cost-model/src/cost/filter/in_list.rs

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -83,16 +83,9 @@ mod tests {
8383
use std::collections::HashMap;
8484

8585
use crate::{
86-
common::{
87-
types::{GroupId, TableId},
88-
values::Value,
89-
},
90-
cost_model::tests::*,
91-
memo_ext::tests::MemoGroupInfo,
92-
stats::{
93-
utilities::{counter::Counter, simple_map::SimpleMap},
94-
MostCommonValues,
95-
},
86+
common::values::Value,
87+
stats::{utilities::simple_map::SimpleMap, MostCommonValues},
88+
test_utils::tests::*,
9689
};
9790

9891
#[tokio::test]

optd-cost-model/src/cost/filter/like.rs

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -113,15 +113,12 @@ mod tests {
113113
use std::collections::HashMap;
114114

115115
use crate::{
116-
common::{
117-
types::{GroupId, TableId},
118-
values::Value,
119-
},
120-
cost_model::tests::*,
116+
common::values::Value,
121117
stats::{
122118
utilities::{counter::Counter, simple_map::SimpleMap},
123119
MostCommonValues, FIXED_CHAR_SEL_FACTOR, FULL_WILDCARD_SEL_FACTOR,
124120
},
121+
test_utils::tests::*,
125122
};
126123

127124
#[tokio::test]

optd-cost-model/src/cost/filter/log_op.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
2525
let mut or_sel_neg = 1.0;
2626
for child in children {
2727
let selectivity = self.get_filter_selectivity(group_id, child.clone()).await?;
28-
or_sel_neg *= (1.0 - selectivity);
28+
or_sel_neg *= 1.0 - selectivity;
2929
}
3030
Ok(1.0 - or_sel_neg)
3131
}

optd-cost-model/src/cost/join.rs

Whitespace-only changes.

optd-cost-model/src/cost/join/core.rs

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,11 @@ use crate::{
77
nodes::{ArcPredicateNode, JoinType, PredicateType, ReprPredicateNode},
88
predicates::{
99
attr_index_pred::AttrIndexPred,
10-
bin_op_pred::BinOpType,
1110
list_pred::ListPred,
1211
log_op_pred::{LogOpPred, LogOpType},
1312
},
1413
properties::attr_ref::{
15-
self, AttrRef, AttrRefs, BaseTableAttrRef, EqPredicate, GroupAttrRefs,
16-
SemanticCorrelation,
14+
AttrRef, AttrRefs, BaseTableAttrRef, EqPredicate, SemanticCorrelation,
1715
},
1816
types::GroupId,
1917
},
@@ -409,23 +407,24 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
409407
mod tests {
410408
use std::collections::HashMap;
411409

410+
use attr_ref::GroupAttrRefs;
411+
412412
use crate::{
413413
common::{
414-
predicates::{attr_index_pred, constant_pred::ConstantType},
415-
properties::Attribute,
416-
types::TableId,
414+
predicates::bin_op_pred::BinOpType,
415+
properties::{attr_ref, Attribute},
417416
values::Value,
418417
},
419-
cost_model::tests::{
418+
stats::DEFAULT_EQ_SEL,
419+
test_utils::tests::MemoGroupInfo,
420+
test_utils::tests::{
420421
attr_index, bin_op, cnst, create_four_table_mock_cost_model, create_mock_cost_model,
421422
create_three_table_mock_cost_model, create_two_table_mock_cost_model,
422423
create_two_table_mock_cost_model_custom_row_cnts, empty_per_attr_stats, log_op,
423424
per_attr_stats_with_dist_and_ndistinct, per_attr_stats_with_ndistinct,
424-
TestOptCostModelMock, TestPerAttributeStats, TEST_ATTR1_NAME, TEST_ATTR2_NAME,
425-
TEST_TABLE1_ID, TEST_TABLE2_ID, TEST_TABLE3_ID, TEST_TABLE4_ID,
425+
TestOptCostModelMock, TEST_ATTR1_NAME, TEST_ATTR2_NAME, TEST_TABLE1_ID, TEST_TABLE2_ID,
426+
TEST_TABLE3_ID, TEST_TABLE4_ID,
426427
},
427-
memo_ext::tests::MemoGroupInfo,
428-
stats::DEFAULT_EQ_SEL,
429428
};
430429

431430
use super::*;
@@ -905,7 +904,8 @@ mod tests {
905904
expected_inner_sel
906905
);
907906
// check the outer sels
908-
assert_outer_selectivities(&cost_model, expr_tree, expr_tree_rev, &attr_refs, 0.25, 0.2);
907+
assert_outer_selectivities(&cost_model, expr_tree, expr_tree_rev, &attr_refs, 0.25, 0.2)
908+
.await;
909909
}
910910

911911
/// Non-unique oncond means the column is not unique in either table

0 commit comments

Comments
 (0)