@@ -2,7 +2,8 @@ use crate::{
22 common:: {
33 nodes:: { ArcPredicateNode , PredicateType , ReprPredicateNode } ,
44 predicates:: { attr_index_pred:: AttrIndexPred , list_pred:: ListPred } ,
5- types:: TableId ,
5+ properties:: attr_ref:: { AttrRef , BaseTableAttrRef } ,
6+ types:: GroupId ,
67 } ,
78 cost_model:: CostModelImpl ,
89 stats:: DEFAULT_NUM_DISTINCT ,
@@ -13,6 +14,7 @@ use crate::{
1314impl < S : CostModelStorageManager > CostModelImpl < S > {
1415 pub async fn get_agg_row_cnt (
1516 & self ,
17+ group_id : GroupId ,
1618 group_by : ArcPredicateNode ,
1719 ) -> CostModelResult < EstimatedStatistic > {
1820 let group_by = ListPred :: from_pred_node ( group_by) . unwrap ( ) ;
@@ -32,12 +34,9 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
3234 "Expected AttributeRef predicate" . to_string ( ) ,
3335 )
3436 } ) ?;
35- let is_derived = todo ! ( ) ;
36- if is_derived {
37- row_cnt *= DEFAULT_NUM_DISTINCT ;
38- } else {
39- let table_id = todo ! ( ) ;
40- let attr_idx = attr_ref. attr_index ( ) ;
37+ if let AttrRef :: BaseTableAttrRef ( BaseTableAttrRef { table_id, attr_idx } ) =
38+ self . memo . get_attribute_ref ( group_id, attr_ref. attr_index ( ) )
39+ {
4140 // TODO: Only query ndistinct instead of all kinds of stats.
4241 let stats_option =
4342 self . get_attribute_comb_stats ( table_id, & [ attr_idx] ) . await ?;
@@ -50,6 +49,9 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
5049 }
5150 } ;
5251 row_cnt *= ndistinct;
52+ } else {
53+ // TODO: Handle derived attributes.
54+ row_cnt *= DEFAULT_NUM_DISTINCT ;
5355 }
5456 }
5557 _ => {
@@ -65,7 +67,7 @@ impl<S: CostModelStorageManager> CostModelImpl<S> {
6567
6668#[ cfg( test) ]
6769mod tests {
68- use std:: collections:: HashMap ;
70+ use std:: { collections:: HashMap , ops :: Deref } ;
6971
7072 use crate :: {
7173 common:: {
@@ -75,48 +77,59 @@ mod tests {
7577 values:: Value ,
7678 } ,
7779 cost_model:: tests:: {
78- attr_index, cnst, create_mock_cost_model, empty_list, empty_per_attr_stats, list,
79- TestPerAttributeStats ,
80+ attr_index, cnst, create_mock_cost_model, create_mock_cost_model_with_attr_types,
81+ empty_list, empty_per_attr_stats, list, TestPerAttributeStats , TEST_ATTR1_BASE_INDEX ,
82+ TEST_ATTR2_BASE_INDEX , TEST_ATTR3_BASE_INDEX , TEST_GROUP1_ID , TEST_TABLE1_ID ,
8083 } ,
8184 stats:: { utilities:: simple_map:: SimpleMap , MostCommonValues , DEFAULT_NUM_DISTINCT } ,
8285 EstimatedStatistic ,
8386 } ;
8487
8588 #[ tokio:: test]
8689 async fn test_agg_no_stats ( ) {
87- let table_id = TableId ( 0 ) ;
88- let cost_model = create_mock_cost_model ( vec ! [ table_id] , vec ! [ ] , vec ! [ None ] ) ;
90+ let cost_model = create_mock_cost_model_with_attr_types (
91+ vec ! [ TEST_TABLE1_ID ] ,
92+ vec ! [ ] ,
93+ vec ! [ HashMap :: from( [
94+ ( TEST_ATTR1_BASE_INDEX , ConstantType :: Int32 ) ,
95+ ( TEST_ATTR2_BASE_INDEX , ConstantType :: Int32 ) ,
96+ ] ) ] ,
97+ vec ! [ None ] ,
98+ ) ;
8999
90100 // Group by empty list should return 1.
91101 let group_bys = empty_list ( ) ;
92102 assert_eq ! (
93- cost_model. get_agg_row_cnt( group_bys) . await . unwrap( ) ,
103+ cost_model
104+ . get_agg_row_cnt( TEST_GROUP1_ID , group_bys)
105+ . await
106+ . unwrap( ) ,
94107 EstimatedStatistic ( 1.0 )
95108 ) ;
96109
97110 // Group by single column should return the default value since there are no stats.
98111 let group_bys = list ( vec ! [ attr_index( 0 ) ] ) ;
99112 assert_eq ! (
100- cost_model. get_agg_row_cnt( group_bys) . await . unwrap( ) ,
113+ cost_model
114+ . get_agg_row_cnt( TEST_GROUP1_ID , group_bys)
115+ . await
116+ . unwrap( ) ,
101117 EstimatedStatistic ( DEFAULT_NUM_DISTINCT as f64 )
102118 ) ;
103119
104120 // Group by two columns should return the default value squared since there are no stats.
105121 let group_bys = list ( vec ! [ attr_index( 0 ) , attr_index( 1 ) ] ) ;
106122 assert_eq ! (
107- cost_model. get_agg_row_cnt( group_bys) . await . unwrap( ) ,
123+ cost_model
124+ . get_agg_row_cnt( TEST_GROUP1_ID , group_bys)
125+ . await
126+ . unwrap( ) ,
108127 EstimatedStatistic ( ( DEFAULT_NUM_DISTINCT * DEFAULT_NUM_DISTINCT ) as f64 )
109128 ) ;
110129 }
111130
112131 #[ tokio:: test]
113132 async fn test_agg_with_stats ( ) {
114- let table_id = TableId ( 0 ) ;
115- let group_id = GroupId ( 0 ) ;
116- let attr1_base_idx = 0 ;
117- let attr2_base_idx = 1 ;
118- let attr3_base_idx = 2 ;
119-
120133 let attr1_ndistinct = 12 ;
121134 let attr2_ndistinct = 645 ;
122135 let attr1_stats = TestPerAttributeStats :: new (
@@ -132,47 +145,58 @@ mod tests {
132145 0.0 ,
133146 ) ;
134147
135- let cost_model = create_mock_cost_model (
136- vec ! [ table_id] ,
148+ let cost_model = create_mock_cost_model_with_attr_types (
149+ vec ! [ TEST_TABLE1_ID ] ,
150+ vec ! [ HashMap :: from( [
151+ ( TEST_ATTR1_BASE_INDEX , attr1_stats) ,
152+ ( TEST_ATTR2_BASE_INDEX , attr2_stats) ,
153+ ] ) ] ,
137154 vec ! [ HashMap :: from( [
138- ( attr1_base_idx, attr1_stats) ,
139- ( attr2_base_idx, attr2_stats) ,
155+ ( TEST_ATTR1_BASE_INDEX , ConstantType :: Int32 ) ,
156+ ( TEST_ATTR2_BASE_INDEX , ConstantType :: Int32 ) ,
157+ ( TEST_ATTR3_BASE_INDEX , ConstantType :: Int32 ) ,
140158 ] ) ] ,
141159 vec ! [ None ] ,
142- // attr_infos,
143160 ) ;
144161
145162 // Group by empty list should return 1.
146163 let group_bys = empty_list ( ) ;
147164 assert_eq ! (
148- cost_model. get_agg_row_cnt( group_bys) . await . unwrap( ) ,
165+ cost_model
166+ . get_agg_row_cnt( TEST_GROUP1_ID , group_bys)
167+ . await
168+ . unwrap( ) ,
149169 EstimatedStatistic ( 1.0 )
150170 ) ;
151171
152172 // Group by single column should return the n-distinct of the column.
153- let group_bys = list ( vec ! [ attr_index( attr1_base_idx ) ] ) ; // TODO: Fix this
173+ let group_bys = list ( vec ! [ attr_index( 0 ) ] ) ;
154174 assert_eq ! (
155- cost_model. get_agg_row_cnt( group_bys) . await . unwrap( ) ,
175+ cost_model
176+ . get_agg_row_cnt( TEST_GROUP1_ID , group_bys)
177+ . await
178+ . unwrap( ) ,
156179 EstimatedStatistic ( attr1_ndistinct as f64 )
157180 ) ;
158181
159182 // Group by two columns should return the product of the n-distinct of the columns.
160- let group_bys = list ( vec ! [ attr_index( attr1_base_idx ) , attr_index( attr2_base_idx ) ] ) ; // TODO: Fix this
183+ let group_bys = list ( vec ! [ attr_index( 0 ) , attr_index( 1 ) ] ) ;
161184 assert_eq ! (
162- cost_model. get_agg_row_cnt( group_bys) . await . unwrap( ) ,
185+ cost_model
186+ . get_agg_row_cnt( TEST_GROUP1_ID , group_bys)
187+ . await
188+ . unwrap( ) ,
163189 EstimatedStatistic ( ( attr1_ndistinct * attr2_ndistinct) as f64 )
164190 ) ;
165191
166192 // Group by multiple columns should return the product of the n-distinct of the columns. If one of the columns
167193 // does not have stats, it should use the default value instead.
168- let group_bys = list ( vec ! [
169- // TODO: Fix this
170- attr_index( attr1_base_idx) ,
171- attr_index( attr2_base_idx) ,
172- attr_index( attr3_base_idx) ,
173- ] ) ;
194+ let group_bys = list ( vec ! [ attr_index( 0 ) , attr_index( 1 ) , attr_index( 2 ) ] ) ;
174195 assert_eq ! (
175- cost_model. get_agg_row_cnt( group_bys) . await . unwrap( ) ,
196+ cost_model
197+ . get_agg_row_cnt( TEST_GROUP1_ID , group_bys)
198+ . await
199+ . unwrap( ) ,
176200 EstimatedStatistic ( ( attr1_ndistinct * attr2_ndistinct * DEFAULT_NUM_DISTINCT ) as f64 )
177201 ) ;
178202 }
0 commit comments