Skip to content

Commit ebab829

Browse files
committed
Finish most tests for filter
1 parent 2c1f09b commit ebab829

File tree

9 files changed

+829
-566
lines changed

9 files changed

+829
-566
lines changed

optd-cost-model/src/cost/filter/controller.rs

Lines changed: 752 additions & 545 deletions
Large diffs are not rendered by default.

optd-cost-model/src/cost_model.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,9 @@ pub mod tests {
136136
values::Value,
137137
},
138138
stats::{
139-
counter::Counter, tdigest::TDigest, AttributeCombValueStats, Distribution,
140-
MostCommonValues,
139+
utilities::counter::Counter, AttributeCombValueStats, Distribution, MostCommonValues,
141140
},
142-
storage::mock::{CostModelStorageMockManagerImpl, TableStats},
141+
storage::mock::{BaseTableAttrInfo, CostModelStorageMockManagerImpl, TableStats},
143142
};
144143

145144
use super::*;
@@ -152,6 +151,7 @@ pub mod tests {
152151
table_id: Vec<TableId>,
153152
per_attribute_stats: Vec<TestPerAttributeStats>,
154153
row_counts: Vec<Option<usize>>,
154+
per_table_attr_infos: BaseTableAttrInfo,
155155
) -> TestOptCostModelMock {
156156
let storage_manager = CostModelStorageMockManagerImpl::new(
157157
table_id
@@ -168,6 +168,7 @@ pub mod tests {
168168
)
169169
})
170170
.collect(),
171+
per_table_attr_infos,
171172
);
172173
CostModelImpl::new(storage_manager, CatalogSource::Mock)
173174
}
@@ -222,9 +223,6 @@ pub mod tests {
222223
)
223224
}
224225

225-
/// The reason this isn't an associated function of PerAttributeStats is because that would require
226-
/// adding an empty type to the enum definitions of MostCommonValues and Distribution,
227-
/// which I wanted to avoid
228226
pub(crate) fn get_empty_per_attr_stats() -> TestPerAttributeStats {
229227
TestPerAttributeStats::new(MostCommonValues::Counter(Counter::default()), 0, 0.0, None)
230228
}

optd-cost-model/src/stats/mod.rs

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
#![allow(unused)]
22

33
mod arith_encoder;
4-
pub mod counter;
5-
pub mod tdigest;
4+
pub mod utilities;
65

76
use crate::common::values::Value;
8-
use counter::Counter;
97
use serde::{Deserialize, Serialize};
8+
use utilities::counter::Counter;
9+
use utilities::{
10+
simple_map::{self, SimpleMap},
11+
tdigest::TDigest,
12+
};
1013

1114
// Default n-distinct estimate for derived columns or columns lacking statistics
1215
pub const DEFAULT_NUM_DISTINCT: u64 = 200;
@@ -27,7 +30,8 @@ pub const FIXED_CHAR_SEL_FACTOR: f64 = 0.2;
2730

2831
pub type AttributeCombValue = Vec<Option<Value>>;
2932

30-
#[derive(Serialize, Deserialize, Debug)]
33+
// TODO: remove the `Clone` derive; see the comment on [`AttributeCombValueStats`]
34+
#[derive(Serialize, Deserialize, Debug, Clone)]
3135
#[serde(tag = "type")]
3236
pub enum MostCommonValues {
3337
Counter(Counter<AttributeCombValue>),
@@ -71,10 +75,12 @@ impl MostCommonValues {
7175
}
7276
}
7377

74-
#[derive(Serialize, Deserialize, Debug)]
78+
// TODO: remove the `Clone` derive; see the comment on [`AttributeCombValueStats`]
79+
#[derive(Serialize, Deserialize, Debug, Clone)]
7580
#[serde(tag = "type")]
7681
pub enum Distribution {
77-
TDigest(tdigest::TDigest<Value>),
82+
TDigest(TDigest<Value>),
83+
SimpleDistribution(SimpleMap),
7884
// Add more types here...
7985
}
8086

@@ -89,11 +95,21 @@ impl Distribution {
8995
tdigest.centroids.len() as f64 * tdigest.cdf(value) / nb_rows as f64
9096
}
9197
}
98+
Distribution::SimpleDistribution(simple_distribution) => {
99+
*simple_distribution.m.get(value).unwrap_or(&0.0)
100+
}
92101
}
93102
}
94103
}
95104

96-
#[derive(Serialize, Deserialize, Debug)]
105+
// TODO: Remove the `Clone` derive. It is currently needed because
106+
// persistent.rs doesn't have an in-memory cache, so the storage API has to
107+
// return AttributeCombValueStats rather than &AttributeCombValueStats.
108+
// This poses a problem for mock.rs when testing: the mock storage
109+
// only has an in-memory hash map, so it must return a clone of AttributeCombValueStats.
110+
// Later, if an in-memory cache is added, we should change this to return a reference
111+
// **and** remove the clone.
112+
#[derive(Serialize, Deserialize, Debug, Clone)]
97113
pub struct AttributeCombValueStats {
98114
pub mcvs: MostCommonValues, // Does NOT contain full nulls.
99115
pub distr: Option<Distribution>, // Does NOT contain mcvs; optional.

optd-cost-model/src/stats/counter.rs renamed to optd-cost-model/src/stats/utilities/counter.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ use serde::de::DeserializeOwned;
55
use serde::{Deserialize, Serialize};
66

77
/// The Counter structure to track exact frequencies of fixed elements.
8+
/// TODO: remove the `Clone` derive; see the comment on [`AttributeCombValueStats`]
89
#[serde_with::serde_as]
9-
#[derive(Default, Serialize, Deserialize, Debug)]
10+
#[derive(Default, Serialize, Deserialize, Debug, Clone)]
1011
pub struct Counter<T: PartialEq + Eq + Hash + Clone + Serialize + DeserializeOwned> {
1112
#[serde_as(as = "HashMap<serde_with::json::JsonString, _>")]
1213
counts: HashMap<T, i32>, // The exact counts of an element T.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
pub mod counter;
2+
pub mod simple_map;
3+
pub mod tdigest;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
use std::collections::HashMap;
2+
3+
use serde::{Deserialize, Serialize};
4+
5+
use crate::common::values::Value;
6+
7+
/// TODO: documentation
8+
/// Currently used mainly for testing purposes.
9+
#[derive(Clone, Serialize, Deserialize, Debug)]
10+
pub struct SimpleMap {
11+
pub(crate) m: HashMap<Value, f64>,
12+
}
13+
14+
impl SimpleMap {
15+
pub fn new(v: Vec<(Value, f64)>) -> Self {
16+
Self {
17+
m: v.into_iter().collect(),
18+
}
19+
}
20+
}

optd-cost-model/src/stats/tdigest.rs renamed to optd-cost-model/src/stats/utilities/tdigest.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ use std::marker::PhantomData;
1515
use itertools::Itertools;
1616
use serde::{Deserialize, Serialize};
1717

18-
use crate::common::values::Value;
19-
20-
use super::arith_encoder;
18+
use crate::{common::values::Value, stats::arith_encoder};
2119

2220
pub const DEFAULT_COMPRESSION: f64 = 200.0;
2321

optd-cost-model/src/storage/mock.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,21 @@ impl TableStats {
3030
}
3131

3232
pub type BaseTableStats = HashMap<TableId, TableStats>;
33+
pub type BaseTableAttrInfo = HashMap<TableId, HashMap<i32, Attribute>>;
3334

3435
pub struct CostModelStorageMockManagerImpl {
3536
pub(crate) per_table_stats_map: BaseTableStats,
37+
pub(crate) per_table_attr_infos_map: BaseTableAttrInfo,
3638
}
3739

3840
impl CostModelStorageMockManagerImpl {
39-
pub fn new(per_table_stats_map: BaseTableStats) -> Self {
41+
pub fn new(
42+
per_table_stats_map: BaseTableStats,
43+
per_table_attr_infos_map: BaseTableAttrInfo,
44+
) -> Self {
4045
Self {
4146
per_table_stats_map,
47+
per_table_attr_infos_map,
4248
}
4349
}
4450
}
@@ -49,14 +55,28 @@ impl CostModelStorageManager for CostModelStorageMockManagerImpl {
4955
table_id: TableId,
5056
attr_base_index: i32,
5157
) -> CostModelResult<Option<Attribute>> {
52-
todo!()
58+
let table_attr_infos = self.per_table_attr_infos_map.get(&table_id);
59+
match table_attr_infos {
60+
None => Ok(None),
61+
Some(table_attr_infos) => match table_attr_infos.get(&attr_base_index) {
62+
None => Ok(None),
63+
Some(attr) => Ok(Some(attr.clone())),
64+
},
65+
}
5366
}
5467

5568
async fn get_attributes_comb_statistics(
5669
&self,
5770
table_id: TableId,
5871
attr_base_indices: &[usize],
5972
) -> CostModelResult<Option<AttributeCombValueStats>> {
60-
todo!()
73+
let table_stats = self.per_table_stats_map.get(&table_id);
74+
match table_stats {
75+
None => Ok(None),
76+
Some(table_stats) => match table_stats.column_comb_stats.get(attr_base_indices) {
77+
None => Ok(None),
78+
Some(stats) => Ok(Some(stats.clone())),
79+
},
80+
}
6181
}
6282
}

optd-cost-model/src/storage/persistent.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use optd_persistent::{cost_model::interface::StatType, CostModelStorageLayer};
55

66
use crate::{
77
common::{predicates::constant_pred::ConstantType, types::TableId},
8-
stats::{counter::Counter, AttributeCombValueStats, Distribution, MostCommonValues},
8+
stats::{utilities::counter::Counter, AttributeCombValueStats, Distribution, MostCommonValues},
99
CostModelResult,
1010
};
1111

0 commit comments

Comments
 (0)