Skip to content

Commit d6e1825

Browse files
committed
Improve filter tests
1 parent 2c9240f commit d6e1825

File tree

5 files changed

+93
-98
lines changed

5 files changed

+93
-98
lines changed

optd-cost-model/src/cost/filter/controller.rs

Lines changed: 63 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,11 @@ mod tests {
136136

137137
#[tokio::test]
138138
async fn test_attr_ref_eq_constint_in_mcv() {
139-
let mut mcvs_counts = HashMap::new();
140-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
141-
let mcvs_total_count = 10;
142139
let per_attribute_stats = TestPerAttributeStats::new(
143-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
140+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
141+
vec![Some(Value::Int32(1))],
142+
0.3,
143+
)])),
144144
0,
145145
0.0,
146146
None,
@@ -170,12 +170,11 @@ mod tests {
170170

171171
#[tokio::test]
172172
async fn test_attr_ref_eq_constint_not_in_mcv() {
173-
let mut mcvs_counts = HashMap::new();
174-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 20);
175-
mcvs_counts.insert(vec![Some(Value::Int32(3))], 44);
176-
let mcvs_total_count = 100;
177173
let per_attribute_stats = TestPerAttributeStats::new(
178-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
174+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
175+
(vec![Some(Value::Int32(1))], 0.2),
176+
(vec![Some(Value::Int32(3))], 0.44),
177+
])),
179178
5,
180179
0.0,
181180
None,
@@ -206,11 +205,11 @@ mod tests {
206205
/// I only have one test for NEQ since I'll assume that it uses the same underlying logic as EQ
207206
#[tokio::test]
208207
async fn test_attr_ref_neq_constint_in_mcv() {
209-
let mut mcvs_counts = HashMap::new();
210-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
211-
let mcvs_total_count = 10;
212208
let per_attribute_stats = TestPerAttributeStats::new(
213-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
209+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
210+
vec![Some(Value::Int32(1))],
211+
0.3,
212+
)])),
214213
0,
215214
0.0,
216215
None,
@@ -240,10 +239,8 @@ mod tests {
240239

241240
#[tokio::test]
242241
async fn test_attr_ref_leq_constint_no_mcvs_in_range() {
243-
let mut mcvs_counts = HashMap::new();
244-
let mcvs_total_count = 10;
245242
let per_attribute_stats = TestPerAttributeStats::new(
246-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
243+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
247244
10,
248245
0.0,
249246
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
@@ -280,14 +277,13 @@ mod tests {
280277

281278
#[tokio::test]
282279
async fn test_attr_ref_leq_constint_with_mcvs_in_range_not_at_border() {
283-
let mut mcvs_counts = HashMap::new();
284-
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
285-
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
286-
mcvs_counts.insert(vec![Some(Value::Int32(17))], 8);
287-
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
288-
let mcvs_total_count = 100;
289280
let per_attribute_stats = TestPerAttributeStats::new(
290-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
281+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
282+
(vec![Some(Value::Int32(6))], 0.05),
283+
(vec![Some(Value::Int32(10))], 0.1),
284+
(vec![Some(Value::Int32(17))], 0.08),
285+
(vec![Some(Value::Int32(25))], 0.07),
286+
])),
291287
10,
292288
0.0,
293289
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
@@ -324,14 +320,13 @@ mod tests {
324320

325321
#[tokio::test]
326322
async fn test_attr_ref_leq_constint_with_mcv_at_border() {
327-
let mut mcvs_counts = HashMap::new();
328-
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
329-
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
330-
mcvs_counts.insert(vec![Some(Value::Int32(15))], 8);
331-
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
332-
let mcvs_total_count = 100;
333323
let per_attribute_stats = TestPerAttributeStats::new(
334-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
324+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
325+
(vec![Some(Value::Int32(6))], 0.05),
326+
(vec![Some(Value::Int32(10))], 0.1),
327+
(vec![Some(Value::Int32(15))], 0.08),
328+
(vec![Some(Value::Int32(25))], 0.07),
329+
])),
335330
10,
336331
0.0,
337332
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
@@ -368,10 +363,8 @@ mod tests {
368363

369364
#[tokio::test]
370365
async fn test_attr_ref_lt_constint_no_mcvs_in_range() {
371-
let mut mcvs_counts = HashMap::new();
372-
let mcvs_total_count = 10;
373366
let per_attribute_stats = TestPerAttributeStats::new(
374-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
367+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
375368
10,
376369
0.0,
377370
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
@@ -408,14 +401,13 @@ mod tests {
408401

409402
#[tokio::test]
410403
async fn test_attr_ef_lt_constint_with_mcvs_in_range_not_at_border() {
411-
let mut mcvs_counts = HashMap::new();
412-
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
413-
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
414-
mcvs_counts.insert(vec![Some(Value::Int32(17))], 8);
415-
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
416-
let mcvs_total_count = 100;
417404
let per_attribute_stats = TestPerAttributeStats::new(
418-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
405+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
406+
(vec![Some(Value::Int32(6))], 0.05),
407+
(vec![Some(Value::Int32(10))], 0.1),
408+
(vec![Some(Value::Int32(17))], 0.08),
409+
(vec![Some(Value::Int32(25))], 0.07),
410+
])),
419411
11, /* there are 4 MCVs which together add up to 0.3. With 11 total ndistinct, each
420412
* remaining value has freq 0.1 */
421413
0.0,
@@ -453,14 +445,13 @@ mod tests {
453445

454446
#[tokio::test]
455447
async fn test_attr_ref_lt_constint_with_mcv_at_border() {
456-
let mut mcvs_counts = HashMap::new();
457-
mcvs_counts.insert(vec![Some(Value::Int32(6))], 5);
458-
mcvs_counts.insert(vec![Some(Value::Int32(10))], 10);
459-
mcvs_counts.insert(vec![Some(Value::Int32(15))], 8);
460-
mcvs_counts.insert(vec![Some(Value::Int32(25))], 7);
461-
let mcvs_total_count = 100;
462448
let per_attribute_stats = TestPerAttributeStats::new(
463-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
449+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
450+
(vec![Some(Value::Int32(6))], 0.05),
451+
(vec![Some(Value::Int32(10))], 0.1),
452+
(vec![Some(Value::Int32(15))], 0.08),
453+
(vec![Some(Value::Int32(25))], 0.07),
454+
])),
464455
11, /* there are 4 MCVs which together add up to 0.3. With 11 total ndistinct, each
465456
* remaining value has freq 0.1 */
466457
0.0,
@@ -500,10 +491,8 @@ mod tests {
500491
/// The only interesting thing to test is that if there are nulls, those aren't included in GT
501492
#[tokio::test]
502493
async fn test_attr_ref_gt_constint() {
503-
let mut mcvs_counts = HashMap::new();
504-
let mcvs_total_count = 100;
505494
let per_attribute_stats = TestPerAttributeStats::new(
506-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
495+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
507496
10,
508497
0.0,
509498
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
@@ -540,10 +529,8 @@ mod tests {
540529

541530
#[tokio::test]
542531
async fn test_attr_ref_geq_constint() {
543-
let mut mcvs_counts = HashMap::new();
544-
let mcvs_total_count = 100;
545532
let per_attribute_stats = TestPerAttributeStats::new(
546-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
533+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
547534
10,
548535
0.0,
549536
Some(Distribution::SimpleDistribution(SimpleMap::new(vec![(
@@ -581,13 +568,12 @@ mod tests {
581568

582569
#[tokio::test]
583570
async fn test_and() {
584-
let mut mcvs_counts = HashMap::new();
585-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
586-
mcvs_counts.insert(vec![Some(Value::Int32(5))], 5);
587-
mcvs_counts.insert(vec![Some(Value::Int32(8))], 2);
588-
let mcvs_total_count = 10;
589571
let per_attribute_stats = TestPerAttributeStats::new(
590-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
572+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
573+
(vec![Some(Value::Int32(1))], 0.3),
574+
(vec![Some(Value::Int32(5))], 0.5),
575+
(vec![Some(Value::Int32(8))], 0.2),
576+
])),
591577
0,
592578
0.0,
593579
None,
@@ -629,13 +615,12 @@ mod tests {
629615

630616
#[tokio::test]
631617
async fn test_or() {
632-
let mut mcvs_counts = HashMap::new();
633-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
634-
mcvs_counts.insert(vec![Some(Value::Int32(5))], 5);
635-
mcvs_counts.insert(vec![Some(Value::Int32(8))], 2);
636-
let mcvs_total_count = 10;
637618
let per_attribute_stats = TestPerAttributeStats::new(
638-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
619+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
620+
(vec![Some(Value::Int32(1))], 0.3),
621+
(vec![Some(Value::Int32(5))], 0.5),
622+
(vec![Some(Value::Int32(8))], 0.2),
623+
])),
639624
0,
640625
0.0,
641626
None,
@@ -677,11 +662,11 @@ mod tests {
677662

678663
#[tokio::test]
679664
async fn test_not() {
680-
let mut mcvs_counts = HashMap::new();
681-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
682-
let mcvs_total_count = 10;
683665
let per_attribute_stats = TestPerAttributeStats::new(
684-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
666+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
667+
vec![Some(Value::Int32(1))],
668+
0.3,
669+
)])),
685670
0,
686671
0.0,
687672
None,
@@ -710,11 +695,11 @@ mod tests {
710695

711696
#[tokio::test]
712697
async fn test_attr_ref_eq_cast_value() {
713-
let mut mcvs_counts = HashMap::new();
714-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
715-
let mcvs_total_count = 10;
716698
let per_attribute_stats = TestPerAttributeStats::new(
717-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
699+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
700+
vec![Some(Value::Int32(1))],
701+
0.3,
702+
)])),
718703
0,
719704
0.0,
720705
None,
@@ -753,11 +738,11 @@ mod tests {
753738

754739
#[tokio::test]
755740
async fn test_cast_attr_ref_eq_value() {
756-
let mut mcvs_counts = HashMap::new();
757-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 3);
758-
let mcvs_total_count = 10;
759741
let per_attribute_stats = TestPerAttributeStats::new(
760-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
742+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![(
743+
vec![Some(Value::Int32(1))],
744+
0.3,
745+
)])),
761746
0,
762747
0.1,
763748
None,
@@ -812,10 +797,8 @@ mod tests {
812797
/// pretty good signal that the Cast was left as is.
813798
#[tokio::test]
814799
async fn test_cast_attr_ref_eq_attr_ref() {
815-
let mut mcvs_counts = HashMap::new();
816-
let mcvs_total_count = 10;
817800
let per_attribute_stats = TestPerAttributeStats::new(
818-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
801+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![])),
819802
0,
820803
0.0,
821804
None,

optd-cost-model/src/cost/filter/in_list.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,17 +73,19 @@ mod tests {
7373
use crate::{
7474
common::{types::TableId, values::Value},
7575
cost_model::tests::*,
76-
stats::{utilities::counter::Counter, MostCommonValues},
76+
stats::{
77+
utilities::{counter::Counter, simple_map::SimpleMap},
78+
MostCommonValues,
79+
},
7780
};
7881

7982
#[tokio::test]
8083
async fn test_in_list() {
81-
let mut mcvs_counts = HashMap::new();
82-
mcvs_counts.insert(vec![Some(Value::Int32(1))], 8);
83-
mcvs_counts.insert(vec![Some(Value::Int32(2))], 2);
84-
let mcvs_total_count = 10;
8584
let per_attribute_stats = TestPerAttributeStats::new(
86-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
85+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
86+
(vec![Some(Value::Int32(1))], 0.8),
87+
(vec![Some(Value::Int32(2))], 0.2),
88+
])),
8789
2,
8890
0.0,
8991
None,

optd-cost-model/src/cost/filter/like.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,19 +108,18 @@ mod tests {
108108
common::{types::TableId, values::Value},
109109
cost_model::tests::*,
110110
stats::{
111-
utilities::counter::Counter, MostCommonValues, FIXED_CHAR_SEL_FACTOR,
112-
FULL_WILDCARD_SEL_FACTOR,
111+
utilities::{counter::Counter, simple_map::SimpleMap},
112+
MostCommonValues, FIXED_CHAR_SEL_FACTOR, FULL_WILDCARD_SEL_FACTOR,
113113
},
114114
};
115115

116116
#[tokio::test]
117117
async fn test_like_no_nulls() {
118-
let mut mcvs_counts = HashMap::new();
119-
mcvs_counts.insert(vec![Some(Value::String("abcd".into()))], 1);
120-
mcvs_counts.insert(vec![Some(Value::String("abc".into()))], 1);
121-
let mcvs_total_count = 10;
122118
let per_attribute_stats = TestPerAttributeStats::new(
123-
MostCommonValues::Counter(Counter::new_from_existing(mcvs_counts, mcvs_total_count)),
119+
MostCommonValues::SimpleFrequency(SimpleMap::new(vec![
120+
(vec![Some(Value::String("abcd".into()))], 0.1),
121+
(vec![Some(Value::String("abc".into()))], 0.1),
122+
])),
124123
2,
125124
0.0,
126125
None,

optd-cost-model/src/stats/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ pub type AttributeCombValue = Vec<Option<Value>>;
3535
#[serde(tag = "type")]
3636
pub enum MostCommonValues {
3737
Counter(Counter<AttributeCombValue>),
38+
SimpleFrequency(SimpleMap<AttributeCombValue>),
3839
// Add more types here...
3940
}
4041

@@ -47,12 +48,14 @@ impl MostCommonValues {
4748
pub fn freq(&self, value: &AttributeCombValue) -> Option<f64> {
4849
match self {
4950
MostCommonValues::Counter(counter) => counter.frequencies().get(value).copied(),
51+
MostCommonValues::SimpleFrequency(simple_map) => simple_map.m.get(value).copied(),
5052
}
5153
}
5254

5355
pub fn total_freq(&self) -> f64 {
5456
match self {
5557
MostCommonValues::Counter(counter) => counter.frequencies().values().sum(),
58+
MostCommonValues::SimpleFrequency(simple_map) => simple_map.m.values().sum(),
5659
}
5760
}
5861

@@ -64,13 +67,20 @@ impl MostCommonValues {
6467
.filter(|(val, _)| pred(val))
6568
.map(|(_, freq)| freq)
6669
.sum(),
70+
MostCommonValues::SimpleFrequency(simple_map) => simple_map
71+
.m
72+
.iter()
73+
.filter(|(val, _)| pred(val))
74+
.map(|(_, freq)| freq)
75+
.sum(),
6776
}
6877
}
6978

7079
// returns the # of entries (i.e. value + freq) in the most common values structure
7180
pub fn cnt(&self) -> usize {
7281
match self {
7382
MostCommonValues::Counter(counter) => counter.frequencies().len(),
83+
MostCommonValues::SimpleFrequency(simple_map) => simple_map.m.len(),
7484
}
7585
}
7686
}
@@ -80,7 +90,7 @@ impl MostCommonValues {
8090
#[serde(tag = "type")]
8191
pub enum Distribution {
8292
TDigest(TDigest<Value>),
83-
SimpleDistribution(SimpleMap),
93+
SimpleDistribution(SimpleMap<Value>),
8494
// Add more types here...
8595
}
8696

0 commit comments

Comments
 (0)