Skip to content

Commit 303d73c

Browse files
committed
merge main and resolve conflicts
2 parents 0059141 + 9ba03e6 commit 303d73c

35 files changed

+1490
-62
lines changed

optd-cost-model/src/common/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod nodes;
22
pub mod predicates;
3+
pub mod properties;
34
pub mod types;
45
pub mod values;
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
use std::collections::HashSet;
2+
3+
use crate::{common::types::TableId, utils::DisjointSets};
4+
5+
pub type AttrRefs = Vec<AttrRef>;
6+
7+
/// [`BaseTableAttrRef`] represents a reference to an attribute in a base table,
8+
/// i.e. a table existing in the catalog.
9+
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
10+
pub struct BaseTableAttrRef {
11+
pub table_id: TableId,
12+
pub attr_idx: u64,
13+
}
14+
15+
/// [`AttrRef`] represents a reference to an attribute in a query.
16+
#[derive(Clone, Debug)]
17+
pub enum AttrRef {
18+
/// Reference to a base table attribute.
19+
BaseTableAttrRef(BaseTableAttrRef),
20+
/// Reference to a derived attribute (e.g. t.v1 + t.v2).
21+
/// TODO: Better representation of derived attributes.
22+
Derived,
23+
}
24+
25+
impl AttrRef {
26+
pub fn base_table_attr_ref(table_id: TableId, attr_idx: u64) -> Self {
27+
AttrRef::BaseTableAttrRef(BaseTableAttrRef { table_id, attr_idx })
28+
}
29+
}
30+
31+
/// Wraps a base table attribute reference in the corresponding [`AttrRef`]
/// variant.
impl From<BaseTableAttrRef> for AttrRef {
    fn from(attr: BaseTableAttrRef) -> Self {
        Self::BaseTableAttrRef(attr)
    }
}
36+
37+
/// [`EqPredicate`] represents an equality predicate between two attributes.
38+
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
39+
pub struct EqPredicate {
40+
pub left: BaseTableAttrRef,
41+
pub right: BaseTableAttrRef,
42+
}
43+
44+
impl EqPredicate {
45+
pub fn new(left: BaseTableAttrRef, right: BaseTableAttrRef) -> Self {
46+
Self { left, right }
47+
}
48+
}
49+
50+
/// [`SemanticCorrelation`] represents the semantic correlation between attributes in a
51+
/// query. "Semantic" means that the attributes are correlated based on the
52+
/// semantics of the query, not the statistics.
53+
///
54+
/// [`SemanticCorrelation`] contains equal attributes denoted by disjoint sets of base
55+
/// table attributes, e.g. {{ t1.c1 = t2.c1 = t3.c1 }, { t1.c2 = t2.c2 }}.
56+
#[derive(Clone, Debug, Default)]
57+
pub struct SemanticCorrelation {
58+
/// A disjoint set of base table attributes with equal values in the same row.
59+
disjoint_eq_attr_sets: DisjointSets<BaseTableAttrRef>,
60+
/// The predicates that define the equalities.
61+
eq_predicates: HashSet<EqPredicate>,
62+
}
63+
64+
impl SemanticCorrelation {
65+
pub fn new() -> Self {
66+
Self {
67+
disjoint_eq_attr_sets: DisjointSets::new(),
68+
eq_predicates: HashSet::new(),
69+
}
70+
}
71+
72+
pub fn add_predicate(&mut self, predicate: EqPredicate) {
73+
let left = &predicate.left;
74+
let right = &predicate.right;
75+
76+
// Add the indices to the set if they do not exist.
77+
if !self.disjoint_eq_attr_sets.contains(left) {
78+
self.disjoint_eq_attr_sets
79+
.make_set(left.clone())
80+
.expect("just checked left attribute index does not exist");
81+
}
82+
if !self.disjoint_eq_attr_sets.contains(right) {
83+
self.disjoint_eq_attr_sets
84+
.make_set(right.clone())
85+
.expect("just checked right attribute index does not exist");
86+
}
87+
// Union the attributes.
88+
self.disjoint_eq_attr_sets
89+
.union(left, right)
90+
.expect("both attribute indices should exist");
91+
92+
// Keep track of the predicate.
93+
self.eq_predicates.insert(predicate);
94+
}
95+
96+
/// Determine if two attributes are in the same set.
97+
pub fn is_eq(&mut self, left: &BaseTableAttrRef, right: &BaseTableAttrRef) -> bool {
98+
self.disjoint_eq_attr_sets
99+
.same_set(left, right)
100+
.unwrap_or(false)
101+
}
102+
103+
pub fn contains(&self, base_attr_ref: &BaseTableAttrRef) -> bool {
104+
self.disjoint_eq_attr_sets.contains(base_attr_ref)
105+
}
106+
107+
/// Get the number of attributes that are equal to `attr`, including `attr` itself.
108+
pub fn num_eq_attributes(&mut self, attr: &BaseTableAttrRef) -> usize {
109+
self.disjoint_eq_attr_sets.set_size(attr).unwrap()
110+
}
111+
112+
/// Find the set of predicates that define the equality of the set of attributes `attr` belongs to.
113+
pub fn find_predicates_for_eq_attr_set(&mut self, attr: &BaseTableAttrRef) -> Vec<EqPredicate> {
114+
let mut predicates = Vec::new();
115+
for predicate in &self.eq_predicates {
116+
let left = &predicate.left;
117+
let right = &predicate.right;
118+
if (left != attr && self.disjoint_eq_attr_sets.same_set(attr, left).unwrap())
119+
|| (right != attr && self.disjoint_eq_attr_sets.same_set(attr, right).unwrap())
120+
{
121+
predicates.push(predicate.clone());
122+
}
123+
}
124+
predicates
125+
}
126+
127+
/// Find the set of attributes that define the equality of the set of attributes `attr` belongs to.
128+
pub fn find_attrs_for_eq_attribute_set(
129+
&mut self,
130+
attr: &BaseTableAttrRef,
131+
) -> HashSet<BaseTableAttrRef> {
132+
let predicates = self.find_predicates_for_eq_attr_set(attr);
133+
predicates
134+
.into_iter()
135+
.flat_map(|predicate| vec![predicate.left, predicate.right])
136+
.collect()
137+
}
138+
139+
/// Union two `EqBaseTableattributesets` to produce a new disjoint sets.
140+
pub fn union(x: Self, y: Self) -> Self {
141+
let mut eq_attr_sets = Self::new();
142+
for predicate in x
143+
.eq_predicates
144+
.into_iter()
145+
.chain(y.eq_predicates.into_iter())
146+
{
147+
eq_attr_sets.add_predicate(predicate);
148+
}
149+
eq_attr_sets
150+
}
151+
152+
pub fn merge(x: Option<Self>, y: Option<Self>) -> Option<Self> {
153+
let eq_attr_sets = match (x, y) {
154+
(Some(x), Some(y)) => Self::union(x, y),
155+
(Some(x), None) => x.clone(),
156+
(None, Some(y)) => y.clone(),
157+
_ => return None,
158+
};
159+
Some(eq_attr_sets)
160+
}
161+
}
162+
163+
/// [`GroupAttrRefs`] represents the attributes of a group in a query.
164+
#[derive(Clone, Debug)]
165+
pub struct GroupAttrRefs {
166+
attribute_refs: AttrRefs,
167+
/// Correlation of the output attributes of the group.
168+
output_correlation: Option<SemanticCorrelation>,
169+
}
170+
171+
impl GroupAttrRefs {
172+
pub fn new(attribute_refs: AttrRefs, output_correlation: Option<SemanticCorrelation>) -> Self {
173+
Self {
174+
attribute_refs,
175+
output_correlation,
176+
}
177+
}
178+
179+
pub fn base_table_attribute_refs(&self) -> &AttrRefs {
180+
&self.attribute_refs
181+
}
182+
183+
pub fn output_correlation(&self) -> Option<&SemanticCorrelation> {
184+
self.output_correlation.as_ref()
185+
}
186+
}
187+
188+
#[cfg(test)]
mod tests {
    use super::*;

    /// Grows the equality sets one predicate at a time and checks both set
    /// membership and the predicates reported for a set after each step.
    #[test]
    fn test_eq_base_table_attribute_sets() {
        let a1 = BaseTableAttrRef {
            table_id: TableId(1),
            attr_idx: 1,
        };
        let a2 = BaseTableAttrRef {
            table_id: TableId(2),
            attr_idx: 2,
        };
        let a3 = BaseTableAttrRef {
            table_id: TableId(3),
            attr_idx: 3,
        };
        let a4 = BaseTableAttrRef {
            table_id: TableId(4),
            attr_idx: 4,
        };
        let p12 = EqPredicate::new(a1.clone(), a2.clone());
        let p34 = EqPredicate::new(a3.clone(), a4.clone());
        let p13 = EqPredicate::new(a1.clone(), a3.clone());

        let mut correlation = SemanticCorrelation::new();

        // Sets: (1, 2)
        correlation.add_predicate(p12.clone());
        assert!(correlation.is_eq(&a1, &a2));

        // Sets: (1, 2), (3, 4)
        correlation.add_predicate(p34.clone());
        assert!(correlation.is_eq(&a3, &a4));
        assert!(!correlation.is_eq(&a2, &a3));

        let found = correlation.find_predicates_for_eq_attr_set(&a1);
        assert_eq!(found.len(), 1);
        assert!(found.contains(&p12));

        let found = correlation.find_predicates_for_eq_attr_set(&a3);
        assert_eq!(found.len(), 1);
        assert!(found.contains(&p34));

        // Sets: (1, 2, 3, 4)
        correlation.add_predicate(p13.clone());
        assert!(correlation.is_eq(&a1, &a3));
        assert!(correlation.is_eq(&a2, &a4));
        assert!(correlation.is_eq(&a1, &a4));

        let found = correlation.find_predicates_for_eq_attr_set(&a1);
        assert_eq!(found.len(), 3);
        assert!(found.contains(&p12));
        assert!(found.contains(&p34));
        assert!(found.contains(&p13));
    }
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
use serde::{Deserialize, Serialize};
2+
3+
use super::predicates::constant_pred::ConstantType;
4+
5+
pub mod attr_ref;
6+
pub mod schema;
7+
8+
#[derive(Clone, Debug, Serialize, Deserialize)]
9+
pub struct Attribute {
10+
pub name: String,
11+
pub typ: ConstantType,
12+
pub nullable: bool,
13+
}
14+
15+
impl std::fmt::Display for Attribute {
16+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
17+
if self.nullable {
18+
write!(f, "{}:{:?}", self.name, self.typ)
19+
} else {
20+
write!(f, "{}:{:?}(non-null)", self.name, self.typ)
21+
}
22+
}
23+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
use itertools::Itertools;
2+
3+
use serde::{Deserialize, Serialize};
4+
5+
use super::Attribute;
6+
7+
/// [`Schema`] represents the schema of a group in the memo. It contains a list of attributes.
8+
#[derive(Clone, Debug, Serialize, Deserialize)]
9+
pub struct Schema {
10+
pub attributes: Vec<Attribute>,
11+
}
12+
13+
impl std::fmt::Display for Schema {
14+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
15+
write!(
16+
f,
17+
"[{}]",
18+
self.attributes.iter().map(|x| x.to_string()).join(", ")
19+
)
20+
}
21+
}
22+
23+
impl Schema {
24+
pub fn new(attributes: Vec<Attribute>) -> Self {
25+
Self { attributes }
26+
}
27+
28+
pub fn len(&self) -> usize {
29+
self.attributes.len()
30+
}
31+
32+
pub fn is_empty(&self) -> bool {
33+
self.len() == 0
34+
}
35+
}

optd-cost-model/src/cost/agg.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,15 @@ mod tests {
6767
use std::collections::HashMap;
6868

6969
use crate::{
70-
common::{predicates::constant_pred::ConstantType, types::TableId, values::Value},
70+
common::{
71+
predicates::constant_pred::ConstantType, properties::Attribute, types::TableId,
72+
values::Value,
73+
},
7174
cost_model::tests::{
7275
attr_ref, cnst, create_cost_model_mock_storage, empty_list, empty_per_attr_stats, list,
7376
TestPerAttributeStats,
7477
},
7578
stats::{utilities::simple_map::SimpleMap, MostCommonValues, DEFAULT_NUM_DISTINCT},
76-
storage::Attribute,
7779
EstimatedStatistic,
7880
};
7981

optd-cost-model/src/cost/filter/controller.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ mod tests {
9898
bin_op_pred::BinOpType, constant_pred::ConstantType, log_op_pred::LogOpType,
9999
un_op_pred::UnOpType,
100100
},
101+
properties::Attribute,
101102
types::TableId,
102103
values::Value,
103104
},
@@ -106,7 +107,6 @@ mod tests {
106107
utilities::{counter::Counter, simple_map::SimpleMap},
107108
Distribution, MostCommonValues, DEFAULT_EQ_SEL,
108109
},
109-
storage::Attribute,
110110
};
111111
use arrow_schema::DataType;
112112

0 commit comments

Comments
 (0)