|
| 1 | +use std::collections::HashSet; |
| 2 | + |
| 3 | +use crate::{common::types::TableId, utils::DisjointSets}; |
| 4 | + |
| 5 | +pub type AttrRefs = Vec<AttrRef>; |
| 6 | + |
| 7 | +/// [`BaseTableAttrRef`] represents a reference to an attribute in a base table, |
| 8 | +/// i.e. a table existing in the catalog. |
| 9 | +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] |
| 10 | +pub struct BaseTableAttrRef { |
| 11 | + pub table_id: TableId, |
| 12 | + pub attr_idx: u64, |
| 13 | +} |
| 14 | + |
| 15 | +/// [`AttrRef`] represents a reference to an attribute in a query. |
| 16 | +#[derive(Clone, Debug)] |
| 17 | +pub enum AttrRef { |
| 18 | + /// Reference to a base table attribute. |
| 19 | + BaseTableAttrRef(BaseTableAttrRef), |
| 20 | + /// Reference to a derived attribute (e.g. t.v1 + t.v2). |
| 21 | + /// TODO: Better representation of derived attributes. |
| 22 | + Derived, |
| 23 | +} |
| 24 | + |
| 25 | +impl AttrRef { |
| 26 | + pub fn base_table_attr_ref(table_id: TableId, attr_idx: u64) -> Self { |
| 27 | + AttrRef::BaseTableAttrRef(BaseTableAttrRef { table_id, attr_idx }) |
| 28 | + } |
| 29 | +} |
| 30 | + |
| 31 | +impl From<BaseTableAttrRef> for AttrRef { |
| 32 | + fn from(attr: BaseTableAttrRef) -> Self { |
| 33 | + AttrRef::BaseTableAttrRef(attr) |
| 34 | + } |
| 35 | +} |
| 36 | + |
| 37 | +/// [`EqPredicate`] represents an equality predicate between two attributes. |
| 38 | +#[derive(Clone, Debug, Eq, Hash, PartialEq)] |
| 39 | +pub struct EqPredicate { |
| 40 | + pub left: BaseTableAttrRef, |
| 41 | + pub right: BaseTableAttrRef, |
| 42 | +} |
| 43 | + |
| 44 | +impl EqPredicate { |
| 45 | + pub fn new(left: BaseTableAttrRef, right: BaseTableAttrRef) -> Self { |
| 46 | + Self { left, right } |
| 47 | + } |
| 48 | +} |
| 49 | + |
| 50 | +/// [`SemanticCorrelation`] represents the semantic correlation between attributes in a |
| 51 | +/// query. "Semantic" means that the attributes are correlated based on the |
| 52 | +/// semantics of the query, not the statistics. |
| 53 | +/// |
| 54 | +/// [`SemanticCorrelation`] contains equal attributes denoted by disjoint sets of base |
| 55 | +/// table attributes, e.g. {{ t1.c1 = t2.c1 = t3.c1 }, { t1.c2 = t2.c2 }}. |
| 56 | +#[derive(Clone, Debug, Default)] |
| 57 | +pub struct SemanticCorrelation { |
| 58 | + /// A disjoint set of base table attributes with equal values in the same row. |
| 59 | + disjoint_eq_attr_sets: DisjointSets<BaseTableAttrRef>, |
| 60 | + /// The predicates that define the equalities. |
| 61 | + eq_predicates: HashSet<EqPredicate>, |
| 62 | +} |
| 63 | + |
| 64 | +impl SemanticCorrelation { |
| 65 | + pub fn new() -> Self { |
| 66 | + Self { |
| 67 | + disjoint_eq_attr_sets: DisjointSets::new(), |
| 68 | + eq_predicates: HashSet::new(), |
| 69 | + } |
| 70 | + } |
| 71 | + |
| 72 | + pub fn add_predicate(&mut self, predicate: EqPredicate) { |
| 73 | + let left = &predicate.left; |
| 74 | + let right = &predicate.right; |
| 75 | + |
| 76 | + // Add the indices to the set if they do not exist. |
| 77 | + if !self.disjoint_eq_attr_sets.contains(left) { |
| 78 | + self.disjoint_eq_attr_sets |
| 79 | + .make_set(left.clone()) |
| 80 | + .expect("just checked left attribute index does not exist"); |
| 81 | + } |
| 82 | + if !self.disjoint_eq_attr_sets.contains(right) { |
| 83 | + self.disjoint_eq_attr_sets |
| 84 | + .make_set(right.clone()) |
| 85 | + .expect("just checked right attribute index does not exist"); |
| 86 | + } |
| 87 | + // Union the attributes. |
| 88 | + self.disjoint_eq_attr_sets |
| 89 | + .union(left, right) |
| 90 | + .expect("both attribute indices should exist"); |
| 91 | + |
| 92 | + // Keep track of the predicate. |
| 93 | + self.eq_predicates.insert(predicate); |
| 94 | + } |
| 95 | + |
| 96 | + /// Determine if two attributes are in the same set. |
| 97 | + pub fn is_eq(&mut self, left: &BaseTableAttrRef, right: &BaseTableAttrRef) -> bool { |
| 98 | + self.disjoint_eq_attr_sets |
| 99 | + .same_set(left, right) |
| 100 | + .unwrap_or(false) |
| 101 | + } |
| 102 | + |
| 103 | + pub fn contains(&self, base_attr_ref: &BaseTableAttrRef) -> bool { |
| 104 | + self.disjoint_eq_attr_sets.contains(base_attr_ref) |
| 105 | + } |
| 106 | + |
| 107 | + /// Get the number of attributes that are equal to `attr`, including `attr` itself. |
| 108 | + pub fn num_eq_attributes(&mut self, attr: &BaseTableAttrRef) -> usize { |
| 109 | + self.disjoint_eq_attr_sets.set_size(attr).unwrap() |
| 110 | + } |
| 111 | + |
| 112 | + /// Find the set of predicates that define the equality of the set of attributes `attr` belongs to. |
| 113 | + pub fn find_predicates_for_eq_attr_set(&mut self, attr: &BaseTableAttrRef) -> Vec<EqPredicate> { |
| 114 | + let mut predicates = Vec::new(); |
| 115 | + for predicate in &self.eq_predicates { |
| 116 | + let left = &predicate.left; |
| 117 | + let right = &predicate.right; |
| 118 | + if (left != attr && self.disjoint_eq_attr_sets.same_set(attr, left).unwrap()) |
| 119 | + || (right != attr && self.disjoint_eq_attr_sets.same_set(attr, right).unwrap()) |
| 120 | + { |
| 121 | + predicates.push(predicate.clone()); |
| 122 | + } |
| 123 | + } |
| 124 | + predicates |
| 125 | + } |
| 126 | + |
| 127 | + /// Find the set of attributes that define the equality of the set of attributes `attr` belongs to. |
| 128 | + pub fn find_attrs_for_eq_attribute_set( |
| 129 | + &mut self, |
| 130 | + attr: &BaseTableAttrRef, |
| 131 | + ) -> HashSet<BaseTableAttrRef> { |
| 132 | + let predicates = self.find_predicates_for_eq_attr_set(attr); |
| 133 | + predicates |
| 134 | + .into_iter() |
| 135 | + .flat_map(|predicate| vec![predicate.left, predicate.right]) |
| 136 | + .collect() |
| 137 | + } |
| 138 | + |
| 139 | + /// Union two `EqBaseTableattributesets` to produce a new disjoint sets. |
| 140 | + pub fn union(x: Self, y: Self) -> Self { |
| 141 | + let mut eq_attr_sets = Self::new(); |
| 142 | + for predicate in x |
| 143 | + .eq_predicates |
| 144 | + .into_iter() |
| 145 | + .chain(y.eq_predicates.into_iter()) |
| 146 | + { |
| 147 | + eq_attr_sets.add_predicate(predicate); |
| 148 | + } |
| 149 | + eq_attr_sets |
| 150 | + } |
| 151 | + |
| 152 | + pub fn merge(x: Option<Self>, y: Option<Self>) -> Option<Self> { |
| 153 | + let eq_attr_sets = match (x, y) { |
| 154 | + (Some(x), Some(y)) => Self::union(x, y), |
| 155 | + (Some(x), None) => x.clone(), |
| 156 | + (None, Some(y)) => y.clone(), |
| 157 | + _ => return None, |
| 158 | + }; |
| 159 | + Some(eq_attr_sets) |
| 160 | + } |
| 161 | +} |
| 162 | + |
| 163 | +/// [`GroupAttrRefs`] represents the attributes of a group in a query. |
| 164 | +#[derive(Clone, Debug)] |
| 165 | +pub struct GroupAttrRefs { |
| 166 | + attribute_refs: AttrRefs, |
| 167 | + /// Correlation of the output attributes of the group. |
| 168 | + output_correlation: Option<SemanticCorrelation>, |
| 169 | +} |
| 170 | + |
| 171 | +impl GroupAttrRefs { |
| 172 | + pub fn new(attribute_refs: AttrRefs, output_correlation: Option<SemanticCorrelation>) -> Self { |
| 173 | + Self { |
| 174 | + attribute_refs, |
| 175 | + output_correlation, |
| 176 | + } |
| 177 | + } |
| 178 | + |
| 179 | + pub fn base_table_attribute_refs(&self) -> &AttrRefs { |
| 180 | + &self.attribute_refs |
| 181 | + } |
| 182 | + |
| 183 | + pub fn output_correlation(&self) -> Option<&SemanticCorrelation> { |
| 184 | + self.output_correlation.as_ref() |
| 185 | + } |
| 186 | +} |
| 187 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a [`BaseTableAttrRef`] whose table id and attribute index are
    /// both the given literal.
    macro_rules! attr {
        ($n:literal) => {
            BaseTableAttrRef {
                table_id: TableId($n),
                attr_idx: $n,
            }
        };
    }

    /// Grows two separate equality sets, then bridges them, checking set
    /// membership and the defining predicates after each step.
    #[test]
    fn test_eq_base_table_attribute_sets() {
        let (a1, a2, a3, a4) = (attr!(1), attr!(2), attr!(3), attr!(4));
        let p12 = EqPredicate::new(a1.clone(), a2.clone());
        let p34 = EqPredicate::new(a3.clone(), a4.clone());
        let p13 = EqPredicate::new(a1.clone(), a3.clone());

        let mut correlation = SemanticCorrelation::new();

        // (1, 2)
        correlation.add_predicate(p12.clone());
        assert!(correlation.is_eq(&a1, &a2));

        // (1, 2), (3, 4)
        correlation.add_predicate(p34.clone());
        assert!(correlation.is_eq(&a3, &a4));
        assert!(!correlation.is_eq(&a2, &a3));

        let found = correlation.find_predicates_for_eq_attr_set(&a1);
        assert_eq!(found.len(), 1);
        assert!(found.contains(&p12));

        let found = correlation.find_predicates_for_eq_attr_set(&a3);
        assert_eq!(found.len(), 1);
        assert!(found.contains(&p34));

        // (1, 2, 3, 4)
        correlation.add_predicate(p13.clone());
        for (x, y) in [(&a1, &a3), (&a2, &a4), (&a1, &a4)] {
            assert!(correlation.is_eq(x, y));
        }

        let found = correlation.find_predicates_for_eq_attr_set(&a1);
        assert_eq!(found.len(), 3);
        for expected in [&p12, &p34, &p13] {
            assert!(found.contains(expected));
        }
    }
}
0 commit comments