Skip to content

Commit a74f03c

Browse files
committed
huge refactor of persistent memo implementation
This commit completely refactors the memo table, removing the `Memo` trait and instead placing all methods directly on the `PersistentMemo` structure itself. This also cleans up some code in other places.
1 parent 2856496 commit a74f03c

File tree

13 files changed

+601
-479
lines changed

13 files changed

+601
-479
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

optd-mvp/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ serde_json = "1.0.118" # Support `Hash` on `serde_json::Value` in "1.0.118".
2020
tokio = { version = "1.0", features = ["macros", "rt-multi-thread"] }
2121
trait-variant = "0.1.2" # Support `make(Send)` syntax in "0.1.2".
2222
thiserror = "2.0"
23+
fxhash = "0.2"
2324

2425
# Pin more recent versions for `-Zminimal-versions`.
2526
async-trait = "0.1.43" # Remove lifetime parameter from "0.1.42".
2627
async-stream = "0.3.1" # Fix unsatisfied trait bound from "0.3.0".
2728
strum = "0.26.0" # Fix `std::marker::Sized` from "0.25.0".
29+

optd-mvp/src/entities/logical_children.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ pub enum Relation {
2323
CascadesGroup,
2424
#[sea_orm(
2525
belongs_to = "super::logical_expression::Entity",
26-
from = "Column::GroupId",
26+
from = "Column::LogicalExpressionId",
2727
to = "super::logical_expression::Column::Id",
2828
on_update = "Cascade",
2929
on_delete = "Cascade"

optd-mvp/src/entities/prelude.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0
22
3+
#![allow(unused_imports)]
4+
35
pub use super::cascades_group::Entity as CascadesGroup;
46
pub use super::fingerprint::Entity as Fingerprint;
57
pub use super::logical_children::Entity as LogicalChildren;

optd-mvp/src/expression/logical_expression.rs

Lines changed: 83 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,91 @@
11
//! Definition of logical expressions / relations in the Cascades query optimization framework.
22
//!
3-
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
4-
//! FIXME: Representation needs to know how to "rewrite" child group IDs to whatever a fingerprint
5-
//! will need.
3+
//! FIXME: All fields are placeholders.
64
//!
7-
//! TODO figure out if each relation should be in a different submodule.
5+
//! TODO Remove dead code.
6+
//! TODO Figure out if each relation should be in a different submodule.
87
//! TODO This entire file is a WIP.
98
10-
use crate::entities::*;
9+
#![allow(dead_code)]
10+
11+
use crate::{entities::*, memo::GroupId};
12+
use fxhash::hash;
1113
use serde::{Deserialize, Serialize};
1214

13-
#[derive(Clone, Debug)]
15+
#[derive(Clone, Debug, PartialEq, Eq)]
1416
pub enum LogicalExpression {
1517
Scan(Scan),
1618
Filter(Filter),
1719
Join(Join),
1820
}
1921

20-
#[derive(Serialize, Deserialize, Clone, Debug)]
22+
/// FIXME: Figure out how to make everything unsigned instead of signed.
23+
impl LogicalExpression {
24+
pub fn kind(&self) -> i16 {
25+
match self {
26+
LogicalExpression::Scan(_) => 0,
27+
LogicalExpression::Filter(_) => 1,
28+
LogicalExpression::Join(_) => 2,
29+
}
30+
}
31+
32+
/// Computes the fingerprint of this expression using the custom strategy for its kind, without rewriting any child group IDs.
33+
pub fn fingerprint(&self) -> i64 {
34+
self.fingerprint_with_rewrite(&[])
35+
}
36+
37+
/// Calculates the fingerprint of a given expression, but replaces all of the children group IDs
38+
/// with a new group ID if it is listed in the input `rewrites` list.
39+
///
40+
/// TODO Allow each expression to implement a trait that does this.
41+
pub fn fingerprint_with_rewrite(&self, rewrites: &[(GroupId, GroupId)]) -> i64 {
42+
// Closure that rewrites a group ID if needed.
43+
let rewrite = |x: GroupId| {
44+
if rewrites.is_empty() {
45+
return x;
46+
}
47+
48+
if let Some(i) = rewrites.iter().position(|(curr, _new)| &x == curr) {
49+
assert_eq!(rewrites[i].0, x);
50+
rewrites[i].1
51+
} else {
52+
x
53+
}
54+
};
55+
56+
let kind = self.kind() as u16 as usize;
57+
let hash = match self {
58+
LogicalExpression::Scan(scan) => hash(scan.table_schema.as_str()),
59+
LogicalExpression::Filter(filter) => {
60+
hash(&rewrite(filter.child).0) ^ hash(filter.expression.as_str())
61+
}
62+
LogicalExpression::Join(join) => {
63+
hash(&rewrite(join.left).0)
64+
^ hash(&rewrite(join.right).0)
65+
^ hash(join.expression.as_str())
66+
}
67+
};
68+
69+
// Mask out the bottom 16 bits of `hash` and replace them with `kind`.
70+
((hash & !0xFFFF) | kind) as i64
71+
}
72+
}
73+
74+
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
2175
pub struct Scan {
2276
table_schema: String,
2377
}
2478

25-
#[derive(Serialize, Deserialize, Clone, Debug)]
79+
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
2680
pub struct Filter {
27-
child: i32,
81+
child: GroupId,
2882
expression: String,
2983
}
3084

31-
#[derive(Serialize, Deserialize, Clone, Debug)]
85+
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
3286
pub struct Join {
33-
left: i32,
34-
right: i32,
87+
left: GroupId,
88+
right: GroupId,
3589
expression: String,
3690
}
3791

@@ -71,17 +125,18 @@ impl From<LogicalExpression> for logical_expression::Model {
71125
}
72126
}
73127

128+
let kind = value.kind();
74129
match value {
75130
LogicalExpression::Scan(scan) => create_logical_expression(
76-
0,
131+
kind,
77132
serde_json::to_value(scan).expect("unable to serialize logical `Scan`"),
78133
),
79134
LogicalExpression::Filter(filter) => create_logical_expression(
80-
1,
135+
kind,
81136
serde_json::to_value(filter).expect("unable to serialize logical `Filter`"),
82137
),
83138
LogicalExpression::Join(join) => create_logical_expression(
84-
2,
139+
kind,
85140
serde_json::to_value(join).expect("unable to serialize logical `Join`"),
86141
),
87142
}
@@ -94,24 +149,28 @@ pub use build::*;
94149
#[cfg(test)]
95150
mod build {
96151
use super::*;
97-
use crate::expression::Expression;
152+
use crate::expression::LogicalExpression;
98153

99-
pub fn scan(table_schema: String) -> Expression {
100-
Expression::Logical(LogicalExpression::Scan(Scan { table_schema }))
154+
pub fn scan(table_schema: String) -> LogicalExpression {
155+
LogicalExpression::Scan(Scan { table_schema })
101156
}
102157

103-
pub fn filter(child_group: i32, expression: String) -> Expression {
104-
Expression::Logical(LogicalExpression::Filter(Filter {
158+
pub fn filter(child_group: GroupId, expression: String) -> LogicalExpression {
159+
LogicalExpression::Filter(Filter {
105160
child: child_group,
106161
expression,
107-
}))
162+
})
108163
}
109164

110-
pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression {
111-
Expression::Logical(LogicalExpression::Join(Join {
165+
pub fn join(
166+
left_group: GroupId,
167+
right_group: GroupId,
168+
expression: String,
169+
) -> LogicalExpression {
170+
LogicalExpression::Join(Join {
112171
left: left_group,
113172
right: right_group,
114173
expression,
115-
}))
174+
})
116175
}
117176
}

optd-mvp/src/expression/physical_expression.rs

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,38 @@
11
//! Definition of physical expressions / operators in the Cascades query optimization framework.
22
//!
3-
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
3+
//! FIXME: All fields are placeholders.
44
//!
5-
//! TODO figure out if each operator should be in a different submodule.
5+
//! TODO Remove dead code.
6+
//! TODO Figure out if each operator should be in a different submodule.
67
//! TODO This entire file is a WIP.
78
8-
use crate::entities::*;
9+
#![allow(dead_code)]
10+
11+
use crate::{entities::*, memo::GroupId};
912
use serde::{Deserialize, Serialize};
1013

11-
#[derive(Clone, Debug)]
14+
#[derive(Clone, Debug, PartialEq, Eq)]
1215
pub enum PhysicalExpression {
1316
TableScan(TableScan),
1417
Filter(PhysicalFilter),
1518
HashJoin(HashJoin),
1619
}
1720

18-
#[derive(Serialize, Deserialize, Clone, Debug)]
21+
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
1922
pub struct TableScan {
2023
table_schema: String,
2124
}
2225

23-
#[derive(Serialize, Deserialize, Clone, Debug)]
26+
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
2427
pub struct PhysicalFilter {
25-
child: i32,
28+
child: GroupId,
2629
expression: String,
2730
}
2831

29-
#[derive(Serialize, Deserialize, Clone, Debug)]
32+
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
3033
pub struct HashJoin {
31-
left: i32,
32-
right: i32,
34+
left: GroupId,
35+
right: GroupId,
3336
expression: String,
3437
}
3538

@@ -92,24 +95,28 @@ pub use build::*;
9295
#[cfg(test)]
9396
mod build {
9497
use super::*;
95-
use crate::expression::Expression;
98+
use crate::expression::PhysicalExpression;
9699

97-
pub fn table_scan(table_schema: String) -> Expression {
98-
Expression::Physical(PhysicalExpression::TableScan(TableScan { table_schema }))
100+
pub fn table_scan(table_schema: String) -> PhysicalExpression {
101+
PhysicalExpression::TableScan(TableScan { table_schema })
99102
}
100103

101-
pub fn filter(child_group: i32, expression: String) -> Expression {
102-
Expression::Physical(PhysicalExpression::Filter(PhysicalFilter {
104+
pub fn filter(child_group: GroupId, expression: String) -> PhysicalExpression {
105+
PhysicalExpression::Filter(PhysicalFilter {
103106
child: child_group,
104107
expression,
105-
}))
108+
})
106109
}
107110

108-
pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression {
109-
Expression::Physical(PhysicalExpression::HashJoin(HashJoin {
111+
pub fn hash_join(
112+
left_group: GroupId,
113+
right_group: GroupId,
114+
expression: String,
115+
) -> PhysicalExpression {
116+
PhysicalExpression::HashJoin(HashJoin {
110117
left: left_group,
111118
right: right_group,
112119
expression,
113-
}))
120+
})
114121
}
115122
}

optd-mvp/src/lib.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,3 @@ pub type OptimizerResult<T> = Result<T, OptimizerError>;
3737
pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> {
3838
Migrator::refresh(db).await
3939
}
40-
41-
/// Helper function for hashing expression data.
42-
///
43-
/// TODO remove this.
44-
fn hash_expression(kind: i16, data: &serde_json::Value) -> i64 {
45-
use std::hash::{DefaultHasher, Hash, Hasher};
46-
47-
let mut hasher = DefaultHasher::new();
48-
kind.hash(&mut hasher);
49-
data.hash(&mut hasher);
50-
hasher.finish() as i64
51-
}

0 commit comments

Comments
 (0)