Skip to content

Commit fd33981

Browse files
committed
add expression representation and refactor memo
This commit adds the `src/expression` module, which contains a very simple representation of Cascades expressions. The `Memo` trait interface and implementation have also changed: the memo now correctly detects exact-match duplicates, and it no longer tracks fingerprints for physical expressions (only for logical ones). TODO: Add the duplicate detection to the other methods that need it. TODO: Add more tests. TODO: Figure out how to test in CI.
1 parent 0e54957 commit fd33981

File tree

11 files changed

+446
-56
lines changed

11 files changed

+446
-56
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ target/
1010

1111
# We will check in all code-generated entity files, as newer versions of `sea-orm-cli` might
1212
# conflict with previous versions.
13-
# **/entities
13+
# **/entities

optd-mvp/src/entities/physical_expression.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ pub struct Model {
88
#[sea_orm(primary_key)]
99
pub id: i32,
1010
pub group_id: i32,
11-
pub fingerprint: i64,
1211
pub kind: i16,
1312
pub data: Json,
1413
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//! Definition of logical expressions / relations in the Cascades query optimization framework.
2+
//!
3+
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
4+
//!
5+
//! TODO figure out if each relation should be in a different submodule.
6+
7+
use crate::entities::*;
8+
use serde::{Deserialize, Serialize};
9+
use std::hash::{DefaultHasher, Hash, Hasher};
10+
11+
#[derive(Clone, Debug)]
12+
pub enum LogicalExpression {
13+
Scan(Scan),
14+
Filter(Filter),
15+
Join(Join),
16+
}
17+
18+
#[derive(Serialize, Deserialize, Clone, Debug)]
19+
pub struct Scan {
20+
table_schema: String,
21+
}
22+
23+
#[derive(Serialize, Deserialize, Clone, Debug)]
24+
pub struct Filter {
25+
child: i32,
26+
expression: String,
27+
}
28+
29+
#[derive(Serialize, Deserialize, Clone, Debug)]
30+
pub struct Join {
31+
left: i32,
32+
right: i32,
33+
expression: String,
34+
}
35+
36+
/// TODO Use a macro instead.
37+
impl From<logical_expression::Model> for LogicalExpression {
38+
fn from(value: logical_expression::Model) -> Self {
39+
match value.kind {
40+
0 => Self::Scan(
41+
serde_json::from_value(value.data)
42+
.expect("unable to deserialize data into a logical `Scan`"),
43+
),
44+
1 => Self::Filter(
45+
serde_json::from_value(value.data)
46+
.expect("Unable to deserialize data into a logical `Filter`"),
47+
),
48+
2 => Self::Join(
49+
serde_json::from_value(value.data)
50+
.expect("Unable to deserialize data into a logical `Join`"),
51+
),
52+
_ => panic!(),
53+
}
54+
}
55+
}
56+
57+
/// TODO Use a macro instead.
58+
impl From<LogicalExpression> for logical_expression::Model {
59+
fn from(value: LogicalExpression) -> logical_expression::Model {
60+
fn create_logical_expression(
61+
kind: i16,
62+
data: serde_json::Value,
63+
) -> logical_expression::Model {
64+
let mut hasher = DefaultHasher::new();
65+
kind.hash(&mut hasher);
66+
data.hash(&mut hasher);
67+
let fingerprint = hasher.finish() as i64;
68+
69+
logical_expression::Model {
70+
id: -1,
71+
group_id: -1,
72+
fingerprint,
73+
kind,
74+
data,
75+
}
76+
}
77+
78+
match value {
79+
LogicalExpression::Scan(scan) => create_logical_expression(
80+
0,
81+
serde_json::to_value(scan).expect("unable to serialize logical `Scan`"),
82+
),
83+
LogicalExpression::Filter(filter) => create_logical_expression(
84+
1,
85+
serde_json::to_value(filter).expect("unable to serialize logical `Filter`"),
86+
),
87+
LogicalExpression::Join(join) => create_logical_expression(
88+
2,
89+
serde_json::to_value(join).expect("unable to serialize logical `Join`"),
90+
),
91+
}
92+
}
93+
}
94+
95+
#[cfg(test)]
96+
pub use build::*;
97+
98+
#[cfg(test)]
99+
mod build {
100+
use super::*;
101+
use crate::expression::Expression;
102+
103+
pub fn scan(table_schema: String) -> Expression {
104+
Expression::Logical(LogicalExpression::Scan(Scan { table_schema }))
105+
}
106+
107+
pub fn filter(child_group: i32, expression: String) -> Expression {
108+
Expression::Logical(LogicalExpression::Filter(Filter {
109+
child: child_group,
110+
expression,
111+
}))
112+
}
113+
114+
pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression {
115+
Expression::Logical(LogicalExpression::Join(Join {
116+
left: left_group,
117+
right: right_group,
118+
expression,
119+
}))
120+
}
121+
}

optd-mvp/src/expression/mod.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//! In-memory representation of Cascades logical and physical expressions / operators / relations.
2+
//!
3+
//! TODO more docs.
4+
5+
mod logical_expression;
6+
pub use logical_expression::*;
7+
8+
mod physical_expression;
9+
pub use physical_expression::*;
10+
11+
/// The representation of a Cascades expression.
12+
///
13+
/// TODO more docs.
14+
#[derive(Clone, Debug)]
15+
pub enum Expression {
16+
Logical(LogicalExpression),
17+
Physical(PhysicalExpression),
18+
}
19+
20+
/// Converts the database / JSON representation of a logical expression into an in-memory one.
21+
impl From<crate::entities::logical_expression::Model> for Expression {
22+
fn from(value: crate::entities::logical_expression::Model) -> Self {
23+
Self::Logical(value.into())
24+
}
25+
}
26+
27+
/// Converts the in-memory representation of a logical expression into the database / JSON version.
28+
///
29+
/// # Panics
30+
///
31+
/// This will panic if the [`Expression`] is [`Expression::Physical`].
32+
impl From<Expression> for crate::entities::logical_expression::Model {
33+
fn from(value: Expression) -> Self {
34+
let Expression::Logical(expr) = value else {
35+
panic!("Attempted to convert an in-memory physical expression into a logical database / JSON expression");
36+
};
37+
38+
expr.into()
39+
}
40+
}
41+
42+
/// Converts the database / JSON representation of a physical expression into an in-memory one.
43+
impl From<crate::entities::physical_expression::Model> for Expression {
44+
fn from(value: crate::entities::physical_expression::Model) -> Self {
45+
Self::Physical(value.into())
46+
}
47+
}
48+
49+
/// Converts the in-memory representation of a physical expression into the database / JSON version.
50+
///
51+
/// # Panics
52+
///
53+
/// This will panic if the [`Expression`] is [`Expression::Logical`].
54+
impl From<Expression> for crate::entities::physical_expression::Model {
55+
fn from(value: Expression) -> Self {
56+
let Expression::Physical(expr) = value else {
57+
panic!("Attempted to convert an in-memory logical expression into a physical database / JSON expression");
58+
};
59+
60+
expr.into()
61+
}
62+
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
//! Definition of physical expressions / operators in the Cascades query optimization framework.
2+
//!
3+
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
4+
//!
5+
//! TODO figure out if each operator should be in a different submodule.
6+
7+
use crate::entities::*;
8+
use serde::{Deserialize, Serialize};
9+
10+
#[derive(Clone, Debug)]
11+
pub enum PhysicalExpression {
12+
TableScan(TableScan),
13+
Filter(PhysicalFilter),
14+
HashJoin(HashJoin),
15+
}
16+
17+
#[derive(Serialize, Deserialize, Clone, Debug)]
18+
pub struct TableScan {
19+
table_schema: String,
20+
}
21+
22+
#[derive(Serialize, Deserialize, Clone, Debug)]
23+
pub struct PhysicalFilter {
24+
child: i32,
25+
expression: String,
26+
}
27+
28+
#[derive(Serialize, Deserialize, Clone, Debug)]
29+
pub struct HashJoin {
30+
left: i32,
31+
right: i32,
32+
expression: String,
33+
}
34+
35+
/// TODO Use a macro instead.
36+
impl From<physical_expression::Model> for PhysicalExpression {
37+
fn from(value: physical_expression::Model) -> Self {
38+
match value.kind {
39+
0 => Self::TableScan(
40+
serde_json::from_value(value.data)
41+
.expect("unable to deserialize data into a physical `TableScan`"),
42+
),
43+
1 => Self::Filter(
44+
serde_json::from_value(value.data)
45+
.expect("Unable to deserialize data into a physical `Filter`"),
46+
),
47+
2 => Self::HashJoin(
48+
serde_json::from_value(value.data)
49+
.expect("Unable to deserialize data into a physical `HashJoin`"),
50+
),
51+
_ => panic!(),
52+
}
53+
}
54+
}
55+
56+
/// TODO Use a macro instead.
57+
impl From<PhysicalExpression> for physical_expression::Model {
58+
fn from(value: PhysicalExpression) -> physical_expression::Model {
59+
fn create_physical_expression(
60+
kind: i16,
61+
data: serde_json::Value,
62+
) -> physical_expression::Model {
63+
physical_expression::Model {
64+
id: -1,
65+
group_id: -1,
66+
kind,
67+
data,
68+
}
69+
}
70+
71+
match value {
72+
PhysicalExpression::TableScan(scan) => create_physical_expression(
73+
0,
74+
serde_json::to_value(scan).expect("unable to serialize physical `TableScan`"),
75+
),
76+
PhysicalExpression::Filter(filter) => create_physical_expression(
77+
1,
78+
serde_json::to_value(filter).expect("unable to serialize physical `Filter`"),
79+
),
80+
PhysicalExpression::HashJoin(join) => create_physical_expression(
81+
2,
82+
serde_json::to_value(join).expect("unable to serialize physical `HashJoin`"),
83+
),
84+
}
85+
}
86+
}
87+
88+
#[cfg(test)]
89+
pub use build::*;
90+
91+
#[cfg(test)]
92+
mod build {
93+
use super::*;
94+
use crate::expression::Expression;
95+
96+
pub fn table_scan(table_schema: String) -> Expression {
97+
Expression::Physical(PhysicalExpression::TableScan(TableScan { table_schema }))
98+
}
99+
100+
pub fn filter(child_group: i32, expression: String) -> Expression {
101+
Expression::Physical(PhysicalExpression::Filter(PhysicalFilter {
102+
child: child_group,
103+
expression,
104+
}))
105+
}
106+
107+
pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression {
108+
Expression::Physical(PhysicalExpression::HashJoin(HashJoin {
109+
left: left_group,
110+
right: right_group,
111+
expression,
112+
}))
113+
}
114+
}

optd-mvp/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ mod entities;
1010
mod memo;
1111
use memo::MemoError;
1212

13+
mod expression;
14+
1315
/// The filename of the SQLite database for migration.
1416
pub const DATABASE_FILENAME: &str = "sqlite.db";
1517
/// The URL of the SQLite database for migration.

optd-mvp/src/memo/interface.rs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
//! This module defines the [`Memo`] trait, which defines shared behavior of all memo tables that
2+
//! can be used for query optimization in the Cascades framework.
3+
14
use crate::OptimizerResult;
25
use thiserror::Error;
36

@@ -75,6 +78,11 @@ pub trait Memo {
7578
group_id: Self::GroupId,
7679
) -> OptimizerResult<Vec<Self::PhysicalExpressionId>>;
7780

81+
/// Checks if a given logical expression is unique.
82+
///
83+
/// TODO more docs.
84+
async fn is_unique_logical_expression(&self) -> OptimizerResult<bool>;
85+
7886
/// Updates / replaces a group's best physical plan (winner). Optionally returns the previous
7987
/// winner's physical expression ID.
8088
///
@@ -85,39 +93,45 @@ pub trait Memo {
8593
physical_expression_id: Self::PhysicalExpressionId,
8694
) -> OptimizerResult<Option<Self::PhysicalExpressionId>>;
8795

88-
/// Adds a logical expression to an existing group via its [`Self::GroupId`]. This function
96+
/// Adds a physical expression to an existing group via its [`Self::GroupId`]. This function
8997
/// assumes that insertion of this expression would not create any duplicates.
9098
///
9199
/// The caller is required to pass in a slice of `GroupId` that represent the child groups of
92100
/// the input expression.
93101
///
94-
/// The caller is also required to set the `group_id` field of the input `logical_expression`
102+
/// The caller is also required to set the `group_id` field of the input `physical_expression`
95103
/// to be equal to `group_id`, otherwise this function will return a
96104
/// [`MemoError::InvalidExpression`] error.
97105
///
98106
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
99-
async fn add_logical_expression_to_group(
107+
///
108+
/// FIXME: This needs to have a mechanism of reporting that a duplicate expression was found in
109+
/// another group.
110+
async fn add_physical_expression_to_group(
100111
&self,
101112
group_id: Self::GroupId,
102-
logical_expression: Self::LogicalExpression,
113+
physical_expression: Self::PhysicalExpression,
103114
children: &[Self::GroupId],
104115
) -> OptimizerResult<()>;
105116

106-
/// Adds a physical expression to an existing group via its [`Self::GroupId`]. This function
117+
/// Adds a logical expression to an existing group via its [`Self::GroupId`]. This function
107118
/// assumes that insertion of this expression would not create any duplicates.
108119
///
109120
/// The caller is required to pass in a slice of `GroupId` that represent the child groups of
110121
/// the input expression.
111122
///
112-
/// The caller is also required to set the `group_id` field of the input `physical_expression`
123+
/// The caller is also required to set the `group_id` field of the input `logical_expression`
113124
/// to be equal to `group_id`, otherwise this function will return a
114125
/// [`MemoError::InvalidExpression`] error.
115126
///
116127
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
117-
async fn add_physical_expression_to_group(
128+
///
129+
/// FIXME: This needs to have a mechanism of reporting that a duplicate expression was found in
130+
/// another group.
131+
async fn add_logical_expression_to_group(
118132
&self,
119133
group_id: Self::GroupId,
120-
physical_expression: Self::PhysicalExpression,
134+
logical_expression: Self::LogicalExpression,
121135
children: &[Self::GroupId],
122136
) -> OptimizerResult<()>;
123137

0 commit comments

Comments
 (0)