Skip to content

Commit f637fe4

Browse files
committed
add expression representation and refactor memo
This commit adds the `src/expression` module which contains a very simple representation of Cascades expressions. The `Memo` trait interface and implementation have also changed: the memo now correctly detects exact-match duplicates, and it does not track fingerprints for physical expressions (only logical). TODO: Add the duplicate detection to the other methods that need them. TODO: Add more tests. TODO: Figure out how to test in CI.
1 parent 0e54957 commit f637fe4

File tree

11 files changed

+452
-56
lines changed

11 files changed

+452
-56
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ target/
1010

1111
# We will check in all code-generated entity files, as newer versions of `sea-orm-cli` might
1212
# conflict with previous versions.
13-
# **/entities
13+
# **/entities

optd-mvp/src/entities/physical_expression.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ pub struct Model {
88
#[sea_orm(primary_key)]
99
pub id: i32,
1010
pub group_id: i32,
11-
pub fingerprint: i64,
1211
pub kind: i16,
1312
pub data: Json,
1413
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//! Definition of logical expressions / relations in the Cascades query optimization framework.
2+
//!
3+
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
4+
//!
5+
//! TODO figure out if each relation should be in a different submodule.
6+
7+
use crate::entities::*;
8+
use serde::{Deserialize, Serialize};
9+
use std::hash::{DefaultHasher, Hash, Hasher};
10+
11+
#[derive(Clone, Debug)]
12+
pub enum LogicalExpression {
13+
Scan(Scan),
14+
Filter(Filter),
15+
Join(Join),
16+
}
17+
18+
#[derive(Serialize, Deserialize, Clone, Debug)]
19+
pub struct Scan {
20+
table_schema: String,
21+
}
22+
23+
#[derive(Serialize, Deserialize, Clone, Debug)]
24+
pub struct Filter {
25+
child: i32,
26+
expression: String,
27+
}
28+
29+
#[derive(Serialize, Deserialize, Clone, Debug)]
30+
pub struct Join {
31+
left: i32,
32+
right: i32,
33+
expression: String,
34+
}
35+
36+
/// TODO Use a macro instead.
37+
impl From<logical_expression::Model> for LogicalExpression {
38+
fn from(value: logical_expression::Model) -> Self {
39+
match value.kind {
40+
0 => Self::Scan(
41+
serde_json::from_value(value.data)
42+
.expect("unable to deserialize data into a logical `Scan`"),
43+
),
44+
1 => Self::Filter(
45+
serde_json::from_value(value.data)
46+
.expect("Unable to deserialize data into a logical `Filter`"),
47+
),
48+
2 => Self::Join(
49+
serde_json::from_value(value.data)
50+
.expect("Unable to deserialize data into a logical `Join`"),
51+
),
52+
_ => panic!(),
53+
}
54+
}
55+
}
56+
57+
/// TODO Use a macro instead.
58+
impl From<LogicalExpression> for logical_expression::Model {
59+
fn from(value: LogicalExpression) -> logical_expression::Model {
60+
fn create_logical_expression(
61+
kind: i16,
62+
data: serde_json::Value,
63+
) -> logical_expression::Model {
64+
let mut hasher = DefaultHasher::new();
65+
kind.hash(&mut hasher);
66+
data.hash(&mut hasher);
67+
let fingerprint = hasher.finish() as i64;
68+
69+
logical_expression::Model {
70+
id: -1,
71+
group_id: -1,
72+
fingerprint,
73+
kind,
74+
data,
75+
}
76+
}
77+
78+
match value {
79+
LogicalExpression::Scan(scan) => create_logical_expression(
80+
0,
81+
serde_json::to_value(scan).expect("unable to serialize logical `Scan`"),
82+
),
83+
LogicalExpression::Filter(filter) => create_logical_expression(
84+
1,
85+
serde_json::to_value(filter).expect("unable to serialize logical `Filter`"),
86+
),
87+
LogicalExpression::Join(join) => create_logical_expression(
88+
2,
89+
serde_json::to_value(join).expect("unable to serialize logical `Join`"),
90+
),
91+
}
92+
}
93+
}
94+
95+
#[cfg(test)]
96+
pub use build::*;
97+
98+
#[cfg(test)]
mod build {
    //! Test-only helpers that build logical [`Expression`]s.

    use super::*;
    use crate::expression::Expression;

    /// Builds a logical `Scan` expression with the given `table_schema`.
    pub fn scan(table_schema: String) -> Expression {
        let relation = Scan { table_schema };
        Expression::Logical(LogicalExpression::Scan(relation))
    }

    /// Builds a logical `Filter` expression whose child is `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let relation = Filter {
            child: child_group,
            expression,
        };
        Expression::Logical(LogicalExpression::Filter(relation))
    }

    /// Builds a logical `Join` expression between `left_group` and `right_group`.
    pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let relation = Join {
            left: left_group,
            right: right_group,
            expression,
        };
        Expression::Logical(LogicalExpression::Join(relation))
    }
}

optd-mvp/src/expression/mod.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//! In-memory representation of Cascades logical and physical expression / operators / relations.
2+
//!
3+
//! TODO more docs.
4+
5+
mod logical_expression;
6+
pub use logical_expression::*;
7+
8+
mod physical_expression;
9+
pub use physical_expression::*;
10+
11+
/// The representation of a Cascades expression.
12+
///
13+
/// An expression is either a logical expression or a physical expression; each variant wraps the
/// corresponding in-memory type re-exported from this module's submodules.
14+
#[derive(Clone, Debug)]
15+
pub enum Expression {
16+
/// A logical expression (relation).
Logical(LogicalExpression),
17+
/// A physical expression (operator).
Physical(PhysicalExpression),
18+
}
19+
20+
/// Converts the database / JSON representation of a logical expression into an in-memory one.
21+
impl From<crate::entities::logical_expression::Model> for Expression {
22+
fn from(value: crate::entities::logical_expression::Model) -> Self {
23+
Self::Logical(value.into())
24+
}
25+
}
26+
27+
/// Converts the in-memory representation of a logical expression into the database / JSON version.
28+
///
29+
/// # Panics
30+
///
31+
/// This will panic if the [`Expression`] is [`Expression::Physical`].
32+
impl From<Expression> for crate::entities::logical_expression::Model {
33+
fn from(value: Expression) -> Self {
34+
let Expression::Logical(expr) = value else {
35+
panic!("Attempted to convert an in-memory physical expression into a logical database / JSON expression");
36+
};
37+
38+
expr.into()
39+
}
40+
}
41+
42+
/// Converts the database / JSON representation of a physical expression into an in-memory one.
43+
impl From<crate::entities::physical_expression::Model> for Expression {
44+
fn from(value: crate::entities::physical_expression::Model) -> Self {
45+
Self::Physical(value.into())
46+
}
47+
}
48+
49+
/// Converts the in-memory representation of a physical expression into the database / JSON version.
50+
///
51+
/// # Panics
52+
///
53+
/// This will panic if the [`Expression`] is [`Expression::Physical`].
54+
impl From<Expression> for crate::entities::physical_expression::Model {
55+
fn from(value: Expression) -> Self {
56+
let Expression::Physical(expr) = value else {
57+
panic!("Attempted to convert an in-memory logical expression into a physical database / JSON expression");
58+
};
59+
60+
expr.into()
61+
}
62+
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
//! Definition of physical expressions / operators in the Cascades query optimization framework.
2+
//!
3+
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
4+
//!
5+
//! TODO figure out if each operator should be in a different submodule.
6+
7+
use crate::entities::*;
8+
use serde::{Deserialize, Serialize};
9+
use std::hash::{DefaultHasher, Hash, Hasher};
10+
11+
#[derive(Clone, Debug)]
12+
pub enum PhysicalExpression {
13+
TableScan(TableScan),
14+
Filter(PhysicalFilter),
15+
HashJoin(HashJoin),
16+
}
17+
18+
#[derive(Serialize, Deserialize, Clone, Debug)]
19+
pub struct TableScan {
20+
table_schema: String,
21+
}
22+
23+
#[derive(Serialize, Deserialize, Clone, Debug)]
24+
pub struct PhysicalFilter {
25+
child: i32,
26+
expression: String,
27+
}
28+
29+
#[derive(Serialize, Deserialize, Clone, Debug)]
30+
pub struct HashJoin {
31+
left: i32,
32+
right: i32,
33+
expression: String,
34+
}
35+
36+
/// TODO Use a macro instead.
37+
impl From<physical_expression::Model> for PhysicalExpression {
38+
fn from(value: physical_expression::Model) -> Self {
39+
match value.kind {
40+
0 => Self::TableScan(
41+
serde_json::from_value(value.data)
42+
.expect("unable to deserialize data into a physical `TableScan`"),
43+
),
44+
1 => Self::Filter(
45+
serde_json::from_value(value.data)
46+
.expect("Unable to deserialize data into a physical `Filter`"),
47+
),
48+
2 => Self::HashJoin(
49+
serde_json::from_value(value.data)
50+
.expect("Unable to deserialize data into a physical `HashJoin`"),
51+
),
52+
_ => panic!(),
53+
}
54+
}
55+
}
56+
57+
/// TODO Use a macro instead.
58+
impl From<PhysicalExpression> for physical_expression::Model {
59+
fn from(value: PhysicalExpression) -> physical_expression::Model {
60+
fn create_physical_expression(
61+
kind: i16,
62+
data: serde_json::Value,
63+
) -> physical_expression::Model {
64+
let mut hasher = DefaultHasher::new();
65+
kind.hash(&mut hasher);
66+
data.hash(&mut hasher);
67+
let fingerprint = hasher.finish() as i64;
68+
69+
physical_expression::Model {
70+
id: -1,
71+
group_id: -1,
72+
kind,
73+
data,
74+
}
75+
}
76+
77+
match value {
78+
PhysicalExpression::TableScan(scan) => create_physical_expression(
79+
0,
80+
serde_json::to_value(scan).expect("unable to serialize physical `TableScan`"),
81+
),
82+
PhysicalExpression::Filter(filter) => create_physical_expression(
83+
1,
84+
serde_json::to_value(filter).expect("unable to serialize physical `Filter`"),
85+
),
86+
PhysicalExpression::HashJoin(join) => create_physical_expression(
87+
2,
88+
serde_json::to_value(join).expect("unable to serialize physical `HashJoin`"),
89+
),
90+
}
91+
}
92+
}
93+
94+
#[cfg(test)]
95+
pub use build::*;
96+
97+
#[cfg(test)]
mod build {
    //! Test-only helpers that build physical [`Expression`]s.

    use super::*;
    use crate::expression::Expression;

    /// Builds a physical `TableScan` expression with the given `table_schema`.
    pub fn table_scan(table_schema: String) -> Expression {
        let operator = TableScan { table_schema };
        Expression::Physical(PhysicalExpression::TableScan(operator))
    }

    /// Builds a physical `Filter` expression whose child is `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let operator = PhysicalFilter {
            child: child_group,
            expression,
        };
        Expression::Physical(PhysicalExpression::Filter(operator))
    }

    /// Builds a physical `HashJoin` expression between `left_group` and `right_group`.
    pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let operator = HashJoin {
            left: left_group,
            right: right_group,
            expression,
        };
        Expression::Physical(PhysicalExpression::HashJoin(operator))
    }
}

optd-mvp/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ mod entities;
1010
mod memo;
1111
use memo::MemoError;
1212

13+
mod expression;
14+
1315
/// The filename of the SQLite database for migration.
1416
pub const DATABASE_FILENAME: &str = "sqlite.db";
1517
/// The URL of the SQLite database for migration.

0 commit comments

Comments
 (0)