Skip to content

Commit 9689c04

Browse files
committed
add expression representation and refactor memo
This commit adds the `src/expression` module which contains a very simple representation of Cascades expressions. The `Memo` trait interface and implementation have also changed, and the memo now correctly detects exact match duplicates. TODO: Add the duplicate detection to the other methods that need them. TODO: Add more tests. TODO: Figure out how to test in CI.
1 parent 0e54957 commit 9689c04

File tree

8 files changed

+417
-27
lines changed

8 files changed

+417
-27
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//! Definition of logical expressions / relations in the Cascades query optimization framework.
2+
//!
3+
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
4+
//!
5+
//! TODO figure out if each relation should be in a different submodule.
6+
7+
use crate::entities::*;
8+
use serde::{Deserialize, Serialize};
9+
use std::hash::{DefaultHasher, Hash, Hasher};
10+
11+
/// A logical expression (relation) in the Cascades framework.
///
/// Each variant wraps the struct that holds the data for that kind of relation.
#[derive(Clone, Debug)]
12+
pub enum LogicalExpression {
13+
/// A logical scan; see [`Scan`].
Scan(Scan),
14+
/// A logical filter; see [`Filter`].
Filter(Filter),
15+
/// A logical join; see [`Join`].
Join(Join),
16+
}
17+
18+
/// Data carried by a logical scan.
///
/// The `From` conversions in this module use `serde_json` to move this struct in and out of the
/// `data` field of a `logical_expression::Model`.
#[derive(Serialize, Deserialize, Clone, Debug)]
19+
pub struct Scan {
20+
// Placeholder field (see the module-level FIXME).
table_schema: String,
21+
}
22+
23+
/// Data carried by a logical filter.
///
/// The `From` conversions in this module use `serde_json` to move this struct in and out of the
/// `data` field of a `logical_expression::Model`.
#[derive(Serialize, Deserialize, Clone, Debug)]
24+
pub struct Filter {
25+
// Group ID of the child, represented as a plain `i32` for now (see the module-level FIXME).
child: i32,
26+
// Placeholder; presumably the filter predicate -- TODO confirm.
expression: String,
27+
}
28+
29+
/// Data carried by a logical join.
///
/// The `From` conversions in this module use `serde_json` to move this struct in and out of the
/// `data` field of a `logical_expression::Model`.
#[derive(Serialize, Deserialize, Clone, Debug)]
30+
pub struct Join {
31+
// Group ID of the left child, represented as a plain `i32` for now (see the module-level FIXME).
left: i32,
32+
// Group ID of the right child, represented as a plain `i32` for now.
right: i32,
33+
// Placeholder; presumably the join condition -- TODO confirm.
expression: String,
34+
}
35+
36+
/// TODO Use a macro instead.
37+
impl From<logical_expression::Model> for LogicalExpression {
38+
fn from(value: logical_expression::Model) -> Self {
39+
match value.kind {
40+
0 => Self::Scan(
41+
serde_json::from_value(value.data)
42+
.expect("unable to deserialize data into a logical `Scan`"),
43+
),
44+
1 => Self::Filter(
45+
serde_json::from_value(value.data)
46+
.expect("Unable to deserialize data into a logical `Filter`"),
47+
),
48+
2 => Self::Join(
49+
serde_json::from_value(value.data)
50+
.expect("Unable to deserialize data into a logical `Join`"),
51+
),
52+
_ => panic!(),
53+
}
54+
}
55+
}
56+
57+
/// TODO Use a macro instead.
58+
impl From<LogicalExpression> for logical_expression::Model {
59+
fn from(value: LogicalExpression) -> logical_expression::Model {
60+
fn create_logical_expression(
61+
kind: i16,
62+
data: serde_json::Value,
63+
) -> logical_expression::Model {
64+
let mut hasher = DefaultHasher::new();
65+
kind.hash(&mut hasher);
66+
data.hash(&mut hasher);
67+
let fingerprint = hasher.finish() as i64;
68+
69+
logical_expression::Model {
70+
id: -1,
71+
group_id: -1,
72+
fingerprint,
73+
kind,
74+
data,
75+
}
76+
}
77+
78+
match value {
79+
LogicalExpression::Scan(scan) => create_logical_expression(
80+
0,
81+
serde_json::to_value(scan).expect("unable to serialize logical `Scan`"),
82+
),
83+
LogicalExpression::Filter(filter) => create_logical_expression(
84+
1,
85+
serde_json::to_value(filter).expect("unable to serialize logical `Filter`"),
86+
),
87+
LogicalExpression::Join(join) => create_logical_expression(
88+
2,
89+
serde_json::to_value(join).expect("unable to serialize logical `Join`"),
90+
),
91+
}
92+
}
93+
}
94+
95+
#[cfg(test)]
96+
pub use build::*;
97+
98+
/// Test-only helpers that construct logical [`Expression`](crate::expression::Expression)s.
///
/// NOTE(review): the physical expression module also defines a test-only `filter` builder, and
/// both modules are glob re-exported from `expression`, so an unqualified `filter` may be
/// ambiguous there -- confirm.
#[cfg(test)]
mod build {
    use super::*;
    use crate::expression::Expression;

    /// Builds a logical `Scan` expression from the given `table_schema`.
    pub fn scan(table_schema: String) -> Expression {
        let node = Scan { table_schema };
        Expression::Logical(LogicalExpression::Scan(node))
    }

    /// Builds a logical `Filter` expression whose child is the group `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let node = Filter {
            child: child_group,
            expression,
        };
        Expression::Logical(LogicalExpression::Filter(node))
    }

    /// Builds a logical `Join` expression over the groups `left_group` and `right_group`.
    pub fn join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let node = Join {
            left: left_group,
            right: right_group,
            expression,
        };
        Expression::Logical(LogicalExpression::Join(node))
    }
}

optd-mvp/src/expression/mod.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//! In-memory representation of Cascades logical and physical expression / operators / relations.
2+
//!
3+
//! TODO more docs.
4+
5+
mod logical_expression;
6+
pub use logical_expression::*;
7+
8+
mod physical_expression;
9+
pub use physical_expression::*;
10+
11+
/// The representation of a Cascades expression.
12+
///
/// An expression is either logical or physical; each variant wraps the corresponding in-memory
/// type re-exported by this module.
///
13+
/// TODO more docs.
14+
#[derive(Clone, Debug)]
15+
pub enum Expression {
16+
/// A logical expression; see [`LogicalExpression`].
Logical(LogicalExpression),
17+
/// A physical expression; see [`PhysicalExpression`].
Physical(PhysicalExpression),
18+
}
19+
20+
/// Converts the database / JSON representation of a logical expression into an in-memory one.
21+
impl From<crate::entities::logical_expression::Model> for Expression {
22+
fn from(value: crate::entities::logical_expression::Model) -> Self {
23+
Self::Logical(value.into())
24+
}
25+
}
26+
27+
/// Converts the in-memory representation of a logical expression into the database / JSON version.
28+
///
29+
/// # Panics
30+
///
31+
/// This will panic if the [`Expression`] is [`Expression::Physical`].
32+
impl From<Expression> for crate::entities::logical_expression::Model {
33+
fn from(value: Expression) -> Self {
34+
let Expression::Logical(expr) = value else {
35+
panic!("Attempted to convert an in-memory physical expression into a logical database / JSON expression");
36+
};
37+
38+
expr.into()
39+
}
40+
}
41+
42+
/// Converts the database / JSON representation of a physical expression into an in-memory one.
43+
impl From<crate::entities::physical_expression::Model> for Expression {
44+
fn from(value: crate::entities::physical_expression::Model) -> Self {
45+
Self::Physical(value.into())
46+
}
47+
}
48+
49+
/// Converts the in-memory representation of a physical expression into the database / JSON version.
50+
///
51+
/// # Panics
52+
///
53+
/// This will panic if the [`Expression`] is [`Expression::Physical`].
54+
impl From<Expression> for crate::entities::physical_expression::Model {
55+
fn from(value: Expression) -> Self {
56+
let Expression::Physical(expr) = value else {
57+
panic!("Attempted to convert an in-memory logical expression into a physical database / JSON expression");
58+
};
59+
60+
expr.into()
61+
}
62+
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
//! Definition of physical expressions / operators in the Cascades query optimization framework.
2+
//!
3+
//! FIXME: All fields are placeholders, and group IDs are just represented as i32 for now.
4+
//!
5+
//! TODO figure out if each operator should be in a different submodule.
6+
7+
use crate::entities::*;
8+
use serde::{Deserialize, Serialize};
9+
use std::hash::{DefaultHasher, Hash, Hasher};
10+
11+
/// A physical expression (operator) in the Cascades framework.
///
/// Each variant wraps the struct that holds the data for that kind of operator.
#[derive(Clone, Debug)]
12+
pub enum PhysicalExpression {
13+
/// A physical table scan; see [`TableScan`].
TableScan(TableScan),
14+
/// A physical filter; see [`PhysicalFilter`].
Filter(PhysicalFilter),
15+
/// A physical hash join; see [`HashJoin`].
HashJoin(HashJoin),
16+
}
17+
18+
/// Data carried by a physical table scan.
///
/// The `From` conversions in this module use `serde_json` to move this struct in and out of the
/// `data` field of a `physical_expression::Model`.
#[derive(Serialize, Deserialize, Clone, Debug)]
19+
pub struct TableScan {
20+
// Placeholder field (see the module-level FIXME).
table_schema: String,
21+
}
22+
23+
/// Data carried by a physical filter.
///
/// The `From` conversions in this module use `serde_json` to move this struct in and out of the
/// `data` field of a `physical_expression::Model`.
#[derive(Serialize, Deserialize, Clone, Debug)]
24+
pub struct PhysicalFilter {
25+
// Group ID of the child, represented as a plain `i32` for now (see the module-level FIXME).
child: i32,
26+
// Placeholder; presumably the filter predicate -- TODO confirm.
expression: String,
27+
}
28+
29+
/// Data carried by a physical hash join.
///
/// The `From` conversions in this module use `serde_json` to move this struct in and out of the
/// `data` field of a `physical_expression::Model`.
#[derive(Serialize, Deserialize, Clone, Debug)]
30+
pub struct HashJoin {
31+
// Group ID of the left child, represented as a plain `i32` for now (see the module-level FIXME).
left: i32,
32+
// Group ID of the right child, represented as a plain `i32` for now.
right: i32,
33+
// Placeholder; presumably the join condition -- TODO confirm.
expression: String,
34+
}
35+
36+
/// TODO Use a macro instead.
37+
impl From<physical_expression::Model> for PhysicalExpression {
38+
fn from(value: physical_expression::Model) -> Self {
39+
match value.kind {
40+
0 => Self::TableScan(
41+
serde_json::from_value(value.data)
42+
.expect("unable to deserialize data into a physical `TableScan`"),
43+
),
44+
1 => Self::Filter(
45+
serde_json::from_value(value.data)
46+
.expect("Unable to deserialize data into a physical `Filter`"),
47+
),
48+
2 => Self::HashJoin(
49+
serde_json::from_value(value.data)
50+
.expect("Unable to deserialize data into a physical `HashJoin`"),
51+
),
52+
_ => panic!(),
53+
}
54+
}
55+
}
56+
57+
/// TODO Use a macro instead.
58+
impl From<PhysicalExpression> for physical_expression::Model {
59+
fn from(value: PhysicalExpression) -> physical_expression::Model {
60+
fn create_physical_expression(
61+
kind: i16,
62+
data: serde_json::Value,
63+
) -> physical_expression::Model {
64+
let mut hasher = DefaultHasher::new();
65+
kind.hash(&mut hasher);
66+
data.hash(&mut hasher);
67+
let fingerprint = hasher.finish() as i64;
68+
69+
physical_expression::Model {
70+
id: -1,
71+
group_id: -1,
72+
fingerprint,
73+
kind,
74+
data,
75+
}
76+
}
77+
78+
match value {
79+
PhysicalExpression::TableScan(scan) => create_physical_expression(
80+
0,
81+
serde_json::to_value(scan).expect("unable to serialize physical `TableScan`"),
82+
),
83+
PhysicalExpression::Filter(filter) => create_physical_expression(
84+
1,
85+
serde_json::to_value(filter).expect("unable to serialize physical `Filter`"),
86+
),
87+
PhysicalExpression::HashJoin(join) => create_physical_expression(
88+
2,
89+
serde_json::to_value(join).expect("unable to serialize physical `HashJoin`"),
90+
),
91+
}
92+
}
93+
}
94+
95+
#[cfg(test)]
96+
pub use build::*;
97+
98+
/// Test-only helpers that construct physical [`Expression`](crate::expression::Expression)s.
///
/// NOTE(review): the logical expression module also defines a test-only `filter` builder, and
/// both modules are glob re-exported from `expression`, so an unqualified `filter` may be
/// ambiguous there -- confirm.
#[cfg(test)]
mod build {
    use super::*;
    use crate::expression::Expression;

    /// Builds a physical `TableScan` expression from the given `table_schema`.
    pub fn table_scan(table_schema: String) -> Expression {
        let node = TableScan { table_schema };
        Expression::Physical(PhysicalExpression::TableScan(node))
    }

    /// Builds a physical `Filter` expression whose child is the group `child_group`.
    pub fn filter(child_group: i32, expression: String) -> Expression {
        let node = PhysicalFilter {
            child: child_group,
            expression,
        };
        Expression::Physical(PhysicalExpression::Filter(node))
    }

    /// Builds a physical `HashJoin` expression over the groups `left_group` and `right_group`.
    pub fn hash_join(left_group: i32, right_group: i32, expression: String) -> Expression {
        let node = HashJoin {
            left: left_group,
            right: right_group,
            expression,
        };
        Expression::Physical(PhysicalExpression::HashJoin(node))
    }
}

optd-mvp/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ mod entities;
1010
mod memo;
1111
use memo::MemoError;
1212

13+
mod expression;
14+
1315
/// The filename of the SQLite database for migration.
1416
pub const DATABASE_FILENAME: &str = "sqlite.db";
1517
/// The URL of the SQLite database for migration.

optd-mvp/src/memo/interface.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
//! This module defines the [`Memo`] trait, which defines shared behavior of all memo tables that can
2+
//! be used for query optimization in the Cascades framework.
3+
14
use crate::OptimizerResult;
25
use thiserror::Error;
36

@@ -96,6 +99,9 @@ pub trait Memo {
9699
/// [`MemoError::InvalidExpression`] error.
97100
///
98101
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
102+
///
103+
/// FIXME: This needs to have a mechanism of reporting that a duplicate expression was found in
104+
/// another group.
99105
async fn add_logical_expression_to_group(
100106
&self,
101107
group_id: Self::GroupId,
@@ -114,6 +120,9 @@ pub trait Memo {
114120
/// [`MemoError::InvalidExpression`] error.
115121
///
116122
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
123+
///
124+
/// FIXME: This needs to have a mechanism of reporting that a duplicate expression was found in
125+
/// another group.
117126
async fn add_physical_expression_to_group(
118127
&self,
119128
group_id: Self::GroupId,

0 commit comments

Comments
 (0)