Skip to content

Commit 9b74ff7

Browse files
committed
add memo trait
This commit adds a `Memo` trait and a first draft of an implementation of the `Memo` trait via the backed ORM-mapped database. move things into `memo` submodule change to `BackendManager` merge with cost model trait and implementation Quick format fix make columns nullable for cost model (#21) add doc for cost model migrators Add mock catalog Fix some chores add memo trait move things into `memo` submodule change to `BackendManager` merge with cost model trait and implementation fix duplicate def
1 parent e1cba24 commit 9b74ff7

File tree

6 files changed

+467
-2
lines changed

6 files changed

+467
-2
lines changed

optd-persistent/src/lib.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ mod migrator;
1313
pub mod cost_model;
1414
pub use cost_model::interface::CostModelStorageLayer;
1515

16+
mod memo;
17+
pub use memo::interface::Memo;
18+
1619
/// The filename of the SQLite database for migration.
1720
pub const DATABASE_FILENAME: &str = "sqlite.db";
1821
/// The URL of the SQLite database for migration.
@@ -39,18 +42,29 @@ fn get_sqlite_url(file: &str) -> String {
3942
format!("sqlite:{}?mode=rwc", file)
4043
}
4144

42-
pub type StorageResult<T> = Result<T, BackendError>;
43-
4445
#[derive(Debug)]
4546
pub enum CostModelError {
4647
// TODO: Add more error types
4748
UnknownStatisticType,
4849
VersionedStatisticNotFound,
4950
}
5051

52+
// TODO: convert this to `thiserror`.
53+
#[derive(Debug)]
54+
/// The different kinds of errors that might occur while running operations on a memo table.
55+
pub enum MemoError {
56+
UnknownGroup,
57+
UnknownLogicalExpression,
58+
UnknownPhysicalExpression,
59+
InvalidExpression,
60+
Database(DbErr),
61+
}
62+
63+
/// TODO convert this to `thiserror`
5164
#[derive(Debug)]
5265
pub enum BackendError {
5366
CostModel(CostModelError),
67+
Memo(MemoError),
5468
Database(DbErr),
5569
// TODO: Add other variants as needed for different error types
5670
}
@@ -61,12 +75,27 @@ impl From<CostModelError> for BackendError {
6175
}
6276
}
6377

78+
impl From<MemoError> for BackendError {
79+
fn from(value: MemoError) -> Self {
80+
BackendError::Memo(value)
81+
}
82+
}
83+
6484
impl From<DbErr> for BackendError {
6585
fn from(value: DbErr) -> Self {
6686
BackendError::Database(value)
6787
}
6888
}
6989

90+
impl From<DbErr> for MemoError {
91+
fn from(value: DbErr) -> Self {
92+
MemoError::Database(value)
93+
}
94+
}
95+
96+
/// A type alias for a result with [`BackendError`] as the error type.
97+
pub type StorageResult<T> = Result<T, BackendError>;
98+
7099
pub struct BackendManager {
71100
db: DatabaseConnection,
72101
}

optd-persistent/src/main.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,17 @@ use optd_persistent::DATABASE_URL;
1717

1818
#[tokio::main]
1919
async fn main() {
20+
basic_demo().await;
21+
memo_demo().await;
22+
}
23+
24+
async fn memo_demo() {
25+
let _db = Database::connect(DATABASE_URL).await.unwrap();
26+
27+
todo!()
28+
}
29+
30+
async fn basic_demo() {
2031
let db = Database::connect(DATABASE_URL).await.unwrap();
2132

2233
// Create a new `CascadesGroup`.
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use crate::entities::*;
2+
use std::hash::{DefaultHasher, Hash, Hasher};
3+
4+
/// All of the different types of fixed logical operators.
5+
///
6+
/// Note that there could be more operators that the memo table must support that are not enumerated
7+
/// in this enum, as there can be up to `2^16` different types of operators.
8+
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
9+
#[non_exhaustive]
10+
#[repr(i16)]
11+
pub enum LogicalOperator {
12+
Scan,
13+
Join,
14+
}
15+
16+
/// All of the different types of fixed physical operators.
17+
///
18+
/// Note that there could be more operators that the memo table must support that are not enumerated
19+
/// in this enum, as there can be up to `2^16` different types of operators.
20+
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
21+
#[non_exhaustive]
22+
#[repr(i16)]
23+
pub enum PhysicalOperator {
24+
TableScan,
25+
IndexScan,
26+
NestedLoopJoin,
27+
HashJoin,
28+
}
29+
30+
/// A method to generate a fingerprint used to efficiently check if two
31+
/// expressions are equivalent.
32+
///
33+
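/// TODO actually make efficient.
///
/// NOTE(review): the algorithm behind `DefaultHasher` is unspecified and may change between Rust
/// releases; if these fingerprints are persisted, confirm that values written by an older build
/// still compare equal to freshly computed ones.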
34+
fn fingerprint(variant_tag: i16, data: &serde_json::Value) -> i64 {
35+
let mut hasher = DefaultHasher::new();
36+
37+
variant_tag.hash(&mut hasher);
38+
data.hash(&mut hasher);
39+
40+
hasher.finish() as i64
41+
}
42+
43+
impl logical_expression::Model {
44+
/// Creates a new logical expression with an unset `id` and `group_id`.
45+
pub fn new(variant_tag: LogicalOperator, data: serde_json::Value) -> Self {
46+
let tag = variant_tag as i16;
47+
let fingerprint = fingerprint(tag, &data);
48+
49+
Self {
50+
id: 0,
51+
group_id: 0,
52+
fingerprint,
53+
variant_tag: tag,
54+
data,
55+
}
56+
}
57+
}
58+
59+
impl physical_expression::Model {
60+
/// Creates a new physical expression with an unset `id` and `group_id`.
61+
pub fn new(variant_tag: PhysicalOperator, data: serde_json::Value) -> Self {
62+
let tag = variant_tag as i16;
63+
let fingerprint = fingerprint(tag, &data);
64+
65+
Self {
66+
id: 0,
67+
group_id: 0,
68+
fingerprint,
69+
variant_tag: tag,
70+
data,
71+
}
72+
}
73+
}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
use crate::StorageResult;
2+
3+
/// A trait representing an implementation of a memoization table.
4+
///
5+
/// Note that we use [`trait_variant`] here in order to add a `Send` bound to the future returned
/// by every method.
6+
/// See this [blog post](
7+
/// https://blog.rust-lang.org/2023/12/21/async-fn-rpit-in-traits.html#async-fn-in-public-traits)
8+
/// for more information.
9+
///
10+
/// TODO Figure out for each when to get the ID of a record or the entire record itself.
11+
#[trait_variant::make(Send)]
12+
pub trait Memo {
13+
/// A type representing a group in the Cascades framework.
14+
type Group;
15+
/// A type representing a unique identifier for a group.
16+
type GroupId;
17+
/// A type representing a logical expression.
18+
type LogicalExpression;
19+
/// A type representing a unique identifier for a logical expression.
20+
type LogicalExpressionId;
21+
/// A type representing a physical expression.
22+
type PhysicalExpression;
23+
/// A type representing a unique identifier for a physical expression.
24+
type PhysicalExpressionId;
25+
26+
/// Retrieves a [`Self::Group`] given a [`Self::GroupId`].
27+
///
28+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
29+
async fn get_group(&self, group_id: Self::GroupId) -> StorageResult<Self::Group>;
30+
31+
/// Retrieves all groups that are stored in the memo table.
32+
async fn get_all_groups(&self) -> StorageResult<Vec<Self::Group>>;
33+
34+
/// Retrieves a [`Self::LogicalExpression`] given a [`Self::LogicalExpressionId`].
35+
///
36+
/// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`]
37+
/// error.
38+
async fn get_logical_expression(
39+
&self,
40+
logical_expression_id: Self::LogicalExpressionId,
41+
) -> StorageResult<Self::LogicalExpression>;
42+
43+
/// Retrieves a [`Self::PhysicalExpression`] given a [`Self::PhysicalExpressionId`].
44+
///
45+
/// If the physical expression does not exist, returns a
46+
/// [`MemoError::UnknownPhysicalExpression`] error.
47+
async fn get_physical_expression(
48+
&self,
49+
physical_expression_id: Self::PhysicalExpressionId,
50+
) -> StorageResult<Self::PhysicalExpression>;
51+
52+
/// Retrieves the parent group ID of a logical expression given its expression ID.
53+
///
54+
/// If the logical expression does not exist, returns a [`MemoError::UnknownLogicalExpression`]
55+
/// error.
56+
async fn get_group_from_logical_expression(
57+
&self,
58+
logical_expression_id: Self::LogicalExpressionId,
59+
) -> StorageResult<Self::GroupId>;
60+
61+
/// Retrieves the parent group ID of a physical expression given its expression ID.
62+
///
63+
/// If the physical expression does not exist, returns a
64+
/// [`MemoError::UnknownPhysicalExpression`] error.
65+
async fn get_group_from_physical_expression(
66+
&self,
67+
physical_expression_id: Self::PhysicalExpressionId,
68+
) -> StorageResult<Self::GroupId>;
69+
70+
/// Retrieves all of the logical expression "children" of a group.
71+
///
72+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
73+
async fn get_group_logical_expressions(
74+
&self,
75+
group_id: Self::GroupId,
76+
) -> StorageResult<Vec<Self::LogicalExpression>>;
77+
78+
/// Retrieves all of the physical expression "children" of a group.
79+
///
80+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
81+
async fn get_group_physical_expressions(
82+
&self,
83+
group_id: Self::GroupId,
84+
) -> StorageResult<Vec<Self::PhysicalExpression>>;
85+
86+
/// Retrieves the best physical query plan (winner) for a given group.
87+
///
88+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
89+
async fn get_winner(
90+
&self,
91+
group_id: Self::GroupId,
92+
) -> StorageResult<Option<Self::PhysicalExpressionId>>;
93+
94+
/// Updates / replaces a group's best physical plan (winner). Optionally returns the previous
95+
/// winner's physical expression ID.
96+
///
97+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
98+
async fn update_group_winner(
99+
&self,
100+
group_id: Self::GroupId,
101+
physical_expression_id: Self::PhysicalExpressionId,
102+
) -> StorageResult<Option<Self::PhysicalExpressionId>>;
103+
104+
/// Adds a logical expression to an existing group via its [`Self::GroupId`].
105+
///
106+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
107+
async fn add_logical_expression_to_group(
108+
&self,
109+
group_id: Self::GroupId,
110+
logical_expression: Self::LogicalExpression,
111+
) -> StorageResult<()>;
112+
113+
/// Adds a physical expression to an existing group via its [`Self::GroupId`].
114+
///
115+
/// If the group does not exist, returns a [`MemoError::UnknownGroup`] error.
116+
async fn add_physical_expression_to_group(
117+
&self,
118+
group_id: Self::GroupId,
119+
physical_expression: Self::PhysicalExpression,
120+
) -> StorageResult<()>;
121+
122+
/// Adds a new logical expression into the memo table, creating a new group if the expression
123+
/// does not already exist.
124+
///
125+
/// The [`Self::LogicalExpression`] type should have some sort of mechanism for checking if
126+
/// the expression has been seen before, and if it has already been created, then the parent
127+
/// group ID should also be retrievable.
128+
///
129+
/// If the expression already exists, then this function will return the [`Self::GroupId`] of
130+
/// the parent group and the corresponding (already existing) [`Self::LogicalExpressionId`].
131+
///
132+
/// If the expression does not exist, this function will create a new group and a new
133+
/// expression, returning brand new IDs for both.
134+
async fn add_logical_expression(
135+
&self,
136+
expression: Self::LogicalExpression,
137+
) -> StorageResult<(Self::GroupId, Self::LogicalExpressionId)>;
138+
}

optd-persistent/src/memo/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
mod expression;
2+
3+
pub mod interface;
4+
pub mod orm;

0 commit comments

Comments
 (0)