Skip to content

Commit 174171a

Browse files
authored
Merge pull request #16 from cmu-db/cost-model-refine-schema
Modify cost model stats table schema to support update_stats
2 parents ee45576 + d8daabd commit 174171a

15 files changed

+330
-68
lines changed

optd-persistent/src/entities/event.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ pub enum Relation {
1818
GroupWinner,
1919
#[sea_orm(has_many = "super::plan_cost::Entity")]
2020
PlanCost,
21-
#[sea_orm(has_many = "super::statistic::Entity")]
22-
Statistic,
21+
#[sea_orm(has_many = "super::versioned_statistic::Entity")]
22+
VersionedStatistic,
2323
}
2424

2525
impl Related<super::group_winner::Entity> for Entity {
@@ -34,9 +34,9 @@ impl Related<super::plan_cost::Entity> for Entity {
3434
}
3535
}
3636

37-
impl Related<super::statistic::Entity> for Entity {
37+
impl Related<super::versioned_statistic::Entity> for Entity {
3838
fn to() -> RelationDef {
39-
Relation::Statistic.def()
39+
Relation::VersionedStatistic.def()
4040
}
4141
}
4242

optd-persistent/src/entities/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@ pub mod logical_property;
1717
pub mod namespace_metadata;
1818
pub mod physical_children;
1919
pub mod physical_expression;
20+
pub mod physical_expression_to_statistic_junction;
2021
pub mod physical_property;
2122
pub mod plan_cost;
2223
pub mod statistic;
2324
pub mod statistic_to_attribute_junction;
2425
pub mod table_metadata;
2526
pub mod trigger;
27+
pub mod versioned_statistic;

optd-persistent/src/entities/physical_expression.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ pub enum Relation {
2727
GroupWinner,
2828
#[sea_orm(has_many = "super::physical_children::Entity")]
2929
PhysicalChildren,
30+
#[sea_orm(has_many = "super::physical_expression_to_statistic_junction::Entity")]
31+
PhysicalExpressionToStatisticJunction,
3032
#[sea_orm(has_many = "super::physical_property::Entity")]
3133
PhysicalProperty,
3234
#[sea_orm(has_many = "super::plan_cost::Entity")]
@@ -45,6 +47,12 @@ impl Related<super::physical_children::Entity> for Entity {
4547
}
4648
}
4749

50+
impl Related<super::physical_expression_to_statistic_junction::Entity> for Entity {
51+
fn to() -> RelationDef {
52+
Relation::PhysicalExpressionToStatisticJunction.def()
53+
}
54+
}
55+
4856
impl Related<super::physical_property::Entity> for Entity {
4957
fn to() -> RelationDef {
5058
Relation::PhysicalProperty.def()
@@ -70,4 +78,17 @@ impl Related<super::cascades_group::Entity> for Entity {
7078
}
7179
}
7280

81+
impl Related<super::statistic::Entity> for Entity {
82+
fn to() -> RelationDef {
83+
super::physical_expression_to_statistic_junction::Relation::Statistic.def()
84+
}
85+
fn via() -> Option<RelationDef> {
86+
Some(
87+
super::physical_expression_to_statistic_junction::Relation::PhysicalExpression
88+
.def()
89+
.rev(),
90+
)
91+
}
92+
}
93+
7394
impl ActiveModelBehavior for ActiveModel {}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0
2+
3+
use sea_orm::entity::prelude::*;
4+
5+
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
6+
#[sea_orm(table_name = "physical_expression_to_statistic_junction")]
7+
pub struct Model {
8+
#[sea_orm(primary_key, auto_increment = false)]
9+
pub physical_expression_id: i32,
10+
#[sea_orm(primary_key, auto_increment = false)]
11+
pub statistic_id: i32,
12+
}
13+
14+
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
15+
pub enum Relation {
16+
#[sea_orm(
17+
belongs_to = "super::physical_expression::Entity",
18+
from = "Column::PhysicalExpressionId",
19+
to = "super::physical_expression::Column::Id",
20+
on_update = "Cascade",
21+
on_delete = "Cascade"
22+
)]
23+
PhysicalExpression,
24+
#[sea_orm(
25+
belongs_to = "super::statistic::Entity",
26+
from = "Column::StatisticId",
27+
to = "super::statistic::Column::Id",
28+
on_update = "Cascade",
29+
on_delete = "Cascade"
30+
)]
31+
Statistic,
32+
}
33+
34+
impl Related<super::physical_expression::Entity> for Entity {
35+
fn to() -> RelationDef {
36+
Relation::PhysicalExpression.def()
37+
}
38+
}
39+
40+
impl Related<super::statistic::Entity> for Entity {
41+
fn to() -> RelationDef {
42+
Relation::Statistic.def()
43+
}
44+
}
45+
46+
impl ActiveModelBehavior for ActiveModel {}

optd-persistent/src/entities/prelude.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ pub use super::logical_property::Entity as LogicalProperty;
1515
pub use super::namespace_metadata::Entity as NamespaceMetadata;
1616
pub use super::physical_children::Entity as PhysicalChildren;
1717
pub use super::physical_expression::Entity as PhysicalExpression;
18+
pub use super::physical_expression_to_statistic_junction::Entity as PhysicalExpressionToStatisticJunction;
1819
pub use super::physical_property::Entity as PhysicalProperty;
1920
pub use super::plan_cost::Entity as PlanCost;
2021
pub use super::statistic::Entity as Statistic;
2122
pub use super::statistic_to_attribute_junction::Entity as StatisticToAttributeJunction;
2223
pub use super::table_metadata::Entity as TableMetadata;
2324
pub use super::trigger::Entity as Trigger;
25+
pub use super::versioned_statistic::Entity as VersionedStatistic;

optd-persistent/src/entities/statistic.rs

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,23 @@
22
33
use sea_orm::entity::prelude::*;
44

5-
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
5+
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
66
#[sea_orm(table_name = "statistic")]
77
pub struct Model {
88
#[sea_orm(primary_key)]
99
pub id: i32,
1010
pub name: String,
1111
pub table_id: i32,
12-
pub epoch_id: i32,
1312
pub created_time: DateTimeUtc,
1413
pub number_of_attributes: i32,
1514
pub statistic_type: i32,
16-
#[sea_orm(column_type = "Float")]
17-
pub statistic_value: f32,
15+
pub description: String,
1816
}
1917

2018
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
2119
pub enum Relation {
22-
#[sea_orm(
23-
belongs_to = "super::event::Entity",
24-
from = "Column::EpochId",
25-
to = "super::event::Column::EpochId",
26-
on_update = "Cascade",
27-
on_delete = "Cascade"
28-
)]
29-
Event,
20+
#[sea_orm(has_many = "super::physical_expression_to_statistic_junction::Entity")]
21+
PhysicalExpressionToStatisticJunction,
3022
#[sea_orm(has_many = "super::statistic_to_attribute_junction::Entity")]
3123
StatisticToAttributeJunction,
3224
#[sea_orm(
@@ -37,11 +29,13 @@ pub enum Relation {
3729
on_delete = "Cascade"
3830
)]
3931
TableMetadata,
32+
#[sea_orm(has_many = "super::versioned_statistic::Entity")]
33+
VersionedStatistic,
4034
}
4135

42-
impl Related<super::event::Entity> for Entity {
36+
impl Related<super::physical_expression_to_statistic_junction::Entity> for Entity {
4337
fn to() -> RelationDef {
44-
Relation::Event.def()
38+
Relation::PhysicalExpressionToStatisticJunction.def()
4539
}
4640
}
4741

@@ -57,6 +51,12 @@ impl Related<super::table_metadata::Entity> for Entity {
5751
}
5852
}
5953

54+
impl Related<super::versioned_statistic::Entity> for Entity {
55+
fn to() -> RelationDef {
56+
Relation::VersionedStatistic.def()
57+
}
58+
}
59+
6060
impl Related<super::attribute::Entity> for Entity {
6161
fn to() -> RelationDef {
6262
super::statistic_to_attribute_junction::Relation::Attribute.def()
@@ -70,4 +70,17 @@ impl Related<super::attribute::Entity> for Entity {
7070
}
7171
}
7272

73+
impl Related<super::physical_expression::Entity> for Entity {
74+
fn to() -> RelationDef {
75+
super::physical_expression_to_statistic_junction::Relation::PhysicalExpression.def()
76+
}
77+
fn via() -> Option<RelationDef> {
78+
Some(
79+
super::physical_expression_to_statistic_junction::Relation::Statistic
80+
.def()
81+
.rev(),
82+
)
83+
}
84+
}
85+
7386
impl ActiveModelBehavior for ActiveModel {}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0
2+
3+
use sea_orm::entity::prelude::*;
4+
5+
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
6+
#[sea_orm(table_name = "versioned_statistic")]
7+
pub struct Model {
8+
#[sea_orm(primary_key)]
9+
pub id: i32,
10+
pub epoch_id: i32,
11+
pub statistic_id: i32,
12+
pub statistic_value: Json,
13+
}
14+
15+
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
16+
pub enum Relation {
17+
#[sea_orm(
18+
belongs_to = "super::event::Entity",
19+
from = "Column::EpochId",
20+
to = "super::event::Column::EpochId",
21+
on_update = "Cascade",
22+
on_delete = "Cascade"
23+
)]
24+
Event,
25+
#[sea_orm(
26+
belongs_to = "super::statistic::Entity",
27+
from = "Column::StatisticId",
28+
to = "super::statistic::Column::Id",
29+
on_update = "Cascade",
30+
on_delete = "Cascade"
31+
)]
32+
Statistic,
33+
}
34+
35+
impl Related<super::event::Entity> for Entity {
36+
fn to() -> RelationDef {
37+
Relation::Event.def()
38+
}
39+
}
40+
41+
impl Related<super::statistic::Entity> for Entity {
42+
fn to() -> RelationDef {
43+
Relation::Statistic.def()
44+
}
45+
}
46+
47+
impl ActiveModelBehavior for ActiveModel {}

optd-persistent/src/migrator/cost_model/m20241029_000001_event.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
//! Every time we insert/update statistics, we need to insert a new
2+
//! row into this table to record the event.
3+
14
use sea_orm_migration::{prelude::*, schema::*};
25

36
#[derive(Iden)]
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//! This table records which statistics each physical expression uses, so that we
2+
//! don't need to compute this again. It is especially useful for update_stats: there,
3+
//! costs must be invalidated by physical_expression_id, so we use this table
4+
//! to look up the physical_expression_id values associated with a statistic_id.
5+
//!
6+
//! **NOTE:** When we compute the cost for a physical expression, we should also
7+
//! insert related mappings into this table.
8+
9+
use crate::migrator::cost_model::statistic::Statistic;
10+
use crate::migrator::memo::physical_expression::PhysicalExpression;
11+
12+
use sea_orm_migration::{prelude::*, schema::*};
13+
14+
#[derive(Iden)]
15+
pub enum PhysicalExpressionToStatisticJunction {
16+
Table,
17+
PhysicalExpressionId,
18+
StatisticId,
19+
}
20+
21+
#[derive(DeriveMigrationName)]
22+
pub struct Migration;
23+
24+
#[async_trait::async_trait]
25+
impl MigrationTrait for Migration {
26+
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
27+
manager
28+
.create_table(
29+
Table::create()
30+
.table(PhysicalExpressionToStatisticJunction::Table)
31+
.if_not_exists()
32+
.col(integer(
33+
PhysicalExpressionToStatisticJunction::PhysicalExpressionId,
34+
))
35+
.col(integer(PhysicalExpressionToStatisticJunction::StatisticId))
36+
.primary_key(
37+
Index::create()
38+
.col(PhysicalExpressionToStatisticJunction::PhysicalExpressionId)
39+
.col(PhysicalExpressionToStatisticJunction::StatisticId),
40+
)
41+
.foreign_key(
42+
ForeignKey::create()
43+
.from(
44+
PhysicalExpressionToStatisticJunction::Table,
45+
PhysicalExpressionToStatisticJunction::PhysicalExpressionId,
46+
)
47+
.to(PhysicalExpression::Table, PhysicalExpression::Id)
48+
.on_delete(ForeignKeyAction::Cascade)
49+
.on_update(ForeignKeyAction::Cascade),
50+
)
51+
.foreign_key(
52+
ForeignKey::create()
53+
.from(
54+
PhysicalExpressionToStatisticJunction::Table,
55+
PhysicalExpressionToStatisticJunction::StatisticId,
56+
)
57+
.to(Statistic::Table, Statistic::Id)
58+
.on_delete(ForeignKeyAction::Cascade)
59+
.on_update(ForeignKeyAction::Cascade),
60+
)
61+
.to_owned(),
62+
)
63+
.await
64+
}
65+
66+
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
67+
manager
68+
.drop_table(
69+
Table::drop()
70+
.table(PhysicalExpressionToStatisticJunction::Table)
71+
.to_owned(),
72+
)
73+
.await
74+
}
75+
}

optd-persistent/src/migrator/cost_model/m20241029_000001_plan_cost.rs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
1-
/*
2-
Table plan_cost {
3-
id integer PK
4-
physical_expression_id integer [ref: > physical_expression.id]
5-
epoch_id integer [ref: > event.epoch_id]
6-
cost integer
7-
// Whether the cost is valid or not. If the latest cost for an expr is invalid, then we need to recompute the cost.
8-
// We need to invalidate the cost when the related stats are updated.
9-
is_valid boolean
10-
}
11-
*/
1+
//! When a statistic is updated, all the related costs should be invalidated (IsValid is set to false).
2+
//! This design (using IsValid flag) is based on the assumption that update_stats will not be called very frequently.
3+
//! It favors the compute_cost performance over the update_stats performance.
124
135
use crate::migrator::cost_model::event::Event;
146
use crate::migrator::memo::physical_expression::PhysicalExpression;
@@ -21,6 +13,8 @@ pub enum PlanCost {
2113
PhysicalExpressionId,
2214
EpochId,
2315
Cost,
16+
// Whether the cost is valid or not. If the latest cost for an expr is invalid, then we need to recompute the cost.
17+
// We need to invalidate the cost when the related stats are updated.
2418
IsValid,
2519
}
2620

0 commit comments

Comments
 (0)