Skip to content

Commit 472ad48

Browse files
committed
fix: fix update_stats_from_catalog and improve the test
1 parent a0d6b32 commit 472ad48

File tree

3 files changed

+78
-22
lines changed

3 files changed

+78
-22
lines changed

optd-persistent/src/cost_model/catalog/mock_catalog.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
use sea_orm::prelude::Json;
2+
use serde_json::json;
3+
14
use crate::cost_model::interface::{AttrType, IndexType, StatType};
25

36
pub struct MockDatabaseMetadata {
@@ -31,7 +34,7 @@ pub struct MockStatistic {
3134
pub id: i32,
3235
pub stat_type: i32,
3336
// TODO(lanlou): what should I use for the value type?
34-
pub stat_value: String,
37+
pub stat_value: Json,
3538
pub attr_ids: Vec<i32>,
3639
pub table_id: Option<i32>,
3740
pub name: String,
@@ -111,23 +114,23 @@ impl MockCatalog {
111114
MockStatistic {
112115
id: 1,
113116
stat_type: StatType::Count as i32,
114-
stat_value: "100".to_string(),
117+
stat_value: json!(100),
115118
attr_ids: vec![1],
116119
table_id: None,
117120
name: "CountAttr1".to_string(),
118121
},
119122
MockStatistic {
120123
id: 2,
121124
stat_type: StatType::Count as i32,
122-
stat_value: "200".to_string(),
125+
stat_value: json!(200),
123126
attr_ids: vec![2],
124127
table_id: None,
125128
name: "CountAttr2".to_string(),
126129
},
127130
MockStatistic {
128131
id: 3,
129132
stat_type: StatType::Count as i32,
130-
stat_value: "300".to_string(),
133+
stat_value: json!(300),
131134
attr_ids: vec![],
132135
table_id: Some(1),
133136
name: "Table1Count".to_string(),

optd-persistent/src/cost_model/interface.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ pub enum ConstraintType {
4141

4242
/// TODO: documentation
4343
pub enum StatType {
44+
// TODO(lanlou): I am not sure which way to represent the type is better.
45+
// 1. `Count` means row count, (i.e. record count), and it only applies to
46+
// table statistics. In this way, we should introduce `NotNullCount` for attribute
47+
// statistics to indicate the number of non-null values.
48+
// 2. `Count` means the number of non-null values, and it applies to both table
49+
// and attribute statistics. (Will a table have a record with null values in all
50+
// attributes?)
51+
// For now, we just use the second way for simplicity.
4452
Count,
4553
Cardinality,
4654
Min,
@@ -78,11 +86,7 @@ pub trait CostModelStorageLayer {
7886
// TODO: Change EpochId to event::Model::epoch_id
7987
async fn create_new_epoch(&self, source: String, data: String) -> StorageResult<Self::EpochId>;
8088

81-
async fn update_stats_from_catalog(
82-
&self,
83-
c: CatalogSource,
84-
epoch_id: Self::EpochId,
85-
) -> StorageResult<()>;
89+
async fn update_stats_from_catalog(&self, c: CatalogSource) -> StorageResult<Self::EpochId>;
8690

8791
async fn update_stats(
8892
&self,

optd-persistent/src/cost_model/orm.rs

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,24 @@ impl CostModelStorageLayer for BackendManager {
5151
}
5252

5353
/// TODO: documentation
54-
async fn update_stats_from_catalog(
55-
&self,
56-
c: CatalogSource,
57-
epoch_id: Self::EpochId,
58-
) -> StorageResult<()> {
54+
async fn update_stats_from_catalog(&self, c: CatalogSource) -> StorageResult<Self::EpochId> {
55+
let transaction = self.db.begin().await?;
56+
let source = match c {
57+
CatalogSource::Mock => "Mock",
58+
CatalogSource::Iceberg() => "Iceberg",
59+
};
60+
let new_event = event::ActiveModel {
61+
source_variant: sea_orm::ActiveValue::Set(source.to_string()),
62+
timestamp: sea_orm::ActiveValue::Set(Utc::now()),
63+
data: sea_orm::ActiveValue::Set(sea_orm::JsonValue::String(
64+
"Update stats from catalog".to_string(),
65+
)),
66+
..Default::default()
67+
};
68+
let epoch_id = Event::insert(new_event)
69+
.exec(&transaction)
70+
.await?
71+
.last_insert_id;
5972
match c {
6073
CatalogSource::Mock => {
6174
let mock_catalog = MockCatalog::new();
@@ -66,7 +79,7 @@ impl CostModelStorageLayer for BackendManager {
6679
..Default::default()
6780
}
6881
}))
69-
.exec(&self.db)
82+
.exec(&transaction)
7083
.await?;
7184
NamespaceMetadata::insert_many(mock_catalog.namespaces.iter().map(|namespace| {
7285
namespace_metadata::ActiveModel {
@@ -76,7 +89,7 @@ impl CostModelStorageLayer for BackendManager {
7689
..Default::default()
7790
}
7891
}))
79-
.exec(&self.db)
92+
.exec(&transaction)
8093
.await?;
8194
TableMetadata::insert_many(mock_catalog.tables.iter().map(|table| {
8295
table_metadata::ActiveModel {
@@ -86,7 +99,7 @@ impl CostModelStorageLayer for BackendManager {
8699
..Default::default()
87100
}
88101
}))
89-
.exec(&self.db)
102+
.exec(&transaction)
90103
.await?;
91104
Attribute::insert_many(mock_catalog.attributes.iter().map(|attr| {
92105
attribute::ActiveModel {
@@ -101,7 +114,7 @@ impl CostModelStorageLayer for BackendManager {
101114
..Default::default()
102115
}
103116
}))
104-
.exec(&self.db)
117+
.exec(&transaction)
105118
.await?;
106119
Statistic::insert_many(mock_catalog.statistics.iter().map(|stat| {
107120
statistic::ActiveModel {
@@ -116,7 +129,29 @@ impl CostModelStorageLayer for BackendManager {
116129
..Default::default()
117130
}
118131
}))
119-
.exec(&self.db)
132+
.exec(&transaction)
133+
.await?;
134+
VersionedStatistic::insert_many(mock_catalog.statistics.iter().map(|stat| {
135+
versioned_statistic::ActiveModel {
136+
epoch_id: sea_orm::ActiveValue::Set(epoch_id),
137+
statistic_id: sea_orm::ActiveValue::Set(stat.id),
138+
statistic_value: sea_orm::ActiveValue::Set(stat.stat_value.clone()),
139+
..Default::default()
140+
}
141+
}))
142+
.exec(&transaction)
143+
.await?;
144+
StatisticToAttributeJunction::insert_many(mock_catalog.statistics.iter().flat_map(
145+
|stat| {
146+
stat.attr_ids.iter().map(move |attr_id| {
147+
statistic_to_attribute_junction::ActiveModel {
148+
statistic_id: sea_orm::ActiveValue::Set(stat.id),
149+
attribute_id: sea_orm::ActiveValue::Set(*attr_id),
150+
}
151+
})
152+
},
153+
))
154+
.exec(&transaction)
120155
.await?;
121156
IndexMetadata::insert_many(
122157
mock_catalog
@@ -140,12 +175,13 @@ impl CostModelStorageLayer for BackendManager {
140175
..Default::default()
141176
}),
142177
)
143-
.exec(&self.db)
178+
.exec(&transaction)
144179
.await?;
145-
Ok(())
146180
}
147181
CatalogSource::Iceberg() => todo!(),
148182
}
183+
transaction.commit().await?;
184+
Ok(epoch_id)
149185
}
150186

151187
/// TODO: improve the documentation
@@ -551,15 +587,28 @@ mod tests {
551587
let mut binding = super::BackendManager::new(Some(&database_url)).await;
552588
let backend_manager = binding.as_mut().unwrap();
553589
let res = backend_manager
554-
.update_stats_from_catalog(super::CatalogSource::Mock, 1)
590+
.update_stats_from_catalog(super::CatalogSource::Mock)
555591
.await;
556592
println!("{:?}", res);
557593
assert!(res.is_ok());
594+
let epoch_id = res.unwrap();
595+
assert_eq!(epoch_id, 1);
558596

559597
let lookup_res = Statistic::find().all(&backend_manager.db).await.unwrap();
560598
println!("{:?}", lookup_res);
561599
assert_eq!(lookup_res.len(), 3);
562600

601+
let stat_res = backend_manager
602+
.get_stats_for_table(1, StatType::Count as i32, Some(epoch_id))
603+
.await;
604+
assert!(stat_res.is_ok());
605+
assert_eq!(stat_res.unwrap().unwrap(), json!(300));
606+
let stat_res = backend_manager
607+
.get_stats_for_attr([2].to_vec(), StatType::Count as i32, None)
608+
.await;
609+
assert!(stat_res.is_ok());
610+
assert_eq!(stat_res.unwrap().unwrap(), json!(200));
611+
563612
remove_db_file(DATABASE_FILE);
564613
}
565614

0 commit comments

Comments
 (0)