Skip to content

Commit e61e7a6

Browse files
committed
implement get_stats_for_attr_indices_based
1 parent 34b1074 commit e61e7a6

File tree

3 files changed

+109
-16
lines changed

3 files changed

+109
-16
lines changed

optd-cost-model/src/storage.rs

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#![allow(unused_variables)]
22
use std::sync::Arc;
33

4-
use optd_persistent::{cost_model::interface::Attr, BackendManager, CostModelStorageLayer};
4+
use optd_persistent::{
5+
cost_model::interface::{Attr, StatType},
6+
BackendManager, CostModelStorageLayer,
7+
};
58

69
use crate::{
710
common::types::TableId, stats::AttributeCombValueStats, CostModelError, CostModelResult,
@@ -14,8 +17,8 @@ pub struct CostModelStorageManager<S: CostModelStorageLayer> {
1417
// TODO: in-memory cache
1518
}
1619

17-
impl CostModelStorageManager<BackendManager> {
18-
pub fn new(backend_manager: Arc<BackendManager>) -> Self {
20+
impl<S: CostModelStorageLayer> CostModelStorageManager<S> {
21+
pub fn new(backend_manager: Arc<S>) -> Self {
1922
Self { backend_manager }
2023
}
2124

@@ -25,28 +28,37 @@ impl CostModelStorageManager<BackendManager> {
2528
pub async fn get_attribute_info(
2629
&self,
2730
table_id: TableId,
28-
attribute_base_index: i32,
31+
attr_base_index: i32,
2932
) -> CostModelResult<Attr> {
3033
let attr = self
3134
.backend_manager
32-
.get_attribute(table_id.into(), attribute_base_index)
35+
.get_attribute(table_id.into(), attr_base_index)
3336
.await?;
3437
attr.ok_or_else(|| {
3538
CostModelError::SemanticError(SemanticError::AttributeNotFound(
3639
table_id,
37-
attribute_base_index,
40+
attr_base_index,
3841
))
3942
})
4043
}
4144

4245
/// TODO: documentation
4346
/// TODO: once we have an in-memory cache,
4447
/// we should return a reference (`&AttributeCombValueStats`) instead.
45-
pub fn get_attributes_comb_statistics(
48+
pub async fn get_attributes_comb_statistics(
4649
&self,
4750
table_id: TableId,
48-
attr_comb: &[usize],
51+
attr_base_indices: &[i32],
4952
) -> CostModelResult<Option<AttributeCombValueStats>> {
50-
todo!()
53+
Ok(self
54+
.backend_manager
55+
.get_stats_for_attr_indices_based(
56+
table_id.into(),
57+
attr_base_indices.to_vec(),
58+
StatType::Comb,
59+
None,
60+
)
61+
.await?
62+
.map(|json| json.into()))
5163
}
5264
}

optd-persistent/src/cost_model/interface.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ pub enum ConstraintType {
4848
/// TODO: documentation
4949
#[derive(Copy, Clone, Debug, PartialEq)]
5050
pub enum StatType {
51+
/// A combination of multiple statistics, e.g. most common values, distribution.
52+
Comb,
5153
/// `TableRowCount` only applies to table statistics.
5254
TableRowCount,
5355
NotNullCount,
@@ -135,6 +137,17 @@ pub trait CostModelStorageLayer {
135137
epoch_id: Option<EpochId>,
136138
) -> StorageResult<Option<Json>>;
137139

140+
/// Get the (joint) statistics for one or more attributes based on attribute base indices.
141+
///
142+
/// If `epoch_id` is `None`, the latest statistics are returned.
143+
async fn get_stats_for_attr_indices_based(
144+
&self,
145+
table_id: TableId,
146+
attr_base_indices: Vec<i32>,
147+
stat_type: StatType,
148+
epoch_id: Option<EpochId>,
149+
) -> StorageResult<Option<Json>>;
150+
138151
async fn get_cost_analysis(
139152
&self,
140153
expr_id: ExprId,

optd-persistent/src/cost_model/orm.rs

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ use sea_orm::prelude::{Expr, Json};
77
use sea_orm::sea_query::Query;
88
use sea_orm::{sqlx::types::chrono::Utc, EntityTrait};
99
use sea_orm::{
10-
ActiveModelTrait, ColumnTrait, DbBackend, DbErr, DeleteResult, EntityOrSelect, ModelTrait,
11-
QueryFilter, QueryOrder, QuerySelect, QueryTrait, RuntimeErr, TransactionTrait,
10+
ActiveModelTrait, ColumnTrait, Condition, DbBackend, DbErr, DeleteResult, EntityOrSelect,
11+
ModelTrait, QueryFilter, QueryOrder, QuerySelect, QueryTrait, RuntimeErr, TransactionTrait,
1212
};
1313
use serde_json::json;
1414

@@ -18,8 +18,10 @@ use super::interface::{
1818
};
1919

2020
impl BackendManager {
21-
fn get_description_from_attr_ids(&self, attr_ids: Vec<AttrId>) -> String {
22-
let mut attr_ids = attr_ids;
21+
/// Builds the description string by joining `attr_ids` with commas.
22+
/// The ids are sorted before concatenation, so the result does not depend on input order
23+
/// e.g. [1, 2, 3] -> "1,2,3"
24+
fn get_description_from_attr_ids(&self, mut attr_ids: Vec<AttrId>) -> String {
2325
attr_ids.sort();
2426
attr_ids
2527
.iter()
@@ -399,9 +401,6 @@ impl CostModelStorageLayer for BackendManager {
399401
epoch_id: Option<EpochId>,
400402
) -> StorageResult<Option<Json>> {
401403
let attr_num = attr_ids.len() as i32;
402-
// The description is to concat `attr_ids` using commas
403-
// Note that `attr_ids` should be sorted before concatenation
404-
// e.g. [1, 2, 3] -> "1,2,3"
405404
attr_ids.sort();
406405
let description = self.get_description_from_attr_ids(attr_ids);
407406

@@ -429,6 +428,37 @@ impl CostModelStorageLayer for BackendManager {
429428
}
430429
}
431430

431+
async fn get_stats_for_attr_indices_based(
432+
&self,
433+
table_id: TableId,
434+
attr_base_indices: Vec<i32>,
435+
stat_type: StatType,
436+
epoch_id: Option<EpochId>,
437+
) -> StorageResult<Option<Json>> {
438+
// Get the attribute ids based on table id and attribute base indices
439+
let mut condition = Condition::any();
440+
for attr_base_index in &attr_base_indices {
441+
condition = condition.add(attribute::Column::BaseAttributeNumber.eq(*attr_base_index));
442+
}
443+
let attr_ids = Attribute::find()
444+
.filter(attribute::Column::TableId.eq(table_id))
445+
.filter(condition)
446+
.all(&self.db)
447+
.await?
448+
.iter()
449+
.map(|attr| attr.id)
450+
.collect::<Vec<_>>();
451+
452+
if attr_ids.len() != attr_base_indices.len() {
453+
return Err(BackendError::BackendError(format!(
454+
"Not all attributes found for table_id {} and base indices {:?}",
455+
table_id, attr_base_indices
456+
)));
457+
}
458+
459+
self.get_stats_for_attr(attr_ids, stat_type, epoch_id).await
460+
}
461+
432462
/// TODO: documentation
433463
async fn get_cost_analysis(
434464
&self,
@@ -1212,4 +1242,42 @@ mod tests {
12121242

12131243
remove_db_file(DATABASE_FILE);
12141244
}
1245+
1246+
#[tokio::test]
1247+
async fn test_get_stats_for_attr_indices_based() {
1248+
const DATABASE_FILE: &str = "test_get_stats_for_attr_indices_based.db";
1249+
let database_url = copy_init_db(DATABASE_FILE).await;
1250+
let mut binding = super::BackendManager::new(Some(&database_url)).await;
1251+
let backend_manager = binding.as_mut().unwrap();
1252+
let epoch_id = 1;
1253+
let table_id = 1;
1254+
let attr_base_indices = vec![0, 1];
1255+
let stat_type = StatType::Cardinality;
1256+
1257+
// Statistics exist in the database
1258+
let res = backend_manager
1259+
.get_stats_for_attr_indices_based(table_id, attr_base_indices.clone(), stat_type, None)
1260+
.await
1261+
.unwrap()
1262+
.unwrap();
1263+
let cardinality = res.as_i64().unwrap();
1264+
assert_eq!(cardinality, 0);
1265+
1266+
// Statistics do not exist in the database
1267+
let attr_base_indices = vec![1];
1268+
let res = backend_manager
1269+
.get_stats_for_attr_indices_based(table_id, attr_base_indices.clone(), stat_type, None)
1270+
.await
1271+
.unwrap();
1272+
assert!(res.is_none());
1273+
1274+
// Attribute base indices not valid.
1275+
let attr_base_indices = vec![1, 2];
1276+
let res = backend_manager
1277+
.get_stats_for_attr_indices_based(table_id, attr_base_indices.clone(), stat_type, None)
1278+
.await;
1279+
assert!(res.is_err());
1280+
1281+
remove_db_file(DATABASE_FILE);
1282+
}
12151283
}

0 commit comments

Comments
 (0)