Skip to content

Commit a6e4807

Browse files
authored
chore(query): add iceberg table metadata cache (#17780)
* feat(query): add iceberg table metadata cache * feat(query): add iceberg table metadata cache * feat(query): add iceberg table metadata cache * feat(query): add iceberg table metadata cache
1 parent 3dc7840 commit a6e4807

File tree

31 files changed

+356
-87
lines changed

31 files changed

+356
-87
lines changed

Cargo.lock

Lines changed: 2 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/meta/app/src/schema/catalog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ pub struct IcebergGlueCatalogOption {
129129
/// Same as `CatalogNameIdent`, but with `serde` support,
130130
/// and can be used as part of a value.
131131
// #[derive(Clone, Debug, PartialEq, Eq)]
132-
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)]
132+
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Hash, Eq)]
133133
pub struct CatalogName {
134134
pub tenant: String,
135135
pub catalog_name: String,

src/query/catalog/src/catalog/interface.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use std::any::Any;
1616
use std::fmt::Debug;
1717
use std::sync::Arc;
1818

19-
use databend_common_config::InnerConfig;
2019
use databend_common_exception::ErrorCode;
2120
use databend_common_exception::Result;
2221
use databend_common_meta_app::schema::database_name_ident::DatabaseNameIdent;
@@ -106,7 +105,6 @@ use databend_common_meta_app::schema::UpsertTableOptionReply;
106105
use databend_common_meta_app::schema::UpsertTableOptionReq;
107106
use databend_common_meta_app::schema::VirtualColumnMeta;
108107
use databend_common_meta_app::tenant::Tenant;
109-
use databend_common_meta_store::MetaStore;
110108
use databend_common_meta_types::MetaId;
111109
use databend_common_meta_types::SeqV;
112110
use databend_storages_common_session::SessionState;
@@ -127,12 +125,7 @@ pub struct StorageDescription {
127125
}
128126

129127
pub trait CatalogCreator: Send + Sync + Debug {
130-
fn try_create(
131-
&self,
132-
info: Arc<CatalogInfo>,
133-
conf: InnerConfig,
134-
meta: &MetaStore,
135-
) -> Result<Arc<dyn Catalog>>;
128+
fn try_create(&self, info: Arc<CatalogInfo>) -> Result<Arc<dyn Catalog>>;
136129
}
137130

138131
#[async_trait::async_trait]

src/query/catalog/src/catalog/manager.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,6 @@ pub struct CatalogManager {
5454

5555
/// catalog_creators is the catalog creators that registered.
5656
pub catalog_creators: HashMap<CatalogType, Arc<dyn CatalogCreator>>,
57-
58-
conf: InnerConfig,
5957
}
6058

6159
impl CatalogManager {
@@ -114,7 +112,7 @@ impl CatalogManager {
114112
created_on: Utc::now(),
115113
},
116114
};
117-
let ctl = creator.try_create(Arc::new(ctl_info), conf.to_owned(), &meta)?;
115+
let ctl = creator.try_create(Arc::new(ctl_info))?;
118116
external_catalogs.insert(name.clone(), ctl);
119117
}
120118

@@ -123,7 +121,6 @@ impl CatalogManager {
123121
default_catalog,
124122
external_catalogs,
125123
catalog_creators,
126-
conf: conf.to_owned(),
127124
};
128125

129126
Ok(Arc::new(catalog_manager))
@@ -153,8 +150,7 @@ impl CatalogManager {
153150
.catalog_creators
154151
.get(&typ)
155152
.ok_or_else(|| ErrorCode::BadArguments(format!("unknown catalog type: {:?}", typ)))?;
156-
157-
creator.try_create(info, self.conf.clone(), &self.meta)
153+
creator.try_create(info)
158154
}
159155

160156
/// Get a catalog from manager.
@@ -184,7 +180,6 @@ impl CatalogManager {
184180

185181
// Get catalog from metasrv.
186182
let info = self.meta.get_catalog(&ident).await?;
187-
188183
self.build_catalog(info, session_state)
189184
}
190185

src/query/catalog/src/database.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ pub trait Database: DynClone + Sync + Send {
108108
Ok(vec![])
109109
}
110110

111+
#[async_backtrace::framed]
112+
async fn trigger_use(&self) -> Result<()> {
113+
Ok(())
114+
}
115+
111116
#[async_backtrace::framed]
112117
async fn list_tables_names(&self) -> Result<Vec<String>> {
113118
Ok(vec![])

src/query/config/src/config.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3070,6 +3070,13 @@ pub struct CacheConfig {
30703070
)]
30713071
pub table_data_deserialized_memory_ratio: u64,
30723072

3073+
#[clap(
3074+
long = "cache-iceberg-table-meta-count",
3075+
value_name = "VALUE",
3076+
default_value = "1024"
3077+
)]
3078+
pub iceberg_table_meta_count: u64,
3079+
30733080
// ----- the following options/args are all deprecated ----
30743081
/// Max number of cached table segment
30753082
#[clap(long = "cache-table-meta-segment-count", value_name = "VALUE")]
@@ -3297,6 +3304,7 @@ mod cache_config_converters {
32973304
data_cache_key_reload_policy: value.data_cache_key_reload_policy.try_into()?,
32983305
table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes,
32993306
table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio,
3307+
iceberg_table_meta_count: value.iceberg_table_meta_count,
33003308
disk_cache_table_bloom_index_meta_size: value
33013309
.disk_cache_table_bloom_index_meta_size,
33023310
})
@@ -3330,6 +3338,7 @@ mod cache_config_converters {
33303338
disk_cache_config: value.disk_cache_config.into(),
33313339
table_data_deserialized_data_bytes: value.table_data_deserialized_data_bytes,
33323340
table_data_deserialized_memory_ratio: value.table_data_deserialized_memory_ratio,
3341+
iceberg_table_meta_count: value.iceberg_table_meta_count,
33333342
table_meta_segment_count: None,
33343343
segment_block_metas_count: value.segment_block_metas_count,
33353344
}

src/query/config/src/inner.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,9 @@ pub struct CacheConfig {
636636
/// Only if query nodes have plenty of un-utilized memory, the working set can be fitted into,
637637
/// and the access pattern will benefit from caching, consider enabling this cache.
638638
pub table_data_deserialized_memory_ratio: u64,
639+
640+
/// Max number of iceberg table metadata entries kept in the cache
641+
pub iceberg_table_meta_count: u64,
639642
}
640643

641644
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -733,6 +736,7 @@ impl Default for CacheConfig {
733736
data_cache_key_reload_policy: Default::default(),
734737
table_data_deserialized_data_bytes: 0,
735738
table_data_deserialized_memory_ratio: 0,
739+
iceberg_table_meta_count: 1024,
736740
}
737741
}
738742
}

src/query/service/src/interpreters/interpreter_use_database.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,12 @@ impl Interpreter for UseDatabaseInterpreter {
5050
if self.plan.database.trim().is_empty() {
5151
return Err(ErrorCode::UnknownDatabase("No database selected"));
5252
}
53-
self.ctx
53+
let db = self
54+
.ctx
5455
.set_current_database(self.plan.database.clone())
5556
.await?;
57+
db.trigger_use().await?;
58+
5659
self.ctx.set_affect(QueryAffect::UseDB {
5760
name: self.plan.database.clone(),
5861
});

src/query/service/src/sessions/query_ctx.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ use databend_common_base::runtime::MemStat;
4141
use databend_common_base::runtime::TrySpawn;
4242
use databend_common_base::JoinHandle;
4343
use databend_common_catalog::catalog::CATALOG_DEFAULT;
44+
use databend_common_catalog::database::Database;
4445
use databend_common_catalog::lock::LockTableOption;
4546
use databend_common_catalog::merge_into_join::MergeIntoJoin;
4647
use databend_common_catalog::plan::DataSourceInfo;
@@ -257,22 +258,26 @@ impl QueryContext {
257258
}
258259

259260
#[async_backtrace::framed]
260-
pub async fn set_current_database(&self, new_database_name: String) -> Result<()> {
261+
pub async fn set_current_database(
262+
&self,
263+
new_database_name: String,
264+
) -> Result<Arc<dyn Database>> {
261265
let tenant_id = self.get_tenant();
262266
let catalog = self
263267
.get_catalog(self.get_current_catalog().as_str())
264268
.await?;
265269
match catalog.get_database(&tenant_id, &new_database_name).await {
266-
Ok(_) => self.shared.set_current_database(new_database_name),
270+
Ok(db) => {
271+
self.shared.set_current_database(new_database_name);
272+
Ok(db)
273+
}
267274
Err(_) => {
268275
return Err(ErrorCode::UnknownDatabase(format!(
269276
"Cannot use database '{}': It does not exist.",
270277
new_database_name
271278
)));
272279
}
273-
};
274-
275-
Ok(())
280+
}
276281
}
277282

278283
pub fn attach_table(&self, catalog: &str, database: &str, name: &str, table: Arc<dyn Table>) {

src/query/service/tests/it/storages/testdata/caches_table.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ DB.Table: 'system'.'caches', Table: caches-table_id:1, ver:0, Engine: SystemCach
88
| 'test-node' | 'memory_cache_bloom_index_filter' | 0 | 0 | 2147483648 | 'bytes' | 0 | 0 | 0 |
99
| 'test-node' | 'memory_cache_column_oriented_segment_info' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
1010
| 'test-node' | 'memory_cache_compact_segment_info' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
11+
| 'test-node' | 'memory_cache_iceberg_table' | 0 | 0 | 1024 | 'count' | 0 | 0 | 0 |
1112
| 'test-node' | 'memory_cache_inverted_index_file' | 0 | 0 | 2147483648 | 'bytes' | 0 | 0 | 0 |
1213
| 'test-node' | 'memory_cache_inverted_index_file_meta_data' | 0 | 0 | 3000 | 'count' | 0 | 0 | 0 |
1314
| 'test-node' | 'memory_cache_parquet_meta_data' | 0 | 0 | 3000 | 'count' | 0 | 0 | 0 |

0 commit comments

Comments
 (0)