Skip to content

Commit 71dc362

Browse files
alesapinilejn
authored andcommitted
Merge pull request ClickHouse#88341 from ClickHouse/disable_catalogs_in_system_tables
Disable catalogs in system tables
1 parent d4794d3 commit 71dc362

38 files changed

+245
-66
lines changed

src/Backups/BackupEntriesCollector.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ void BackupEntriesCollector::gatherDatabasesMetadata()
397397

398398
case ASTBackupQuery::ElementType::ALL:
399399
{
400-
for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
400+
for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true}))
401401
{
402402
if (!element.except_databases.contains(database_name))
403403
{

src/Core/Settings.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6488,7 +6488,7 @@ Query Iceberg table using the snapshot that was current at a specific timestamp.
64886488
DECLARE(Int64, iceberg_snapshot_id, 0, R"(
64896489
Query Iceberg table using the specific snapshot id.
64906490
)", 0) \
6491-
DECLARE(Bool, show_data_lake_catalogs_in_system_tables, true, R"(
6491+
DECLARE(Bool, show_data_lake_catalogs_in_system_tables, false, R"(
64926492
Enables showing data lake catalogs in system tables.
64936493
)", 0) \
64946494
DECLARE(Bool, delta_lake_enable_expression_visitor_logging, false, R"(

src/Core/SettingsChangesHistory.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,62 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
5656
{"iceberg_timezone_for_timestamptz", "UTC", "UTC", "New setting."},
5757
{"hybrid_table_auto_cast_columns", true, true, "New setting to automatically cast Hybrid table columns when segments disagree on types. Default enabled."},
5858
{"allow_experimental_hybrid_table", false, false, "Added new setting to allow the Hybrid table engine."}
59+
{"show_data_lake_catalogs_in_system_tables", true, false, "Disable catalogs in system tables by default"},
60+
{"optimize_rewrite_like_perfect_affix", false, true, "New setting"},
61+
{"allow_dynamic_type_in_join_keys", true, false, "Disallow using Dynamic type in JOIN keys by default"},
62+
{"use_skip_indexes_on_data_read", false, true, "Enabled skip index usage in read phase by default"},
63+
{"s3queue_keeper_fault_injection_probablility", 0, 0, "New setting."},
64+
{"enable_join_runtime_filters", false, false, "New setting"},
65+
{"join_runtime_filter_exact_values_limit", 10000, 10000, "New setting"},
66+
{"join_runtime_bloom_filter_bytes", 512_KiB, 512_KiB, "New setting"},
67+
{"join_runtime_bloom_filter_hash_functions", 3, 3, "New setting"},
68+
{"use_join_disjunctions_push_down", false, false, "New setting."},
69+
{"joined_block_split_single_row", false, false, "New setting"},
70+
{"rewrite_in_to_join", false, false, "New experimental setting"},
71+
{"iceberg_insert_max_rows_in_data_file", 100000, 1000000, "New setting."},
72+
{"iceberg_insert_max_bytes_in_data_file", 100000000, 100000000, "New setting."},
73+
{"delta_lake_insert_max_rows_in_data_file", 100000, 1000000, "New setting."},
74+
{"delta_lake_log_metadata", false, false, "New setting."},
75+
{"distributed_cache_prefer_bigger_buffer_size", false, false, "New setting."},
76+
{"allow_experimental_qbit_type", false, false, "New experimental setting"},
77+
{"optimize_qbit_distance_function_reads", true, true, "New setting"},
78+
{"read_from_distributed_cache_if_exists_otherwise_bypass_cache", false, false, "New setting"},
79+
{"s3_slow_all_threads_after_retryable_error", false, false, "Disable the setting by default"},
80+
{"backup_slow_all_threads_after_retryable_s3_error", false, false, "Disable the setting by default"},
81+
{"enable_http_compression", false, true, "It should be beneficial in general"},
82+
{"inject_random_order_for_select_without_order_by", false, false, "New setting"},
83+
{"exclude_materialize_skip_indexes_on_insert", "", "", "New setting."},
84+
{"optimize_empty_string_comparisons", false, true, "A new setting."},
85+
{"query_plan_use_logical_join_step", true, true, "Added alias"},
86+
{"schema_inference_make_columns_nullable", 1, 3, "Take nullability information from Parquet/ORC/Arrow metadata by default, instead of making everything nullable."},
87+
{"materialized_views_squash_parallel_inserts", false, true, "Added setting to preserve old behavior if needed."},
88+
});
89+
addSettingsChanges(settings_changes_history, "25.9",
90+
{
91+
{"input_format_protobuf_oneof_presence", false, false, "New setting"},
92+
{"iceberg_delete_data_on_drop", false, false, "New setting"},
93+
{"use_skip_indexes_on_data_read", false, false, "New setting"},
94+
{"s3_slow_all_threads_after_retryable_error", false, false, "Added an alias for setting `backup_slow_all_threads_after_retryable_s3_error`"},
95+
{"iceberg_metadata_log_level", "none", "none", "New setting."},
96+
{"iceberg_insert_max_rows_in_data_file", 100000, 100000, "New setting."},
97+
{"iceberg_insert_max_bytes_in_data_file", 100000000, 100000000, "New setting."},
98+
{"query_plan_optimize_join_order_limit", 0, 1, "New setting"},
99+
{"query_plan_display_internal_aliases", false, false, "New setting"},
100+
{"query_plan_max_step_description_length", 1000000000, 500, "New setting"},
101+
{"allow_experimental_delta_lake_writes", false, false, "New setting."},
102+
{"query_plan_convert_any_join_to_semi_or_anti_join", true, true, "New setting."},
103+
{"text_index_use_bloom_filter", true, true, "New setting."},
104+
{"query_plan_direct_read_from_text_index", true, true, "New setting."},
105+
{"enable_producing_buckets_out_of_order_in_aggregation", false, true, "New setting"},
106+
{"jemalloc_enable_profiler", false, false, "New setting"},
107+
{"jemalloc_collect_profile_samples_in_trace_log", false, false, "New setting"},
108+
{"delta_lake_insert_max_bytes_in_data_file", 1_GiB, 1_GiB, "New setting."},
109+
{"delta_lake_insert_max_rows_in_data_file", 100000, 100000, "New setting."},
110+
{"promql_evaluation_time", Field{"auto"}, Field{"auto"}, "The setting was renamed. The previous name is `evaluation_time`."},
111+
{"evaluation_time", 0, 0, "Old setting which popped up here being renamed."},
112+
{"os_threads_nice_value_query", 0, 0, "New setting."},
113+
{"os_threads_nice_value_materialized_view", 0, 0, "New setting."},
114+
{"os_thread_priority", 0, 0, "Alias for os_threads_nice_value_query."},
59115
});
60116
addSettingsChanges(settings_changes_history, "25.8",
61117
{

src/Databases/DataLake/DatabaseDataLake.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -537,14 +537,10 @@ DatabaseTablesIteratorPtr DatabaseDataLake::getTablesIterator(
537537
DatabaseTablesIteratorPtr DatabaseDataLake::getLightweightTablesIterator(
538538
ContextPtr context_,
539539
const FilterByNameFunction & filter_by_table_name,
540-
bool skip_not_loaded,
541-
bool skip_data_lake_catalog) const
540+
bool skip_not_loaded) const
542541
{
543542
Tables tables;
544543

545-
if (skip_data_lake_catalog)
546-
return std::make_unique<DatabaseTablesSnapshotIterator>(tables, getDatabaseName());
547-
548544
auto catalog = getCatalog();
549545
DB::Names iceberg_tables;
550546

src/Databases/DataLake/DatabaseDataLake.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class DatabaseDataLake final : public IDatabase, WithContext
3131
bool canContainDistributedTables() const override { return false; }
3232
bool canContainRocksDBTables() const override { return false; }
3333
bool shouldBeEmptyOnDetach() const override { return false; }
34+
bool isDatalakeCatalog() const override { return true; }
3435

3536
bool empty() const override;
3637

@@ -47,8 +48,7 @@ class DatabaseDataLake final : public IDatabase, WithContext
4748
DatabaseTablesIteratorPtr getLightweightTablesIterator(
4849
ContextPtr context,
4950
const FilterByNameFunction & filter_by_table_name,
50-
bool skip_not_loaded,
51-
bool skip_data_lake_catalog) const override;
51+
bool skip_not_loaded) const override;
5252

5353

5454
void shutdown() override {}

src/Databases/IDatabase.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ class IDatabase : public std::enable_shared_from_this<IDatabase>
179179

180180
virtual bool canContainRocksDBTables() const { return true; }
181181

182+
virtual bool isDatalakeCatalog() const { return false; }
183+
182184
/// Load a set of existing tables.
183185
/// You can call only once, right after the object is created.
184186
virtual void loadStoredObjects( /// NOLINT
@@ -267,7 +269,7 @@ class IDatabase : public std::enable_shared_from_this<IDatabase>
267269

268270
/// Same as above, but may return non-fully initialized StoragePtr objects which are not suitable for reading.
269271
/// Useful for queries like "SHOW TABLES"
270-
virtual DatabaseTablesIteratorPtr getLightweightTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false, [[maybe_unused]] bool skip_data_lake_catalog = false) const /// NOLINT
272+
virtual DatabaseTablesIteratorPtr getLightweightTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false) const /// NOLINT
271273
{
272274
return getTablesIterator(context, filter_by_table_name, skip_not_loaded);
273275
}

src/Interpreters/DatabaseCatalog.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class DatabaseNameHints : public IHints<>
115115
const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES);
116116

117117
Names result;
118-
auto databases_list = database_catalog.getDatabases();
118+
auto databases_list = database_catalog.getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true});
119119
for (const auto & database_name : databases_list | boost::adaptors::map_keys)
120120
{
121121
if (need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name))
@@ -341,7 +341,10 @@ void DatabaseCatalog::shutdownImpl(std::function<void()> shutdown_system_logs)
341341
auto it = std::find_if(elem.map.begin(), elem.map.end(), not_empty_mapping);
342342
return it != elem.map.end();
343343
}) == uuid_map.end());
344+
344345
databases.clear();
346+
databases_without_datalake_catalogs.clear();
347+
345348
referential_dependencies.clear();
346349
loading_dependencies.clear();
347350
view_dependencies.clear();
@@ -576,6 +579,18 @@ void DatabaseCatalog::assertDatabaseExists(const String & database_name) const
576579
}
577580
}
578581

582+
bool DatabaseCatalog::hasDatalakeCatalogs() const
583+
{
584+
std::lock_guard lock{databases_mutex};
585+
return databases.size() != databases_without_datalake_catalogs.size();
586+
}
587+
588+
bool DatabaseCatalog::isDatalakeCatalog(const String & database_name) const
589+
{
590+
std::lock_guard lock{databases_mutex};
591+
return databases.contains(database_name) && !databases_without_datalake_catalogs.contains(database_name);
592+
}
593+
579594
void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) const
580595
{
581596
std::lock_guard lock{databases_mutex};
@@ -594,6 +609,9 @@ void DatabaseCatalog::attachDatabase(const String & database_name, const Databas
594609
std::lock_guard lock{databases_mutex};
595610
assertDatabaseDoesntExistUnlocked(database_name);
596611
databases.emplace(database_name, database);
612+
if (!database->isDatalakeCatalog())
613+
databases_without_datalake_catalogs.emplace(database_name, database);
614+
597615
NOEXCEPT_SCOPE({
598616
UUID db_uuid = database->getUUID();
599617
if (db_uuid != UUIDHelpers::Nil)
@@ -618,8 +636,9 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri
618636
if (db_uuid != UUIDHelpers::Nil)
619637
removeUUIDMapping(db_uuid);
620638
databases.erase(database_name);
621-
622639
}
640+
if (auto it = databases_without_datalake_catalogs.find(database_name); it != databases_without_datalake_catalogs.end())
641+
databases_without_datalake_catalogs.erase(it);
623642
}
624643
if (!db)
625644
{
@@ -685,7 +704,13 @@ void DatabaseCatalog::updateDatabaseName(const String & old_name, const String &
685704
databases.erase(it);
686705
databases.emplace(new_name, db);
687706

688-
/// Update dependencies.
707+
auto no_catalogs_it = databases_without_datalake_catalogs.find(old_name);
708+
if (no_catalogs_it != databases_without_datalake_catalogs.end())
709+
{
710+
databases_without_datalake_catalogs.erase(no_catalogs_it);
711+
databases_without_datalake_catalogs.emplace(new_name, db);
712+
}
713+
689714
for (const auto & table_name : tables_in_database)
690715
{
691716
auto removed_ref_deps = referential_dependencies.removeDependencies(StorageID{old_name, table_name}, /* remove_isolated_tables= */ true);
@@ -806,10 +831,13 @@ bool DatabaseCatalog::isDatabaseExist(const String & database_name) const
806831
return databases.end() != databases.find(database_name);
807832
}
808833

809-
Databases DatabaseCatalog::getDatabases() const
834+
Databases DatabaseCatalog::getDatabases(GetDatabasesOptions options) const
810835
{
811836
std::lock_guard lock{databases_mutex};
812-
return databases;
837+
if (options.with_datalake_catalogs)
838+
return databases;
839+
840+
return databases_without_datalake_catalogs;
813841
}
814842

815843
bool DatabaseCatalog::isTableExist(const DB::StorageID & table_id, ContextPtr context_) const
@@ -1075,7 +1103,7 @@ void DatabaseCatalog::loadMarkedAsDroppedTables()
10751103
std::map<String, std::pair<StorageID, DiskPtr>> dropped_metadata;
10761104
String path = fs::path("metadata_dropped") / "";
10771105

1078-
auto db_map = getDatabases();
1106+
auto db_map = getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true});
10791107
std::set<DiskPtr> metadata_disk_list;
10801108
for (const auto & [_, db] : db_map)
10811109
{
@@ -1965,7 +1993,7 @@ void DatabaseCatalog::reloadDisksTask()
19651993
disks.swap(disks_to_reload);
19661994
}
19671995

1968-
for (auto & database : getDatabases())
1996+
for (auto & database : getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false}))
19691997
{
19701998
// WARNING: In case of `async_load_databases = true` getTablesIterator() call wait for all table in the database to be loaded.
19711999
// WARNING: It means that no database will be able to update configuration until all databases are fully loaded.
@@ -2120,7 +2148,8 @@ std::pair<String, String> TableNameHints::getExtendedHintForTable(const String &
21202148
{
21212149
/// load all available databases from the DatabaseCatalog instance
21222150
auto & database_catalog = DatabaseCatalog::instance();
2123-
auto all_databases = database_catalog.getDatabases();
2151+
/// NOTE Skip datalake catalogs to avoid unnecessary access to remote catalogs (can be expensive)
2152+
auto all_databases = database_catalog.getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false});
21242153

21252154
for (const auto & [db_name, db] : all_databases)
21262155
{

src/Interpreters/DatabaseCatalog.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ using TemporaryTablesMapping = std::map<String, TemporaryTableHolderPtr>;
121121

122122
class BackgroundSchedulePoolTaskHolder;
123123

124+
struct GetDatabasesOptions
125+
{
126+
bool with_datalake_catalogs{false};
127+
};
128+
124129
/// For some reason Context is required to get Storage from Database object
125130
class DatabaseCatalog : boost::noncopyable, WithMutableContext
126131
{
@@ -171,7 +176,13 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext
171176
DatabasePtr getDatabase(const UUID & uuid) const;
172177
DatabasePtr tryGetDatabase(const UUID & uuid) const;
173178
bool isDatabaseExist(const String & database_name) const;
174-
Databases getDatabases() const;
179+
/// Datalake catalogs are implement at IDatabase level in ClickHouse.
180+
/// In general case Datalake catalog is a some remote service which contains iceberg/delta tables.
181+
/// Sometimes this service charges money for requests. With this flag we explicitly protect ourself
182+
/// to not accidentally query external non-free service for some trivial things like
183+
/// autocompletion hints or system.tables query. We have a setting which allow to show
184+
/// these databases everywhere, but user must explicitly specify it.
185+
Databases getDatabases(GetDatabasesOptions options) const;
175186

176187
/// Same as getDatabase(const String & database_name), but if database_name is empty, current database of local_context is used
177188
DatabasePtr getDatabase(const String & database_name, ContextPtr local_context) const;
@@ -272,6 +283,8 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext
272283
bool canPerformReplicatedDDLQueries() const;
273284

274285
void updateMetadataFile(const DatabasePtr & database);
286+
bool hasDatalakeCatalogs() const;
287+
bool isDatalakeCatalog(const String & database_name) const;
275288

276289
private:
277290
// The global instance of database catalog. unique_ptr is to allow
@@ -319,6 +332,7 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext
319332
mutable std::mutex databases_mutex;
320333

321334
Databases databases TSA_GUARDED_BY(databases_mutex);
335+
Databases databases_without_datalake_catalogs TSA_GUARDED_BY(databases_mutex);
322336
UUIDToStorageMap uuid_map;
323337

324338
/// Referential dependencies between tables: table "A" depends on table "B"

src/Interpreters/InterpreterCheckQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, ContextP
395395
static Strings getAllDatabases(const ContextPtr & context)
396396
{
397397
Strings res;
398-
const auto & databases = DatabaseCatalog::instance().getDatabases();
398+
const auto & databases = DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false});
399399
res.reserve(databases.size());
400400
for (const auto & [database_name, _] : databases)
401401
{

src/Interpreters/InterpreterCreateQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
206206
auto db_num_limit = getContext()->getGlobalContext()->getServerSettings()[ServerSetting::max_database_num_to_throw].value;
207207
if (db_num_limit > 0 && !internal)
208208
{
209-
size_t db_count = DatabaseCatalog::instance().getDatabases().size();
209+
size_t db_count = DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true}).size();
210210
std::initializer_list<std::string_view> system_databases =
211211
{
212212
DatabaseCatalog::TEMPORARY_DATABASE,

0 commit comments

Comments
 (0)