Skip to content

Commit 02ac2fb

Browse files
authored
Merge pull request #1239 from Altinity/backports/antalya-25.8/88341_disable_catalogs_in_system_tables
Antalya 25.8 Backport of ClickHouse#88341: Disable catalogs in system tables
2 parents c24224b + 81bf874 commit 02ac2fb

37 files changed

+193
-66
lines changed

src/Backups/BackupEntriesCollector.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ void BackupEntriesCollector::gatherDatabasesMetadata()
397397

398398
case ASTBackupQuery::ElementType::ALL:
399399
{
400-
for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases())
400+
for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true}))
401401
{
402402
if (!element.except_databases.contains(database_name))
403403
{

src/Databases/DataLake/DatabaseDataLake.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -537,14 +537,10 @@ DatabaseTablesIteratorPtr DatabaseDataLake::getTablesIterator(
537537
DatabaseTablesIteratorPtr DatabaseDataLake::getLightweightTablesIterator(
538538
ContextPtr context_,
539539
const FilterByNameFunction & filter_by_table_name,
540-
bool skip_not_loaded,
541-
bool skip_data_lake_catalog) const
540+
bool skip_not_loaded) const
542541
{
543542
Tables tables;
544543

545-
if (skip_data_lake_catalog)
546-
return std::make_unique<DatabaseTablesSnapshotIterator>(tables, getDatabaseName());
547-
548544
auto catalog = getCatalog();
549545
DB::Names iceberg_tables;
550546

src/Databases/DataLake/DatabaseDataLake.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class DatabaseDataLake final : public IDatabase, WithContext
3131
bool canContainDistributedTables() const override { return false; }
3232
bool canContainRocksDBTables() const override { return false; }
3333
bool shouldBeEmptyOnDetach() const override { return false; }
34+
bool isDatalakeCatalog() const override { return true; }
3435

3536
bool empty() const override;
3637

@@ -47,8 +48,7 @@ class DatabaseDataLake final : public IDatabase, WithContext
4748
DatabaseTablesIteratorPtr getLightweightTablesIterator(
4849
ContextPtr context,
4950
const FilterByNameFunction & filter_by_table_name,
50-
bool skip_not_loaded,
51-
bool skip_data_lake_catalog) const override;
51+
bool skip_not_loaded) const override;
5252

5353

5454
void shutdown() override {}

src/Databases/IDatabase.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ class IDatabase : public std::enable_shared_from_this<IDatabase>
179179

180180
virtual bool canContainRocksDBTables() const { return true; }
181181

182+
virtual bool isDatalakeCatalog() const { return false; }
183+
182184
/// Load a set of existing tables.
183185
/// You can call only once, right after the object is created.
184186
virtual void loadStoredObjects( /// NOLINT
@@ -267,7 +269,7 @@ class IDatabase : public std::enable_shared_from_this<IDatabase>
267269

268270
/// Same as above, but may return non-fully initialized StoragePtr objects which are not suitable for reading.
269271
/// Useful for queries like "SHOW TABLES"
270-
virtual DatabaseTablesIteratorPtr getLightweightTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false, [[maybe_unused]] bool skip_data_lake_catalog = false) const /// NOLINT
272+
virtual DatabaseTablesIteratorPtr getLightweightTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false) const /// NOLINT
271273
{
272274
return getTablesIterator(context, filter_by_table_name, skip_not_loaded);
273275
}

src/Interpreters/DatabaseCatalog.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class DatabaseNameHints : public IHints<>
115115
const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES);
116116

117117
Names result;
118-
auto databases_list = database_catalog.getDatabases();
118+
auto databases_list = database_catalog.getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true});
119119
for (const auto & database_name : databases_list | boost::adaptors::map_keys)
120120
{
121121
if (need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name))
@@ -341,7 +341,10 @@ void DatabaseCatalog::shutdownImpl(std::function<void()> shutdown_system_logs)
341341
auto it = std::find_if(elem.map.begin(), elem.map.end(), not_empty_mapping);
342342
return it != elem.map.end();
343343
}) == uuid_map.end());
344+
344345
databases.clear();
346+
databases_without_datalake_catalogs.clear();
347+
345348
referential_dependencies.clear();
346349
loading_dependencies.clear();
347350
view_dependencies.clear();
@@ -576,6 +579,18 @@ void DatabaseCatalog::assertDatabaseExists(const String & database_name) const
576579
}
577580
}
578581

582+
bool DatabaseCatalog::hasDatalakeCatalogs() const
583+
{
584+
std::lock_guard lock{databases_mutex};
585+
return databases.size() != databases_without_datalake_catalogs.size();
586+
}
587+
588+
bool DatabaseCatalog::isDatalakeCatalog(const String & database_name) const
589+
{
590+
std::lock_guard lock{databases_mutex};
591+
return databases.contains(database_name) && !databases_without_datalake_catalogs.contains(database_name);
592+
}
593+
579594
void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) const
580595
{
581596
std::lock_guard lock{databases_mutex};
@@ -594,6 +609,9 @@ void DatabaseCatalog::attachDatabase(const String & database_name, const Databas
594609
std::lock_guard lock{databases_mutex};
595610
assertDatabaseDoesntExistUnlocked(database_name);
596611
databases.emplace(database_name, database);
612+
if (!database->isDatalakeCatalog())
613+
databases_without_datalake_catalogs.emplace(database_name, database);
614+
597615
NOEXCEPT_SCOPE({
598616
UUID db_uuid = database->getUUID();
599617
if (db_uuid != UUIDHelpers::Nil)
@@ -618,8 +636,9 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri
618636
if (db_uuid != UUIDHelpers::Nil)
619637
removeUUIDMapping(db_uuid);
620638
databases.erase(database_name);
621-
622639
}
640+
if (auto it = databases_without_datalake_catalogs.find(database_name); it != databases_without_datalake_catalogs.end())
641+
databases_without_datalake_catalogs.erase(it);
623642
}
624643
if (!db)
625644
{
@@ -685,7 +704,13 @@ void DatabaseCatalog::updateDatabaseName(const String & old_name, const String &
685704
databases.erase(it);
686705
databases.emplace(new_name, db);
687706

688-
/// Update dependencies.
707+
auto no_catalogs_it = databases_without_datalake_catalogs.find(old_name);
708+
if (no_catalogs_it != databases_without_datalake_catalogs.end())
709+
{
710+
databases_without_datalake_catalogs.erase(no_catalogs_it);
711+
databases_without_datalake_catalogs.emplace(new_name, db);
712+
}
713+
689714
for (const auto & table_name : tables_in_database)
690715
{
691716
auto removed_ref_deps = referential_dependencies.removeDependencies(StorageID{old_name, table_name}, /* remove_isolated_tables= */ true);
@@ -806,10 +831,13 @@ bool DatabaseCatalog::isDatabaseExist(const String & database_name) const
806831
return databases.end() != databases.find(database_name);
807832
}
808833

809-
Databases DatabaseCatalog::getDatabases() const
834+
Databases DatabaseCatalog::getDatabases(GetDatabasesOptions options) const
810835
{
811836
std::lock_guard lock{databases_mutex};
812-
return databases;
837+
if (options.with_datalake_catalogs)
838+
return databases;
839+
840+
return databases_without_datalake_catalogs;
813841
}
814842

815843
bool DatabaseCatalog::isTableExist(const DB::StorageID & table_id, ContextPtr context_) const
@@ -1075,7 +1103,7 @@ void DatabaseCatalog::loadMarkedAsDroppedTables()
10751103
std::map<String, std::pair<StorageID, DiskPtr>> dropped_metadata;
10761104
String path = fs::path("metadata_dropped") / "";
10771105

1078-
auto db_map = getDatabases();
1106+
auto db_map = getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true});
10791107
std::set<DiskPtr> metadata_disk_list;
10801108
for (const auto & [_, db] : db_map)
10811109
{
@@ -1965,7 +1993,7 @@ void DatabaseCatalog::reloadDisksTask()
19651993
disks.swap(disks_to_reload);
19661994
}
19671995

1968-
for (auto & database : getDatabases())
1996+
for (auto & database : getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false}))
19691997
{
19701998
// WARNING: In case of `async_load_databases = true` getTablesIterator() call wait for all table in the database to be loaded.
19711999
// WARNING: It means that no database will be able to update configuration until all databases are fully loaded.
@@ -2120,7 +2148,8 @@ std::pair<String, String> TableNameHints::getExtendedHintForTable(const String &
21202148
{
21212149
/// load all available databases from the DatabaseCatalog instance
21222150
auto & database_catalog = DatabaseCatalog::instance();
2123-
auto all_databases = database_catalog.getDatabases();
2151+
/// NOTE Skip datalake catalogs to avoid unnecessary access to remote catalogs (can be expensive)
2152+
auto all_databases = database_catalog.getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false});
21242153

21252154
for (const auto & [db_name, db] : all_databases)
21262155
{

src/Interpreters/DatabaseCatalog.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ using TemporaryTablesMapping = std::map<String, TemporaryTableHolderPtr>;
121121

122122
class BackgroundSchedulePoolTaskHolder;
123123

124+
struct GetDatabasesOptions
125+
{
126+
bool with_datalake_catalogs{false};
127+
};
128+
124129
/// For some reason Context is required to get Storage from Database object
125130
class DatabaseCatalog : boost::noncopyable, WithMutableContext
126131
{
@@ -171,7 +176,13 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext
171176
DatabasePtr getDatabase(const UUID & uuid) const;
172177
DatabasePtr tryGetDatabase(const UUID & uuid) const;
173178
bool isDatabaseExist(const String & database_name) const;
174-
Databases getDatabases() const;
179+
/// Datalake catalogs are implement at IDatabase level in ClickHouse.
180+
/// In general case Datalake catalog is a some remote service which contains iceberg/delta tables.
181+
/// Sometimes this service charges money for requests. With this flag we explicitly protect ourself
182+
/// to not accidentally query external non-free service for some trivial things like
183+
/// autocompletion hints or system.tables query. We have a setting which allow to show
184+
/// these databases everywhere, but user must explicitly specify it.
185+
Databases getDatabases(GetDatabasesOptions options) const;
175186

176187
/// Same as getDatabase(const String & database_name), but if database_name is empty, current database of local_context is used
177188
DatabasePtr getDatabase(const String & database_name, ContextPtr local_context) const;
@@ -272,6 +283,8 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext
272283
bool canPerformReplicatedDDLQueries() const;
273284

274285
void updateMetadataFile(const DatabasePtr & database);
286+
bool hasDatalakeCatalogs() const;
287+
bool isDatalakeCatalog(const String & database_name) const;
275288

276289
private:
277290
// The global instance of database catalog. unique_ptr is to allow
@@ -319,6 +332,7 @@ class DatabaseCatalog : boost::noncopyable, WithMutableContext
319332
mutable std::mutex databases_mutex;
320333

321334
Databases databases TSA_GUARDED_BY(databases_mutex);
335+
Databases databases_without_datalake_catalogs TSA_GUARDED_BY(databases_mutex);
322336
UUIDToStorageMap uuid_map;
323337

324338
/// Referential dependencies between tables: table "A" depends on table "B"

src/Interpreters/InterpreterCheckQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, ContextP
395395
static Strings getAllDatabases(const ContextPtr & context)
396396
{
397397
Strings res;
398-
const auto & databases = DatabaseCatalog::instance().getDatabases();
398+
const auto & databases = DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false});
399399
res.reserve(databases.size());
400400
for (const auto & [database_name, _] : databases)
401401
{

src/Interpreters/InterpreterCreateQuery.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
206206
auto db_num_limit = getContext()->getGlobalContext()->getServerSettings()[ServerSetting::max_database_num_to_throw].value;
207207
if (db_num_limit > 0 && !internal)
208208
{
209-
size_t db_count = DatabaseCatalog::instance().getDatabases().size();
209+
size_t db_count = DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = true}).size();
210210
std::initializer_list<std::string_view> system_databases =
211211
{
212212
DatabaseCatalog::TEMPORARY_DATABASE,

src/Interpreters/InterpreterShowTablesQuery.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <Storages/ColumnsDescription.h>
1515
#include <Common/Macros.h>
1616
#include <Common/typeid_cast.h>
17+
#include <Core/Settings.h>
1718

1819

1920
namespace DB
@@ -229,8 +230,20 @@ BlockIO InterpreterShowTablesQuery::execute()
229230

230231
return res;
231232
}
233+
auto rewritten_query = getRewrittenQuery();
234+
String database = getContext()->resolveDatabase(query.getFrom());
235+
if (DatabaseCatalog::instance().isDatalakeCatalog(database))
236+
{
237+
auto context_copy = Context::createCopy(getContext());
238+
/// HACK To always show them in explicit "SHOW TABLES" queries
239+
context_copy->setSetting("show_data_lake_catalogs_in_system_tables", true);
240+
return executeQuery(rewritten_query, context_copy, QueryFlags{ .internal = true }).second;
241+
}
242+
else
243+
{
244+
return executeQuery(rewritten_query, getContext(), QueryFlags{ .internal = true }).second;
245+
}
232246

233-
return executeQuery(getRewrittenQuery(), getContext(), QueryFlags{ .internal = true }).second;
234247
}
235248

236249
/// (*) Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to

src/Interpreters/InterpreterSystemQuery.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ void InterpreterSystemQuery::startStopAction(StorageActionBlockType action_type,
247247
}
248248
else
249249
{
250-
for (auto & elem : DatabaseCatalog::instance().getDatabases())
250+
for (auto & elem : DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false}))
251251
{
252252
startStopActionInDatabase(action_type, start, elem.first, elem.second, getContext(), log);
253253
}
@@ -1110,7 +1110,7 @@ void InterpreterSystemQuery::restartReplicas(ContextMutablePtr system_context)
11101110
bool access_is_granted_globally = access->isGranted(AccessType::SYSTEM_RESTART_REPLICA);
11111111
bool show_tables_is_granted_globally = access->isGranted(AccessType::SHOW_TABLES);
11121112

1113-
for (auto & elem : catalog.getDatabases())
1113+
for (auto & elem : catalog.getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false}))
11141114
{
11151115
if (!elem.second->canContainMergeTreeTables())
11161116
continue;
@@ -1172,7 +1172,7 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query)
11721172
}
11731173
else if (query.is_drop_whole_replica)
11741174
{
1175-
auto databases = DatabaseCatalog::instance().getDatabases();
1175+
auto databases = DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false});
11761176
auto access = getContext()->getAccess();
11771177
bool access_is_granted_globally = access->isGranted(AccessType::SYSTEM_DROP_REPLICA);
11781178

@@ -1213,7 +1213,7 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query)
12131213
String remote_replica_path = fs::path(query.replica_zk_path) / "replicas" / query.replica;
12141214

12151215
/// This check is actually redundant, but it may prevent from some user mistakes
1216-
for (auto & elem : DatabaseCatalog::instance().getDatabases())
1216+
for (auto & elem : DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false}))
12171217
{
12181218
DatabasePtr & database = elem.second;
12191219
for (auto iterator = database->getTablesIterator(getContext()); iterator->isValid(); iterator->next())
@@ -1307,7 +1307,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query)
13071307
}
13081308
else if (query.is_drop_whole_replica)
13091309
{
1310-
auto databases = DatabaseCatalog::instance().getDatabases();
1310+
auto databases = DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false});
13111311
auto access = getContext()->getAccess();
13121312
bool access_is_granted_globally = access->isGranted(AccessType::SYSTEM_DROP_REPLICA);
13131313

@@ -1333,7 +1333,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query)
13331333
getContext()->checkAccess(AccessType::SYSTEM_DROP_REPLICA);
13341334

13351335
/// This check is actually redundant, but it may prevent from some user mistakes
1336-
for (auto & elem : DatabaseCatalog::instance().getDatabases())
1336+
for (auto & elem : DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false}))
13371337
if (auto * replicated = dynamic_cast<DatabaseReplicated *>(elem.second.get()))
13381338
check_not_local_replica(replicated, query);
13391339

@@ -1451,7 +1451,7 @@ void InterpreterSystemQuery::loadOrUnloadPrimaryKeysImpl(bool load)
14511451
getContext()->checkAccess(load ? AccessType::SYSTEM_LOAD_PRIMARY_KEY : AccessType::SYSTEM_UNLOAD_PRIMARY_KEY);
14521452
LOG_TRACE(log, "{} primary keys for all tables", load ? "Loading" : "Unloading");
14531453

1454-
for (auto & database : DatabaseCatalog::instance().getDatabases())
1454+
for (auto & database : DatabaseCatalog::instance().getDatabases(GetDatabasesOptions{.with_datalake_catalogs = false}))
14551455
{
14561456
for (auto it = database.second->getTablesIterator(getContext()); it->isValid(); it->next())
14571457
{

0 commit comments

Comments
 (0)