Skip to content

Commit 302f7c3

Browse files
authored
Merge pull request ClickHouse#78741 from azat/rdb-digest-fix-2
Fix Digest does not match (from Replicated database) on CI
2 parents 133882c + 0962e07 commit 302f7c3

File tree

4 files changed

+65
-75
lines changed

4 files changed

+65
-75
lines changed

src/Databases/DatabaseReplicated.cpp

Lines changed: 57 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -810,35 +810,12 @@ void DatabaseReplicated::stopLoading()
810810
DatabaseAtomic::stopLoading();
811811
}
812812

813-
void DatabaseReplicated::dumpLocalTablesForDebugOnly(const ContextPtr & local_context) const
813+
ASTPtr DatabaseReplicated::tryGetCreateOrAttachTableQuery(const String & name, ContextPtr local_context) const
814814
{
815-
auto table_names = getAllTableNames(context.lock());
816-
for (const auto & table_name : table_names)
817-
{
818-
auto ast_ptr = tryGetCreateTableQuery(table_name, local_context);
819-
if (ast_ptr)
820-
LOG_DEBUG(log, "[local] Table {} create query is {}", table_name, ast_ptr->formatForLogging());
821-
else
822-
LOG_DEBUG(log, "[local] Table {} has no create query", table_name);
823-
}
824-
}
825-
826-
void DatabaseReplicated::dumpTablesInZooKeeperForDebugOnly() const
827-
{
828-
UInt32 max_log_ptr{};
829-
auto table_name_to_metadata = tryGetConsistentMetadataSnapshot(getZooKeeper(), max_log_ptr);
830-
for (const auto & [table_name, create_table_query] : table_name_to_metadata)
831-
{
832-
auto query_ast = parseQueryFromMetadataInZooKeeper(table_name, create_table_query);
833-
if (query_ast)
834-
{
835-
LOG_DEBUG(log, "[zookeeper] Table {} create query is {}", table_name, query_ast->formatForLogging());
836-
}
837-
else
838-
{
839-
LOG_DEBUG(log, "[zookeeper] Table {} has no create query", table_name);
840-
}
841-
}
815+
auto res = tryGetCreateTableQuery(name, local_context);
816+
auto & create = res->as<ASTCreateQuery &>();
817+
create.attach = create.is_materialized_view_with_inner_table();
818+
return res;
842819
}
843820

844821
void DatabaseReplicated::tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(const ContextPtr & local_context) const
@@ -848,57 +825,58 @@ void DatabaseReplicated::tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnl
848825
auto table_names_local = getAllTableNames(local_context);
849826

850827
if (table_name_to_metadata_in_zk.size() != table_names_local.size())
851-
LOG_DEBUG(log, "Amount of tables in zk {} locally {}", table_name_to_metadata_in_zk.size(), table_names_local.size());
828+
{
829+
LOG_ERROR(log, "Amount of tables in coordinator {} differs from number of tables locally {}",
830+
table_name_to_metadata_in_zk.size(), table_names_local.size());
831+
}
852832

853833
std::unordered_set<std::string> checked_tables;
854834

855835
for (const auto & table_name : table_names_local)
856836
{
857-
auto local_ast_ptr = tryGetCreateTableQuery(table_name, local_context);
858-
if (table_name_to_metadata_in_zk.contains(table_name))
859-
{
860-
checked_tables.insert(table_name);
861-
auto create_table_query_in_zk = table_name_to_metadata_in_zk[table_name];
862-
auto zk_ast_ptr = parseQueryFromMetadataInZooKeeper(table_name, create_table_query_in_zk);
837+
checked_tables.insert(table_name);
863838

864-
if (local_ast_ptr == nullptr && zk_ast_ptr == nullptr)
865-
{
866-
LOG_DEBUG(log, "AST for table {} is the same (nullptr) in local and ZK", table_name);
867-
}
868-
else if (local_ast_ptr != nullptr && zk_ast_ptr != nullptr && local_ast_ptr->formatWithSecretsOneLine() != zk_ast_ptr->formatWithSecretsOneLine())
869-
{
870-
LOG_ERROR(log, "AST differs for table {}, local {}, in zookeeper {}", table_name, local_ast_ptr->formatForLogging(), zk_ast_ptr->formatForLogging());
871-
}
872-
else if (local_ast_ptr == nullptr)
873-
{
874-
LOG_ERROR(log, "AST differs for table {}, local nullptr, in zookeeper {}", table_name, zk_ast_ptr->formatForLogging());
875-
}
876-
else if (zk_ast_ptr == nullptr)
877-
{
878-
LOG_ERROR(log, "AST differs for table {}, local {}, in zookeeper nullptr", table_name, local_ast_ptr->formatForLogging());
879-
}
880-
else
881-
{
882-
LOG_DEBUG(log, "AST for table {} is the same in local and ZK", table_name);
883-
}
839+
auto local_ast_ptr = tryGetCreateOrAttachTableQuery(table_name, local_context);
840+
auto zk_metadata_it = table_name_to_metadata_in_zk.find(table_name);
841+
842+
auto on_disk_ast_ptr = parseQueryFromMetadataOnDisk(table_name);
843+
844+
ASTPtr zk_ast_ptr;
845+
if (zk_metadata_it != table_name_to_metadata_in_zk.end())
846+
zk_ast_ptr = parseQueryFromMetadataInZooKeeper(table_name, zk_metadata_it->second);
847+
848+
auto local_query_with_secrets = local_ast_ptr ? local_ast_ptr->formatWithSecretsOneLine() : "";
849+
auto zookeeper_query_with_secrets = zk_ast_ptr ? zk_ast_ptr->formatWithSecretsOneLine() : "";
850+
auto on_disk_query_with_secrets = on_disk_ast_ptr ? on_disk_ast_ptr->formatWithSecretsOneLine() : "";
851+
852+
if (local_query_with_secrets != zookeeper_query_with_secrets || local_query_with_secrets != on_disk_query_with_secrets)
853+
{
854+
/// NOTE: due to transaction will be committed **before**
855+
/// tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly()
856+
/// runs, you will almost never enter this code path, since it will
857+
/// update on disk metadata. But the checkDigestValid() will still
858+
/// throw LOGICAL_ERROR since database relies on the on-disk data
859+
/// (for tracking tables_metadata_digest)
860+
LOG_ERROR(log, "AST differs for table {}", table_name);
861+
LOG_ERROR(log, "\t in memory: {}", local_ast_ptr ? local_ast_ptr->formatForLogging() : "nullptr");
862+
LOG_ERROR(log, "\tcoordinator: {}", zk_ast_ptr ? zk_ast_ptr->formatForLogging() : "nullptr");
863+
LOG_ERROR(log, "\t on disk: {}", on_disk_ast_ptr ? on_disk_ast_ptr->formatForLogging() : "nullptr");
884864
}
885865
else
886866
{
887-
if (local_ast_ptr == nullptr)
888-
LOG_ERROR(log, "Table {} exists locally, but missing in ZK", table_name);
889-
else
890-
LOG_ERROR(log, "Table {} exists locally with AST {}, but missing in ZK", table_name, local_ast_ptr->formatForLogging());
867+
LOG_DEBUG(log, "AST for table {} is the same", table_name);
868+
LOG_DEBUG(log, "\t in memory: {}", local_ast_ptr ? local_ast_ptr->formatForLogging() : "nullptr");
869+
LOG_DEBUG(log, "\tcoordinator: {}", zk_ast_ptr ? zk_ast_ptr->formatForLogging() : "nullptr");
870+
LOG_DEBUG(log, "\t on disk: {}", on_disk_ast_ptr ? on_disk_ast_ptr->formatForLogging() : "nullptr");
891871
}
892872
}
893873
for (const auto & [table_name, table_metadata] : table_name_to_metadata_in_zk)
894874
{
895875
if (!checked_tables.contains(table_name))
896876
{
897877
auto zk_ast_ptr = parseQueryFromMetadataInZooKeeper(table_name, table_metadata);
898-
if (zk_ast_ptr == nullptr)
899-
LOG_ERROR(log, "Table {} exists in ZK with AST {}, but missing locally", table_name, zk_ast_ptr->formatForLogging());
900-
else
901-
LOG_ERROR(log, "Table {} exists in ZK, but missing locally", table_name);
878+
auto zookeeper_query = zk_ast_ptr ? zk_ast_ptr->formatForLogging() : "nullptr";
879+
LOG_ERROR(log, "Table {} exists in ZK, but missing locally: {}", table_name, zookeeper_query);
902880
}
903881
}
904882
}
@@ -1037,7 +1015,7 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context) cons
10371015
UInt64 local_digest = 0;
10381016
{
10391017
std::lock_guard lock{mutex};
1040-
for (const auto & table : TSA_SUPPRESS_WARNING_FOR_READ(tables))
1018+
for (const auto & table : tables)
10411019
local_digest += getMetadataHash(table.first);
10421020
}
10431021

@@ -1046,8 +1024,6 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context) cons
10461024
LOG_ERROR(log, "Digest of local metadata ({}) is not equal to in-memory digest ({})", local_digest, tables_metadata_digest);
10471025

10481026
#ifndef NDEBUG
1049-
dumpLocalTablesForDebugOnly(local_context);
1050-
dumpTablesInZooKeeperForDebugOnly();
10511027
tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(local_context);
10521028
#endif
10531029

@@ -1068,8 +1044,6 @@ bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context) cons
10681044
{
10691045
LOG_ERROR(log, "Digest of local metadata ({}) is not equal to digest in Keeper ({})", local_digest_str, zk_digest);
10701046
#ifndef NDEBUG
1071-
dumpLocalTablesForDebugOnly(local_context);
1072-
dumpTablesInZooKeeperForDebugOnly();
10731047
tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(local_context);
10741048
#endif
10751049
return false;
@@ -1601,10 +1575,9 @@ std::map<String, String> DatabaseReplicated::getConsistentMetadataSnapshotImpl(
16011575
return table_name_to_metadata;
16021576
}
16031577

1604-
ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) const
1578+
ASTPtr DatabaseReplicated::parseQueryFromMetadata(const String & table_name, const String & query, const String & description) const
16051579
{
16061580
ParserCreateQuery parser;
1607-
String description = "in ZooKeeper " + zookeeper_path + "/metadata/" + node_name;
16081581
auto ast = parseQuery(
16091582
parser,
16101583
query,
@@ -1615,14 +1588,26 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node
16151588

16161589
auto & create = ast->as<ASTCreateQuery &>();
16171590
if (create.uuid == UUIDHelpers::Nil || create.getTable() != TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database)
1618-
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", node_name, query);
1591+
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected query from {}: {}", table_name, query);
16191592

16201593
create.setDatabase(getDatabaseName());
1621-
create.setTable(unescapeForFileName(node_name));
1594+
create.setTable(unescapeForFileName(table_name));
16221595
create.attach = create.is_materialized_view_with_inner_table();
16231596

16241597
return ast;
16251598
}
1599+
ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) const
1600+
{
1601+
String description = fmt::format("in ZooKeeper {}/metadata/{}", zookeeper_path, node_name);
1602+
return parseQueryFromMetadata(node_name, query, description);
1603+
}
1604+
ASTPtr DatabaseReplicated::parseQueryFromMetadataOnDisk(const String & table_name) const
1605+
{
1606+
auto file_path = getObjectMetadataPath(table_name);
1607+
String description = fmt::format("in metadata {}", file_path);
1608+
String query = DB::readMetadataFile(db_disk, file_path);
1609+
return parseQueryFromMetadata(table_name, query, description);
1610+
}
16261611

16271612
void DatabaseReplicated::dropReplica(
16281613
DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop)

src/Databases/DatabaseReplicated.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ class DatabaseReplicated : public DatabaseAtomic
105105
bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
106106
static bool looksLikeReplicatedDatabasePath(const ZooKeeperPtr & current_zookeeper, const String & path);
107107
void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
108+
/// For Replicated database will return ATTACH for MVs with inner table
109+
ASTPtr tryGetCreateOrAttachTableQuery(const String & name, ContextPtr context) const;
108110

109111
struct
110112
{
@@ -127,7 +129,9 @@ class DatabaseReplicated : public DatabaseAtomic
127129
std::map<String, String> getConsistentMetadataSnapshotImpl(const ZooKeeperPtr & zookeeper, const FilterByNameFunction & filter_by_table_name,
128130
size_t max_retries, UInt32 & max_log_ptr) const;
129131

132+
ASTPtr parseQueryFromMetadata(const String & table_name, const String & query, const String & description) const;
130133
ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query) const;
134+
ASTPtr parseQueryFromMetadataOnDisk(const String & table_name) const;
131135
String readMetadataFile(const String & table_name) const;
132136

133137
ClusterPtr getClusterImpl(bool all_groups = false) const;
@@ -151,8 +155,6 @@ class DatabaseReplicated : public DatabaseAtomic
151155
void assertDigestInTransactionOrInline(const ContextPtr & local_context, const ZooKeeperMetadataTransactionPtr & txn) TSA_REQUIRES(metadata_mutex);
152156

153157
/// For debug purposes only, don't use in production code
154-
void dumpLocalTablesForDebugOnly(const ContextPtr & local_context) const;
155-
void dumpTablesInZooKeeperForDebugOnly() const;
156158
void tryCompareLocalAndZooKeeperTablesAndDumpDiffForDebugOnly(const ContextPtr & local_context) const;
157159

158160
void waitDatabaseStarted() const override;

tests/queries/0_stateless/03001_matview_columns_after_modify_query.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/usr/bin/env bash
2+
# Tags: no-replicated-database, no-shared-catalog
3+
# Tag no-replicated-database -- modify on-disk metadata that may lead to "Digest does not match" in case Replicated database
4+
# Tag no-shared-catalog -- same
25

36
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
47
# shellcheck source=../shell_config.sh

tests/queries/0_stateless/03350_alter_table_fetch_partition_thread_pool.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
-- Tags: no-parallel, no-replicated-database, no-shared-merge-tree
22
-- Tag: no-parallel - to avoid polluting FETCH PARTITION thread pool with other fetches
3-
-- Tag: no-database-replicated - replica_path is different
3+
-- Tag: no-replicated-database - replica_path is different
44

55
drop table if exists data1;
66
drop table if exists data2;

0 commit comments

Comments
 (0)