Skip to content

Commit eff6616

Browse files
Merge pull request ClickHouse#84129 from ClickHouse/backport/25.6/84007
Backport ClickHouse#84007 to 25.6: Fix rare bug with `MATERIALIZE COLUMN` query
2 parents 880bb9b + 6650147 commit eff6616

File tree

3 files changed

+50
-3
lines changed

3 files changed

+50
-3
lines changed

src/Storages/MergeTree/MutateTask.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ static void splitAndModifyMutationCommands(
168168
for_interpreter.push_back(command);
169169
mutated_columns.emplace(command.column_name);
170170
}
171+
172+
/// Materialize column in case of complex data types like tuple can remove some nested columns
173+
/// Here we add it "for renames" because these set of commands also removes redundant files
174+
if (part_columns.has(command.column_name))
175+
for_file_renames.push_back(command);
171176
}
172177
if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
173178
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTICS
@@ -307,6 +312,11 @@ static void splitAndModifyMutationCommands(
307312
auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name);
308313
if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary))
309314
for_interpreter.push_back(command);
315+
316+
/// Materialize column in case of complex data types like tuple can remove some nested columns
317+
/// Here we add it "for renames" because these set of commands also removes redundant files
318+
if (part_columns.has(command.column_name))
319+
for_file_renames.push_back(command);
310320
}
311321
else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX
312322
|| command.type == MutationCommand::Type::MATERIALIZE_STATISTICS
@@ -887,12 +897,11 @@ static NameToNameVector collectFilesForRenames(
887897
if (source_part->checksums.has(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX))
888898
add_rename(STATS_FILE_PREFIX + command.column_name + STATS_FILE_SUFFIX, STATS_FILE_PREFIX + command.rename_to + STATS_FILE_SUFFIX);
889899
}
890-
else if (command.type == MutationCommand::Type::READ_COLUMN)
900+
else if (command.type == MutationCommand::Type::READ_COLUMN || command.type == MutationCommand::Type::MATERIALIZE_COLUMN)
891901
{
892902
/// Remove files for streams that exist in source_part,
893-
/// but were removed in new_part by MODIFY COLUMN from
903+
/// but were removed in new_part by MODIFY COLUMN or MATERIALIZE COLUMN from
894904
/// type with higher number of streams (e.g. LowCardinality -> String).
895-
896905
auto old_streams = getStreamCounts(source_part, source_part->checksums, source_part->getColumns().getNames());
897906
auto new_streams = getStreamCounts(new_part, source_part->checksums, source_part->getColumns().getNames());
898907

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env bash
2+
# Tags: no-random-settings, no-random-merge-tree-settings
3+
4+
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
5+
# shellcheck source=../shell_config.sh
6+
. "$CURDIR"/../shell_config.sh
7+
8+
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS customer_dbt_materialize"
9+
10+
$CLICKHOUSE_CLIENT -n --query "CREATE TABLE customer_dbt_materialize(
11+
key UInt64,
12+
value Array(Tuple(transaction_hash String, instruction_sig_hash String)) MATERIALIZED array((toString(key), toString(key)))
13+
)
14+
ENGINE = ReplicatedMergeTree('/zookeeper/{database}/test_replicated_merge_tree', 'customer_dbt_materialize')
15+
ORDER BY tuple()
16+
SETTINGS min_bytes_for_wide_part = 0, min_bytes_for_full_part_storage = 0;"
17+
18+
$CLICKHOUSE_CLIENT --query "INSERT INTO customer_dbt_materialize SELECT number FROM numbers(1000)"
19+
20+
# NOTE This command looks like noop (pure metadata change which we will override with next ALTER), however it leads to important logic in the codebase:
21+
# When we apply MODIFY COLUMN we validate that we changed something in PHYSICAL column. If we don't change anything in PHYSICAL column, we will not touch any data parts.
22+
#
23+
# After this MODIFY `value` column is not a physical column anymore, however it still exists in data part. So the next ALTER MODIFY COLUMN to MATERIALIZED state
24+
# will also do nothing with data parts (because `value` is ALIAS, not PHYSICAL column).
25+
#
26+
# And the last MATERIALIZE COLUMN will trigger real mutation which will rewrite data part and leave incorrect checksum on disk.
27+
$CLICKHOUSE_CLIENT --query "ALTER TABLE customer_dbt_materialize MODIFY COLUMN value Array(Tuple(transaction_hash String, instruction_sig_hash String)) ALIAS array((toString(key), toString(key))) SETTINGS mutations_sync = 2"
28+
29+
$CLICKHOUSE_CLIENT --query "ALTER TABLE customer_dbt_materialize MODIFY COLUMN value Array(Tuple(transaction_hash String, transaction_index_data String)) MATERIALIZED array((toString(key), toString(key))) SETTINGS mutations_sync = 2"
30+
31+
$CLICKHOUSE_CLIENT --query "ALTER TABLE customer_dbt_materialize MATERIALIZE COLUMN value"
32+
33+
$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA customer_dbt_materialize"
34+
35+
$CLICKHOUSE_CLIENT --query "CHECK TABLE customer_dbt_materialize"
36+
37+
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS customer_dbt_materialize"

0 commit comments

Comments
 (0)