Skip to content

Commit 4d2e00b

Browse files
Merge pull request ClickHouse#78022 from GrigoryPervakov/summing-keys
Validate SummungMergeTree columns to sum are not in sort key, fix partition key validation. Fix auto resolve of summing cols.
2 parents aafa86f + 7d73754 commit 4d2e00b

17 files changed

+101
-50
lines changed

docs/en/engines/table-engines/mergetree-family/summingmergetree.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ For a description of request parameters, see [request description](../../../sql-
3535
#### columns {#columns}
3636

3737
`columns` - a tuple with the names of columns where values will be summarized. Optional parameter.
38-
The columns must be of a numeric type and must not be in the primary key.
38+
The columns must be of a numeric type and must not be in the partition or sorting key.
3939

40-
If `columns` is not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key.
40+
If `columns` is not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the sorting key.
4141

4242
### Query clauses {#query-clauses}
4343

src/Core/SettingsChangesHistory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,7 @@ const VersionToSettingsChangesMap & getMergeTreeSettingsChangesHistory()
701701
{"max_postpone_time_for_failed_replicated_fetches_ms", 1ULL * 60 * 1000, 1ULL * 60 * 1000, "Added new setting to enable postponing fetch tasks in the replication queue."},
702702
{"max_postpone_time_for_failed_replicated_merges_ms", 1ULL * 60 * 1000, 1ULL * 60 * 1000, "Added new setting to enable postponing merge tasks in the replication queue."},
703703
{"max_postpone_time_for_failed_replicated_tasks_ms", 5ULL * 60 * 1000, 5ULL * 60 * 1000, "Added new setting to enable postponing tasks in the replication queue."},
704-
704+
{"allow_summing_columns_in_partition_or_order_key", true, false, "New setting to allow summing of partition or sorting key columns"},
705705
});
706706
addSettingsChanges(merge_tree_settings_changes_history, "25.3",
707707
{

src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ struct SummingSortedAlgorithm::AggregateDescription
9999
};
100100

101101

102-
static bool isInPrimaryKey(const SortDescription & description, const std::string & name)
102+
static bool isInSortingKey(const SortDescription & description, const std::string & name)
103103
{
104104
for (const auto & desc : description)
105105
if (desc.column_name == name)
@@ -108,10 +108,10 @@ static bool isInPrimaryKey(const SortDescription & description, const std::strin
108108
return false;
109109
}
110110

111-
static bool isInPartitionKey(const std::string & column_name, const Names & partition_key_columns)
111+
static bool isInNames(const std::string & column_name, const Names & names)
112112
{
113-
auto is_in_partition_key = std::find(partition_key_columns.begin(), partition_key_columns.end(), column_name);
114-
return is_in_partition_key != partition_key_columns.end();
113+
auto is_in_partition_key = std::find(names.begin(), names.end(), column_name);
114+
return is_in_partition_key != names.end();
115115
}
116116

117117

@@ -205,7 +205,7 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
205205
const Block & header,
206206
const SortDescription & description,
207207
const Names & column_names_to_sum,
208-
const Names & partition_key_columns)
208+
const Names & partition_and_sorting_required_columns)
209209
{
210210
size_t num_columns = header.columns();
211211
SummingSortedAlgorithm::ColumnsDefinition def;
@@ -254,16 +254,14 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
254254
continue;
255255
}
256256

257-
/// Are they inside the primary key or partition key?
258-
if (isInPrimaryKey(description, column.name) || isInPartitionKey(column.name, partition_key_columns))
257+
/// Are they inside the sorting key or partition key? Check both to ignore columns with order expression.
258+
if (isInSortingKey(description, column.name) || isInNames(column.name, partition_and_sorting_required_columns))
259259
{
260260
def.column_numbers_not_to_aggregate.push_back(i);
261261
continue;
262262
}
263263

264-
if (column_names_to_sum.empty()
265-
|| column_names_to_sum.end() !=
266-
std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name))
264+
if (column_names_to_sum.empty() || isInNames(column.name, column_names_to_sum))
267265
{
268266
// Create aggregator to sum this column
269267
SummingSortedAlgorithm::AggregateDescription desc;
@@ -311,7 +309,8 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
311309
/// no elements of map could be in primary key
312310
auto column_num_it = map.second.begin();
313311
for (; column_num_it != map.second.end(); ++column_num_it)
314-
if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name))
312+
if (isInSortingKey(description, header.safeGetByPosition(*column_num_it).name)
313+
|| isInNames(header.safeGetByPosition(*column_num_it).name, partition_and_sorting_required_columns))
315314
break;
316315
if (column_num_it != map.second.end())
317316
{
@@ -690,11 +689,12 @@ SummingSortedAlgorithm::SummingSortedAlgorithm(
690689
size_t num_inputs,
691690
SortDescription description_,
692691
const Names & column_names_to_sum,
693-
const Names & partition_key_columns,
692+
const Names & partition_and_sorting_required_columns,
694693
size_t max_block_size_rows,
695694
size_t max_block_size_bytes)
696695
: IMergingAlgorithmWithDelayedChunk(header_, num_inputs, std::move(description_))
697-
, columns_definition(defineColumns(header_, description, column_names_to_sum, partition_key_columns))
696+
, columns_definition(
697+
defineColumns(header_, description, column_names_to_sum, partition_and_sorting_required_columns))
698698
, merged_data(max_block_size_rows, max_block_size_bytes, columns_definition)
699699
{
700700
}

src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class SummingSortedAlgorithm final : public IMergingAlgorithmWithDelayedChunk
2121
/// List of columns to be summed. If empty, all numeric columns that are not in the description are taken.
2222
const Names & column_names_to_sum,
2323
/// List of partition key columns. They have to be excluded.
24-
const Names & partition_key_columns,
24+
const Names & partition_and_sorting_required_columns,
2525
size_t max_block_size_rows,
2626
size_t max_block_size_bytes);
2727

src/Processors/Merges/SummingSortedTransform.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class SummingSortedTransform final : public IMergingTransform<SummingSortedAlgor
2020
const Block & header, size_t num_inputs,
2121
SortDescription description_,
2222
/// List of columns to be summed. If empty, all numeric columns that are not in the description are taken.
23-
const Names & column_names_to_sum,
23+
const Names & partition_and_sorting_required_columns,
2424
const Names & partition_key_columns,
2525
size_t max_block_size_rows,
2626
size_t max_block_size_bytes
@@ -30,7 +30,7 @@ class SummingSortedTransform final : public IMergingTransform<SummingSortedAlgor
3030
header,
3131
num_inputs,
3232
std::move(description_),
33-
column_names_to_sum,
33+
partition_and_sorting_required_columns,
3434
partition_key_columns,
3535
max_block_size_rows,
3636
max_block_size_bytes)

src/Processors/QueryPlan/ReadFromMergeTree.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ static void addMergingFinal(
12831283
Pipe & pipe,
12841284
const SortDescription & sort_description,
12851285
MergeTreeData::MergingParams merging_params,
1286-
Names partition_key_columns,
1286+
const StorageMetadataPtr & metadata_snapshot,
12871287
size_t max_block_size_rows,
12881288
bool enable_vertical_final)
12891289
{
@@ -1304,9 +1304,12 @@ static void addMergingFinal(
13041304
return std::make_shared<CollapsingSortedTransform>(header, num_outputs,
13051305
sort_description, merging_params.sign_column, true, max_block_size_rows, /*max_block_size_bytes=*/0);
13061306

1307-
case MergeTreeData::MergingParams::Summing:
1307+
case MergeTreeData::MergingParams::Summing: {
1308+
auto required_columns = metadata_snapshot->getPartitionKey().expression->getRequiredColumns();
1309+
required_columns.append_range(metadata_snapshot->getSortingKey().expression->getRequiredColumns());
13081310
return std::make_shared<SummingSortedTransform>(header, num_outputs,
1309-
sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size_rows, /*max_block_size_bytes=*/0);
1311+
sort_description, merging_params.columns_to_sum, required_columns, max_block_size_rows, /*max_block_size_bytes=*/0);
1312+
}
13101313

13111314
case MergeTreeData::MergingParams::Aggregating:
13121315
return std::make_shared<AggregatingSortedTransform>(header, num_outputs,
@@ -1518,8 +1521,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
15181521
size_t sort_columns_size = sort_columns.size();
15191522
sort_description.reserve(sort_columns_size);
15201523

1521-
Names partition_key_columns = storage_snapshot->metadata->getPartitionKey().column_names;
1522-
15231524
for (size_t i = 0; i < sort_columns_size; ++i)
15241525
{
15251526
if (!reverse_flags.empty() && reverse_flags[i])
@@ -1533,7 +1534,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
15331534
pipe,
15341535
sort_description,
15351536
data.merging_params,
1536-
partition_key_columns,
1537+
storage_snapshot->metadata,
15371538
block_size.max_block_size_rows,
15381539
enable_vertical_final);
15391540

src/Storages/MergeTree/MergeTask.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,7 +1524,7 @@ class MergePartsStep : public ITransformingStep
15241524
MergePartsStep(
15251525
const Header & input_header_,
15261526
const SortDescription & sort_description_,
1527-
const Names partition_key_columns_,
1527+
const Names partition_and_sorting_required_columns_,
15281528
const MergeTreeData::MergingParams & merging_params_,
15291529
const String & rows_sources_temporary_file_name_,
15301530
UInt64 merge_block_size_rows_,
@@ -1534,7 +1534,7 @@ class MergePartsStep : public ITransformingStep
15341534
time_t time_of_merge_)
15351535
: ITransformingStep(input_header_, input_header_, getTraits())
15361536
, sort_description(sort_description_)
1537-
, partition_key_columns(partition_key_columns_)
1537+
, partition_and_sorting_required_columns(partition_and_sorting_required_columns_)
15381538
, merging_params(merging_params_)
15391539
, rows_sources_temporary_file_name(rows_sources_temporary_file_name_)
15401540
, merge_block_size_rows(merge_block_size_rows_)
@@ -1588,7 +1588,7 @@ class MergePartsStep : public ITransformingStep
15881588

15891589
case MergeTreeData::MergingParams::Summing:
15901590
merged_transform = std::make_shared<SummingSortedTransform>(
1591-
header, input_streams_count, sort_description, merging_params.columns_to_sum, partition_key_columns, merge_block_size_rows, merge_block_size_bytes);
1591+
header, input_streams_count, sort_description, merging_params.columns_to_sum, partition_and_sorting_required_columns, merge_block_size_rows, merge_block_size_bytes);
15921592
break;
15931593

15941594
case MergeTreeData::MergingParams::Aggregating:
@@ -1651,7 +1651,7 @@ class MergePartsStep : public ITransformingStep
16511651
}
16521652

16531653
const SortDescription sort_description;
1654-
const Names partition_key_columns;
1654+
const Names partition_and_sorting_required_columns;
16551655
const MergeTreeData::MergingParams merging_params{};
16561656
const String rows_sources_temporary_file_name;
16571657
const UInt64 merge_block_size_rows;
@@ -1807,7 +1807,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
18071807
size_t sort_columns_size = sort_columns.size();
18081808
sort_description.reserve(sort_columns_size);
18091809

1810-
Names partition_key_columns = global_ctx->metadata_snapshot->getPartitionKey().column_names;
1810+
auto partition_and_sorting_required_columns = global_ctx->metadata_snapshot->getPartitionKey().expression->getRequiredColumns();
1811+
partition_and_sorting_required_columns.append_range(global_ctx->metadata_snapshot->getSortingKey().expression->getRequiredColumns());
18111812

18121813
for (size_t i = 0; i < sort_columns_size; ++i)
18131814
{
@@ -1829,7 +1830,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
18291830
auto merge_step = std::make_unique<MergePartsStep>(
18301831
merge_parts_query_plan.getCurrentHeader(),
18311832
sort_description,
1832-
partition_key_columns,
1833+
partition_and_sorting_required_columns,
18331834
global_ctx->merging_params,
18341835
(is_vertical_merge ? RowsSourcesTemporaryFile::FILE_ID : ""), /// rows_sources' temporary file is used only for vertical merge
18351836
(*merge_tree_settings)[MergeTreeSetting::merge_max_block_size],

src/Storages/MergeTree/MergeTreeData.cpp

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ namespace MergeTreeSetting
196196
extern const MergeTreeSettingsBool allow_nullable_key;
197197
extern const MergeTreeSettingsBool allow_remote_fs_zero_copy_replication;
198198
extern const MergeTreeSettingsBool allow_suspicious_indices;
199+
extern const MergeTreeSettingsBool allow_summing_columns_in_partition_or_order_key;
199200
extern const MergeTreeSettingsBool assign_part_uuids;
200201
extern const MergeTreeSettingsBool async_insert;
201202
extern const MergeTreeSettingsBool check_sample_column_is_correct;
@@ -556,7 +557,7 @@ MergeTreeData::MergeTreeData(
556557
setProperties(metadata_, metadata_, !sanity_checks);
557558

558559
/// NOTE: using the same columns list as is read when performing actual merges.
559-
merging_params.check(metadata_);
560+
merging_params.check(*settings, metadata_);
560561

561562
if (metadata_.sampling_key.definition_ast != nullptr)
562563
{
@@ -1123,7 +1124,7 @@ void MergeTreeData::checkStoragePolicy(const StoragePolicyPtr & new_storage_poli
11231124
}
11241125

11251126

1126-
void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadata) const
1127+
void MergeTreeData::MergingParams::check(const MergeTreeSettings & settings, const StorageInMemoryMetadata & metadata) const
11271128
{
11281129
const auto columns = metadata.getColumns().getAllPhysical();
11291130

@@ -1238,9 +1239,9 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
12381239

12391240
if (mode == MergingParams::Summing)
12401241
{
1241-
auto columns_to_sum_copy = columns_to_sum;
1242-
std::sort(columns_to_sum_copy.begin(), columns_to_sum_copy.end());
1243-
if (const auto it = std::adjacent_find(columns_to_sum_copy.begin(), columns_to_sum_copy.end()); it != columns_to_sum_copy.end())
1242+
auto columns_to_sum_sorted = columns_to_sum;
1243+
std::sort(columns_to_sum_sorted.begin(), columns_to_sum_sorted.end());
1244+
if (const auto it = std::adjacent_find(columns_to_sum_sorted.begin(), columns_to_sum_sorted.end()); it != columns_to_sum_sorted.end())
12441245
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} is listed multiple times in the list of columns to sum", *it);
12451246

12461247
/// If columns_to_sum are set, then check that such columns exist.
@@ -1257,20 +1258,39 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
12571258
column_to_sum);
12581259
}
12591260

1261+
auto allow_summing_columns_in_partition_or_order_key = settings[MergeTreeSetting::allow_summing_columns_in_partition_or_order_key];
1262+
12601263
/// Check that summing columns are not in partition key.
1261-
if (metadata.isPartitionKeyDefined())
1264+
if (!allow_summing_columns_in_partition_or_order_key && metadata.isPartitionKeyDefined())
12621265
{
1263-
auto partition_key_columns = metadata.getPartitionKey().column_names;
1266+
auto partition_key_columns = metadata.getPartitionKey().expression->getRequiredColumns();
1267+
std::sort(partition_key_columns.begin(), partition_key_columns.end());
12641268

12651269
Names names_intersection;
1266-
std::set_intersection(columns_to_sum.begin(), columns_to_sum.end(),
1270+
std::set_intersection(columns_to_sum_sorted.begin(), columns_to_sum_sorted.end(),
12671271
partition_key_columns.begin(), partition_key_columns.end(),
12681272
std::back_inserter(names_intersection));
12691273

12701274
if (!names_intersection.empty())
12711275
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Columns: {} listed both in columns to sum and in partition key. "
12721276
"That is not allowed.", boost::algorithm::join(names_intersection, ", "));
12731277
}
1278+
1279+
/// Check that summing columns are not in sorting key.
1280+
if (!allow_summing_columns_in_partition_or_order_key && metadata.isSortingKeyDefined())
1281+
{
1282+
auto sorting_key_columns = metadata.getSortingKey().expression->getRequiredColumns();
1283+
std::sort(sorting_key_columns.begin(), sorting_key_columns.end());
1284+
1285+
Names names_intersection;
1286+
std::set_intersection(columns_to_sum_sorted.begin(), columns_to_sum_sorted.end(),
1287+
sorting_key_columns.begin(), sorting_key_columns.end(),
1288+
std::back_inserter(names_intersection));
1289+
1290+
if (!names_intersection.empty())
1291+
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Columns: {} listed both in columns to sum and in sorting key. "
1292+
"That is not allowed.", boost::algorithm::join(names_intersection, ", "));
1293+
}
12741294
}
12751295

12761296
if (mode == MergingParams::Replacing)

src/Storages/MergeTree/MergeTreeData.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ class MergeTreeData : public IStorage, public WithMutableContext
368368
Graphite::Params graphite_params;
369369

370370
/// Check that needed columns are present and have correct types.
371-
void check(const StorageInMemoryMetadata & metadata) const;
371+
void check(const MergeTreeSettings & settings, const StorageInMemoryMetadata & metadata) const;
372372

373373
String getModeName() const;
374374
};

0 commit comments

Comments
 (0)