
Commit 48a3233

Merge pull request ClickHouse#79945 from Algunenano/micro_clickbench

Speedup queries with trivial count optimization

2 parents 1f93c8a + ad18f77

8 files changed: +134 -48 lines changed


src/Common/OpenTelemetryTraceContext.h

Lines changed: 1 addition & 1 deletion
@@ -153,7 +153,7 @@ struct TracingContextHolder
 using TracingContextHolderPtr = std::unique_ptr<TracingContextHolder>;

 /// A span holder that creates span automatically in a (function) scope if tracing is enabled.
-/// Once it's created or destructed, it automatically maitains the tracing context on the thread that it lives.
+/// Once it's created or destructed, it automatically maintains the tracing context on the thread that it lives.
 struct SpanHolder : public Span
 {
     explicit SpanHolder(std::string_view, SpanKind _kind = SpanKind::INTERNAL);

src/Core/SettingsQuirks.cpp

Lines changed: 30 additions & 30 deletions
@@ -52,6 +52,14 @@ namespace Setting
 {
     extern const SettingsBool async_query_sending_for_remote;
     extern const SettingsBool async_socket_for_remote;
+    extern const SettingsUInt64 input_format_parquet_max_block_size;
+    extern const SettingsUInt64 max_block_size;
+    extern const SettingsUInt64 max_insert_block_size;
+    extern const SettingsUInt64 min_insert_block_size_rows;
+    extern const SettingsUInt64 min_insert_block_size_bytes_for_materialized_views;
+    extern const SettingsUInt64 min_external_table_block_size_rows;
+    extern const SettingsUInt64 max_joined_block_size_rows;
+    extern const SettingsMaxThreads max_threads;
     extern const SettingsUInt64 query_profiler_cpu_time_period_ns;
     extern const SettingsUInt64 query_profiler_real_time_period_ns;
     extern const SettingsBool use_hedged_requests;
@@ -101,51 +109,43 @@ void applySettingsQuirks(Settings & settings, LoggerPtr log)

 void doSettingsSanityCheckClamp(Settings & current_settings, LoggerPtr log)
 {
-    auto get_current_value = [&current_settings](const std::string_view name) -> Field
-    {
-        Field current_value;
-        bool has_current_value = current_settings.tryGet(name, current_value);
-        chassert(has_current_value);
-        return current_value;
-    };
-
-    UInt64 max_threads = get_current_value("max_threads").safeGet<UInt64>();
+    UInt64 max_threads = current_settings[Setting::max_threads];
     UInt64 max_threads_max_value = 256 * getNumberOfCPUCoresToUse();
     if (max_threads > max_threads_max_value)
     {
         if (log)
             LOG_WARNING(log, "Sanity check: Too many threads requested ({}). Reduced to {}", max_threads, max_threads_max_value);
-        current_settings.set("max_threads", max_threads_max_value);
+        current_settings[Setting::max_threads] = max_threads_max_value;
     }

     static constexpr UInt64 max_sane_block_rows_size = 4294967296; // 2^32

     using namespace std::literals;
-    static constexpr std::array block_rows_settings{
-        "max_block_size"sv,
-        "max_insert_block_size"sv,
-        "min_insert_block_size_rows"sv,
-        "min_insert_block_size_bytes_for_materialized_views"sv,
-        "min_external_table_block_size_rows"sv,
-        "max_joined_block_size_rows"sv,
-        "input_format_parquet_max_block_size"sv};
-
-    for (auto const setting : block_rows_settings)
-    {
-        if (auto block_size = get_current_value(setting).safeGet<UInt64>();
-            block_size > max_sane_block_rows_size)
-        {
-            if (log)
-                LOG_WARNING(log, "Sanity check: '{}' value is too high ({}). Reduced to {}", setting, block_size, max_sane_block_rows_size);
-            current_settings.set(setting, max_sane_block_rows_size);
-        }
+#define CHECK_MAX_VALUE(SETTING_VALUE) \
+    if (UInt64 block_size = current_settings[Setting::SETTING_VALUE]; block_size > max_sane_block_rows_size) \
+    { \
+        if (log) \
+            LOG_WARNING( \
+                log, "Sanity check: '{}' value is too high ({}). Reduced to {}", #SETTING_VALUE, block_size, max_sane_block_rows_size); \
+        current_settings[Setting::SETTING_VALUE] = max_sane_block_rows_size; \
     }

-    if (auto max_block_size = get_current_value("max_block_size").safeGet<UInt64>(); max_block_size == 0)
+    CHECK_MAX_VALUE(max_block_size)
+    CHECK_MAX_VALUE(max_insert_block_size)
+    CHECK_MAX_VALUE(min_insert_block_size_rows)
+    CHECK_MAX_VALUE(min_insert_block_size_bytes_for_materialized_views)
+    CHECK_MAX_VALUE(min_external_table_block_size_rows)
+    CHECK_MAX_VALUE(max_joined_block_size_rows)
+    CHECK_MAX_VALUE(input_format_parquet_max_block_size)
+
+#undef CHECK_MAX_VALUE
+
+
+    if (auto max_block_size = current_settings[Setting::max_block_size]; max_block_size == 0)
     {
         if (log)
             LOG_WARNING(log, "Sanity check: 'max_block_size' cannot be 0. Set to default value {}", DEFAULT_BLOCK_SIZE);
-        current_settings.set("max_block_size", DEFAULT_BLOCK_SIZE);
+        current_settings[Setting::max_block_size] = DEFAULT_BLOCK_SIZE;
     }
 }
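The macro above replaces the old string-keyed lookup loop with one typed Setting:: access per setting, so the sanity clamp no longer goes through name-based tryGet calls. For illustration only (not part of the commit), here is a minimal standalone sketch of the same warn-and-clamp pattern; FakeSettings, its fields, and the printf logging are invented stand-ins for ClickHouse's Settings object and LOG_WARNING:

#include <cstdint>
#include <cstdio>

// Stand-in for the real Settings object: two block-size knobs, one deliberately insane.
struct FakeSettings
{
    uint64_t max_block_size = 1ULL << 40;     // far above the sane bound, gets clamped
    uint64_t max_insert_block_size = 65409;   // sane, left untouched
};

int main()
{
    FakeSettings s;
    constexpr uint64_t max_sane_block_rows_size = 4294967296; // 2^32, same bound as the patch

// The macro stamps out one typed check per field; #FIELD supplies the name for the log line.
#define CHECK_MAX_VALUE(FIELD) \
    if (uint64_t v = s.FIELD; v > max_sane_block_rows_size) \
    { \
        std::printf("Sanity check: '%s' value is too high (%llu). Reduced to %llu\n", \
                    #FIELD, (unsigned long long)v, (unsigned long long)max_sane_block_rows_size); \
        s.FIELD = max_sane_block_rows_size; \
    }

    CHECK_MAX_VALUE(max_block_size)
    CHECK_MAX_VALUE(max_insert_block_size)
#undef CHECK_MAX_VALUE

    return 0;
}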

src/Interpreters/executeQuery.cpp

Lines changed: 1 addition & 1 deletion
@@ -632,7 +632,7 @@ void logQueryFinishImpl(

    }

-    if (query_span)
+    if (query_span && query_span->isTraceEnabled())
    {
        query_span->addAttribute("db.statement", elem.query);
        query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id);
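With the added isTraceEnabled() guard, a query that runs without an active trace skips the whole attribute block instead of calling into the span once per attribute. A self-contained sketch of the guard pattern, with FakeSpan and logQueryFinishSketch invented for illustration in place of the real OpenTelemetry Span wrapper:

#include <cstdio>
#include <map>
#include <string>

// Invented stand-in for the tracing span: stores attributes only when tracing is on.
struct FakeSpan
{
    bool trace_enabled = false;
    std::map<std::string, std::string> attributes;

    bool isTraceEnabled() const { return trace_enabled; }
    void addAttribute(const std::string & name, const std::string & value) { attributes.emplace(name, value); }
};

// Mirrors the guarded block from logQueryFinishImpl(): hoisting the check means
// queries without tracing never reach the per-attribute calls at all.
void logQueryFinishSketch(FakeSpan * query_span, const std::string & query, const std::string & query_id)
{
    if (query_span && query_span->isTraceEnabled())
    {
        query_span->addAttribute("db.statement", query);
        query_span->addAttribute("clickhouse.query_id", query_id);
    }
}

int main()
{
    FakeSpan span; // trace_enabled is false, so no attribute work happens
    logQueryFinishSketch(&span, "SELECT 1", "query-id-1");
    std::printf("attributes stored: %zu\n", span.attributes.size()); // prints 0
}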

src/Planner/Planner.cpp

Lines changed: 17 additions & 9 deletions
@@ -51,6 +51,8 @@
 #include <Storages/StorageDummy.h>
 #include <Storages/StorageMerge.h>

+#include <AggregateFunctions/IAggregateFunction.h>
+
 #include <Analyzer/Utils.h>
 #include <Analyzer/ColumnNode.h>
 #include <Analyzer/ConstantNode.h>
@@ -564,13 +566,21 @@ void addMergingAggregatedStep(QueryPlan & query_plan,
      * but it can work more slowly.
      */

-    auto keys = aggregation_analysis_result.aggregation_keys;
+    const auto & keys = aggregation_analysis_result.aggregation_keys;
+
+    /// For count() without parameters try to use just one thread
+    /// Typically this will either be a trivial count or a really small number of states
+    size_t max_threads = settings[Setting::max_threads];
+    if (keys.empty() && aggregation_analysis_result.aggregate_descriptions.size() == 1
+        && aggregation_analysis_result.aggregate_descriptions[0].function->getName() == String{"count"}
+        && aggregation_analysis_result.grouping_sets_parameters_list.empty())
+        max_threads = 1;

     Aggregator::Params params(
         keys,
         aggregation_analysis_result.aggregate_descriptions,
         query_analysis_result.aggregate_overflow_row,
-        settings[Setting::max_threads],
+        max_threads,
         settings[Setting::max_block_size],
         settings[Setting::min_hit_rate_to_use_consecutive_keys_optimization]);
@@ -1317,7 +1327,7 @@ void Planner::buildQueryPlanIfNeeded()
         return;

     LOG_TRACE(
-        getLogger("Planner"),
+        log,
         "Query to stage {}{}",
         QueryProcessingStage::toString(select_query_options.to_stage),
         select_query_options.only_analyze ? " only analyze" : "");
@@ -1483,7 +1493,7 @@ void Planner::buildPlanForQueryNode()

             auto & mutable_context = planner_context->getMutableQueryContext();
             mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
-            LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas to execute a query with IN with subquery");
+            LOG_DEBUG(log, "Disabling parallel replicas to execute a query with IN with subquery");
         }
     }

@@ -1520,9 +1530,7 @@
         if (settings[Setting::allow_experimental_parallel_reading_from_replicas] >= 2)
             throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "FINAL modifier is not supported with parallel replicas");

-        LOG_DEBUG(
-            getLogger("Planner"),
-            "FINAL modifier is not supported with parallel replicas. Query will be executed without using them.");
+        LOG_DEBUG(log, "FINAL modifier is not supported with parallel replicas. Query will be executed without using them.");
         auto & mutable_context = planner_context->getMutableQueryContext();
         mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
     }
@@ -1537,7 +1545,7 @@
         if (settings[Setting::allow_experimental_parallel_reading_from_replicas] >= 2)
             throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JOINs are not supported with parallel replicas");

-        LOG_DEBUG(getLogger("Planner"), "JOINs are not supported with parallel replicas. Query will be executed without using them.");
+        LOG_DEBUG(log, "JOINs are not supported with parallel replicas. Query will be executed without using them.");

         auto & mutable_context = planner_context->getMutableQueryContext();
         mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0));
@@ -1568,7 +1576,7 @@
     query_node_to_plan_step_mapping.insert(mapping.begin(), mapping.end());

     LOG_TRACE(
-        getLogger("Planner"),
+        log,
         "Query from stage {} to stage {}{}",
         QueryProcessingStage::toString(from_stage),
         QueryProcessingStage::toString(select_query_options.to_stage),
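The new branch in addMergingAggregatedStep() carries the actual optimization: with no GROUP BY keys, no GROUPING SETS, and a lone parameterless count(), each input stream contributes at most one small aggregation state, so a single merge thread is enough and fanning the merge out to max_threads only adds Resize and scheduling overhead. A standalone sketch of that decision, using invented Fake* types and chooseMergeThreads rather than the real planner classes:

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

// Made-up stand-ins for the planner's analysis results (illustration only).
struct FakeAggregateDescription { std::string function_name; };

struct FakeAggregationAnalysis
{
    std::vector<std::string> aggregation_keys;                  // GROUP BY keys
    std::vector<FakeAggregateDescription> aggregate_descriptions;
    std::vector<int> grouping_sets_parameters_list;             // non-empty for GROUPING SETS
};

// Mirrors the condition from the diff: a lone count() with no keys and no
// grouping sets merges its states on a single thread.
size_t chooseMergeThreads(const FakeAggregationAnalysis & a, size_t max_threads)
{
    if (a.aggregation_keys.empty()
        && a.aggregate_descriptions.size() == 1
        && a.aggregate_descriptions[0].function_name == "count"
        && a.grouping_sets_parameters_list.empty())
        return 1;
    return max_threads;
}

int main()
{
    FakeAggregationAnalysis trivial{{}, {{"count"}}, {}};    // like SELECT count() FROM t
    FakeAggregationAnalysis grouped{{"k"}, {{"count"}}, {}}; // like SELECT count() FROM t GROUP BY k
    std::printf("%zu %zu\n", chooseMergeThreads(trivial, 4), chooseMergeThreads(grouped, 4)); // prints: 1 4
}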

src/Planner/Planner.h

Lines changed: 1 addition & 0 deletions
@@ -75,6 +75,7 @@ class Planner

     void buildPlanForQueryNode();

+    LoggerPtr log = getLogger("Planner");
     QueryTreeNodePtr query_tree;
     SelectQueryOptions & select_query_options;
     PlannerContextPtr planner_context;

tests/queries/0_stateless/00172_early_constant_folding.reference

Lines changed: 6 additions & 7 deletions
@@ -8,11 +8,10 @@ ExpressionTransform × 10
 (ReadFromPreparedSource)
 SourceFromSingleChunk 0 → 1
 (Expression)
-ExpressionTransform × 10
+ExpressionTransform
 (MergingAggregated)
-Resize 1 → 10
-MergingAggregatedTransform
-(Expression)
-ExpressionTransform
-(ReadFromPreparedSource)
-SourceFromSingleChunk 0 → 1
+MergingAggregatedTransform
+(Expression)
+ExpressionTransform
+(ReadFromPreparedSource)
+SourceFromSingleChunk 0 → 1

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+-- { echo On }
+-- We should use just a single thread to merge the state of trivial count
+EXPLAIN PIPELINE SELECT count() FROM trivial_count;
+(Expression)
+ExpressionTransform
+(MergingAggregated)
+MergingAggregatedTransform
+(Expression)
+ExpressionTransform
+(ReadFromPreparedSource)
+SourceFromSingleChunk 0 → 1
+-- But not if we are filtering or doing other operations (no trivial count)
+EXPLAIN PIPELINE SELECT count() FROM trivial_count WHERE number % 3 = 2;
+(Expression)
+ExpressionTransform × 4
+(Aggregating)
+Resize 1 → 4
+AggregatingTransform
+(Expression)
+ExpressionTransform
+(Filter)
+FilterTransform
+(ReadFromMergeTree)
+MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
+EXPLAIN PIPELINE SELECT count() FROM trivial_count GROUP BY number % 10;
+(Expression)
+ExpressionTransform × 4
+(Aggregating)
+Resize 1 → 4
+AggregatingTransform
+(Expression)
+ExpressionTransform
+(ReadFromMergeTree)
+MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
+-- Other aggregations should still use as many threads as necessary
+EXPLAIN PIPELINE SELECT sum(number) FROM trivial_count;
+(Expression)
+ExpressionTransform × 4
+(Aggregating)
+Resize 1 → 4
+AggregatingTransform
+(Expression)
+ExpressionTransform
+(ReadFromMergeTree)
+MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
+EXPLAIN PIPELINE SELECT count(), sum(number) FROM trivial_count;
+(Expression)
+ExpressionTransform × 4
+(Aggregating)
+Resize 1 → 4
+AggregatingTransform
+(Expression)
+ExpressionTransform
+(ReadFromMergeTree)
+MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
+DROP TABLE IF EXISTS trivial_count;

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
+-- Tags: no-object-storage
+-- no-object-storage since the output of the pipeline depends on the read method
+
+SET enable_analyzer = 1;
+SET max_threads=4;
+
+DROP TABLE IF EXISTS trivial_count;
+CREATE TABLE trivial_count ENGINE = MergeTree() ORDER BY number AS Select * from numbers(10) ;
+
+-- { echo On }
+-- We should use just a single thread to merge the state of trivial count
+EXPLAIN PIPELINE SELECT count() FROM trivial_count;
+
+-- But not if we are filtering or doing other operations (no trivial count)
+EXPLAIN PIPELINE SELECT count() FROM trivial_count WHERE number % 3 = 2;
+EXPLAIN PIPELINE SELECT count() FROM trivial_count GROUP BY number % 10;
+
+-- Other aggregations should still use as many threads as necessary
+EXPLAIN PIPELINE SELECT sum(number) FROM trivial_count;
+EXPLAIN PIPELINE SELECT count(), sum(number) FROM trivial_count;
+
+DROP TABLE IF EXISTS trivial_count;
