Skip to content

Commit 09f82bb

Browse files
authored
Merge pull request ClickHouse#77356 from rschu1ze/fixups-qc-cache
Follow-up for query condition cache
2 parents f9b4065 + 66ef6b9 commit 09f82bb

32 files changed

+298
-130
lines changed

src/Common/ProfileEvents.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,10 @@
7575
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
7676
M(PrimaryIndexCacheHits, "Number of times an entry has been found in the primary index cache, so we didn't have to load a index file.", ValueType::Number) \
7777
M(PrimaryIndexCacheMisses, "Number of times an entry has not been found in the primary index cache, so we had to load a index file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
78-
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
79-
M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
8078
M(QueryConditionCacheHits, "Number of times an entry has been found in the query condition cache (and reading of marks can be skipped). Only updated for SELECT queries with SETTING use_query_condition_cache = 1.", ValueType::Number) \
8179
M(QueryConditionCacheMisses, "Number of times an entry has not been found in the query condition cache (and reading of mark cannot be skipped). Only updated for SELECT queries with SETTING use_query_condition_cache = 1.", ValueType::Number) \
80+
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
81+
M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
8282
M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).", ValueType::Number) \
8383
M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).", ValueType::Number) \
8484
M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.", ValueType::Number) \

src/Interpreters/Cache/QueryConditionCache.cpp

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include <Interpreters/Cache/QueryConditionCache.h>
2-
#include <Storages/MergeTree/MergeTreeData.h>
2+
#include <Common/ProfileEvents.h>
3+
#include <Common/SipHash.h>
4+
#include <Common/logger_useful.h>
5+
#include <IO/WriteHelpers.h>
36

47
namespace ProfileEvents
58
{
@@ -16,10 +19,10 @@ QueryConditionCache::QueryConditionCache(const String & cache_policy, size_t max
1619
}
1720

1821
void QueryConditionCache::write(
19-
const UUID & table_id, const String & part_name, size_t predicate_hash,
22+
const UUID & table_id, const String & part_name, size_t condition_hash,
2023
const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark)
2124
{
22-
Key key = {table_id, part_name, predicate_hash};
25+
Key key = {table_id, part_name, condition_hash};
2326

2427
auto load_func = [&](){ return std::make_shared<Entry>(marks_count); };
2528
auto [entry, inserted] = cache.getOrSet(key, load_func);
@@ -38,33 +41,33 @@ void QueryConditionCache::write(
3841

3942
LOG_DEBUG(
4043
logger,
41-
"{} entry for table_id: {}, part_name: {}, predicate_hash: {}, marks_count: {}, has_final_mark: {}, ranges: {}",
44+
"{} entry for table_id: {}, part_name: {}, condition_hash: {}, marks_count: {}, has_final_mark: {}, ranges: {}",
4245
inserted ? "Inserted" : "Updated",
4346
table_id,
4447
part_name,
45-
predicate_hash,
48+
condition_hash,
4649
marks_count,
4750
has_final_mark,
4851
toString(mark_ranges));
4952
}
5053
}
5154

52-
std::optional<QueryConditionCache::MatchingMarks> QueryConditionCache::read(const UUID & table_id, const String & part_name, size_t predicate_hash)
55+
std::optional<QueryConditionCache::MatchingMarks> QueryConditionCache::read(const UUID & table_id, const String & part_name, size_t condition_hash)
5356
{
54-
Key key = {table_id, part_name, predicate_hash};
57+
Key key = {table_id, part_name, condition_hash};
5558

5659
if (auto entry = cache.get(key))
5760
{
5861
ProfileEvents::increment(ProfileEvents::QueryConditionCacheHits);
5962

60-
std::lock_guard lock(entry->mutex);
63+
std::shared_lock lock(entry->mutex);
6164

6265
LOG_DEBUG(
6366
logger,
64-
"Read entry for table_uuid: {}, part: {}, predicate_hash: {}, ranges: {}",
67+
"Read entry for table_uuid: {}, part: {}, condition_hash: {}, ranges: {}",
6568
table_id,
6669
part_name,
67-
predicate_hash,
70+
condition_hash,
6871
toString(entry->matching_marks));
6972

7073
return {entry->matching_marks};
@@ -75,16 +78,21 @@ std::optional<QueryConditionCache::MatchingMarks> QueryConditionCache::read(cons
7578

7679
LOG_DEBUG(
7780
logger,
78-
"Could not find entry for table_uuid: {}, part: {}, predicate_hash: {}",
81+
"Could not find entry for table_uuid: {}, part: {}, condition_hash: {}",
7982
table_id,
8083
part_name,
81-
predicate_hash);
84+
condition_hash);
8285

8386
return {};
8487
}
8588

8689
}
8790

91+
std::vector<QueryConditionCache::Cache::KeyMapped> QueryConditionCache::dump() const
92+
{
93+
return cache.dump();
94+
}
95+
8896
void QueryConditionCache::clear()
8997
{
9098
cache.clear();
@@ -99,7 +107,7 @@ bool QueryConditionCache::Key::operator==(const Key & other) const
99107
{
100108
return table_id == other.table_id
101109
&& part_name == other.part_name
102-
&& predicate_hash == other.predicate_hash;
110+
&& condition_hash == other.condition_hash;
103111
}
104112

105113
QueryConditionCache::Entry::Entry(size_t mark_count)
@@ -112,7 +120,7 @@ size_t QueryConditionCache::KeyHasher::operator()(const Key & key) const
112120
SipHash hash;
113121
hash.update(key.table_id);
114122
hash.update(key.part_name);
115-
hash.update(key.predicate_hash);
123+
hash.update(key.condition_hash);
116124
return hash.get64();
117125
}
118126

src/Interpreters/Cache/QueryConditionCache.h

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <Common/CacheBase.h>
44
#include <Storages/MergeTree/MarkRange.h>
5+
#include <shared_mutex>
56

67
namespace DB
78
{
@@ -19,35 +20,21 @@ class QueryConditionCache
1920
/// True means at least one row in the mark matches the predicate. We need to read such marks.
2021
using MatchingMarks = std::vector<bool>;
2122

22-
QueryConditionCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio);
23-
24-
/// Add an entry to the cache. The passed marks represent ranges of the column with matches of the predicate.
25-
void write(
26-
const UUID & table_id, const String & part_name, size_t predicate_hash,
27-
const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark);
28-
29-
/// Check the cache if it contains an entry for the given table + part id and predicate hash.
30-
std::optional<MatchingMarks> read(const UUID & table_id, const String & part_name, size_t predicate_hash);
31-
32-
void clear();
33-
34-
void setMaxSizeInBytes(size_t max_size_in_bytes);
35-
3623
private:
3724
/// Key + entry represent a mark range result.
3825
struct Key
3926
{
4027
const UUID table_id;
4128
const String part_name;
42-
const size_t predicate_hash;
29+
const size_t condition_hash;
4330

4431
bool operator==(const Key & other) const;
4532
};
4633

4734
struct Entry
4835
{
4936
MatchingMarks matching_marks;
50-
std::mutex mutex; /// (*)
37+
std::shared_mutex mutex; /// (*)
5138

5239
explicit Entry(size_t mark_count);
5340
};
@@ -67,9 +54,28 @@ class QueryConditionCache
6754
size_t operator()(const Entry & entry) const;
6855
};
6956

57+
public:
7058
using Cache = CacheBase<Key, Entry, KeyHasher, QueryConditionCacheEntryWeight>;
71-
Cache cache;
7259

60+
QueryConditionCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio);
61+
62+
/// Add an entry to the cache. The passed marks represent ranges of the column with matches of the predicate.
63+
void write(
64+
const UUID & table_id, const String & part_name, size_t condition_hash,
65+
const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark);
66+
67+
/// Check the cache if it contains an entry for the given table + part id and predicate hash.
68+
std::optional<MatchingMarks> read(const UUID & table_id, const String & part_name, size_t condition_hash);
69+
70+
/// For debugging and system tables
71+
std::vector<QueryConditionCache::Cache::KeyMapped> dump() const;
72+
73+
void clear();
74+
75+
void setMaxSizeInBytes(size_t max_size_in_bytes);
76+
77+
private:
78+
Cache cache;
7379
LoggerPtr logger = getLogger("QueryConditionCache");
7480
};
7581

src/Processors/Chunk.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
#include <Columns/IColumn_fwd.h>
44
#include <Common/CollectionOfDerived.h>
5-
#include <Columns/IColumn.h>
5+
#include <Core/Types_fwd.h>
66
#include <Storages/MergeTree/MarkRange.h>
77

88
#include <memory>
@@ -158,21 +158,23 @@ class AsyncInsertInfo : public ChunkInfoCloneable<AsyncInsertInfo>
158158

159159
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
160160

161-
class IMergeTreeDataPart;
162-
163-
/// The query condition cache needs to know the mark ranges of which part the chunk data comes from.
161+
/// Lineage information: from which table, part and mark range does the chunk come from?
162+
/// This information is needed by the query condition cache.
164163
class MarkRangesInfo : public ChunkInfoCloneable<MarkRangesInfo>
165164
{
166165
public:
167-
MarkRangesInfo(std::shared_ptr<const IMergeTreeDataPart> data_part_, MarkRanges mark_ranges_)
168-
: data_part(data_part_)
166+
MarkRangesInfo(UUID table_uuid_, const String & part_name_, size_t marks_count_, bool has_final_mark_, MarkRanges mark_ranges_)
167+
: table_uuid(table_uuid_)
168+
, part_name(part_name_)
169+
, marks_count(marks_count_)
170+
, has_final_mark(has_final_mark_)
169171
, mark_ranges(std::move(mark_ranges_))
170172
{}
171173

172-
std::shared_ptr<const IMergeTreeDataPart> getDataPart() const { return data_part; }
173-
const MarkRanges & getMarkRanges() const { return mark_ranges; }
174-
private:
175-
std::shared_ptr<const IMergeTreeDataPart> data_part;
174+
UUID table_uuid;
175+
String part_name;
176+
size_t marks_count;
177+
bool has_final_mark;
176178
MarkRanges mark_ranges;
177179
};
178180

src/Processors/QueryPlan/FilterStep.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -173,11 +173,7 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
173173
pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type)
174174
{
175175
bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals;
176-
if (condition_hash)
177-
return std::make_shared<FilterTransform>(header, expression, filter_column_name, remove_filter_column,
178-
on_totals, nullptr, condition_hash);
179-
180-
return std::make_shared<FilterTransform>(header, expression, filter_column_name, remove_filter_column, on_totals);
176+
return std::make_shared<FilterTransform>(header, expression, filter_column_name, remove_filter_column, on_totals, nullptr, condition_hash);
181177
});
182178

183179
if (!blocksHaveEqualStructure(pipeline.getHeader(), *output_header))
@@ -252,7 +248,7 @@ void FilterStep::updateOutputHeader()
252248
return;
253249
}
254250

255-
void FilterStep::setQueryConditionKey(size_t condition_hash_)
251+
void FilterStep::setQueryConditionHash(size_t condition_hash_)
256252
{
257253
condition_hash = condition_hash_;
258254
}

src/Processors/QueryPlan/FilterStep.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#pragma once
22
#include <Processors/QueryPlan/ITransformingStep.h>
33
#include <Interpreters/ActionsDAG.h>
4-
#include <Interpreters/Cache/QueryConditionCache.h>
54

65
namespace DB
76
{
@@ -26,7 +25,8 @@ class FilterStep : public ITransformingStep
2625
ActionsDAG & getExpression() { return actions_dag; }
2726
const String & getFilterColumnName() const { return filter_column_name; }
2827
bool removesFilterColumn() const { return remove_filter_column; }
29-
void setQueryConditionKey(size_t condition_hash_);
28+
29+
void setQueryConditionHash(size_t condition_hash_);
3030

3131
static bool canUseType(const DataTypePtr & type);
3232

src/Processors/QueryPlan/Optimizations/Optimizations.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ bool optimizeJoinLegacy(QueryPlan::Node & node, QueryPlan::Nodes &, const QueryP
128128
bool optimizeJoinLogical(QueryPlan::Node & node, QueryPlan::Nodes &, const QueryPlanOptimizationSettings &);
129129
bool convertLogicalJoinToPhysical(QueryPlan::Node & node, QueryPlan::Nodes &, const QueryPlanOptimizationSettings & optimization_settings);
130130
void optimizeDistinctInOrder(QueryPlan::Node & node, QueryPlan::Nodes &);
131-
void tryUpdateQueryConditionCache(const QueryPlanOptimizationSettings & optimization_settings, const Stack & stack);
131+
void updateQueryConditionCache(const Stack & stack, const QueryPlanOptimizationSettings & optimization_settings);
132132

133133
// Should be called once the query plan tree structure is finalized, i.e. no nodes addition, deletion or pushing down should happen after that call.
134134
// Since those hashes are used for join optimization, the calculation performed before join optimization.

src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ namespace DB
88
namespace Setting
99
{
1010
extern const SettingsBool allow_aggregate_partitions_independently;
11+
extern const SettingsBool allow_experimental_analyzer;
1112
extern const SettingsBool force_optimize_projection;
1213
extern const SettingsBool optimize_aggregation_in_order;
1314
extern const SettingsBool optimize_distinct_in_order;
@@ -30,16 +31,15 @@ namespace Setting
3031
extern const SettingsBool query_plan_remove_redundant_distinct;
3132
extern const SettingsBool query_plan_remove_redundant_sorting;
3233
extern const SettingsBool query_plan_reuse_storage_ordering_for_window_functions;
33-
extern const SettingsBoolAuto query_plan_join_swap_table;
3434
extern const SettingsBool query_plan_split_filter;
3535
extern const SettingsBool query_plan_try_use_vector_search;
36+
extern const SettingsBool use_query_condition_cache;
37+
extern const SettingsBoolAuto query_plan_join_swap_table;
38+
extern const SettingsMaxThreads max_threads;
39+
extern const SettingsSeconds lock_acquire_timeout;
3640
extern const SettingsString force_optimize_projection_name;
3741
extern const SettingsUInt64 max_limit_for_ann_queries;
38-
extern const SettingsSeconds lock_acquire_timeout;
39-
extern const SettingsMaxThreads max_threads;
4042
extern const SettingsUInt64 query_plan_max_optimizations_to_apply;
41-
extern const SettingsBool use_query_condition_cache;
42-
extern const SettingsBool allow_experimental_analyzer;
4343
}
4444

4545
namespace ServerSetting
@@ -81,6 +81,7 @@ QueryPlanOptimizationSettings::QueryPlanOptimizationSettings(
8181
optimize_sorting_by_input_stream_properties = from[Setting::query_plan_enable_optimizations] && from[Setting::optimize_sorting_by_input_stream_properties];
8282
aggregation_in_order = from[Setting::query_plan_enable_optimizations] && from[Setting::optimize_aggregation_in_order] && from[Setting::query_plan_aggregation_in_order];
8383
optimize_projection = from[Setting::optimize_use_projections];
84+
use_query_condition_cache = from[Setting::use_query_condition_cache] && from[Setting::allow_experimental_analyzer];
8485

8586
optimize_use_implicit_projections = optimize_projection && from[Setting::optimize_use_implicit_projections];
8687
force_use_projection = optimize_projection && from[Setting::force_optimize_projection];
@@ -90,7 +91,6 @@ QueryPlanOptimizationSettings::QueryPlanOptimizationSettings(
9091

9192
/// These settings comes from EXPLAIN settings not query settings and outside of the scope of this class
9293
keep_logical_steps = false;
93-
use_query_condition_cache = from[Setting::use_query_condition_cache] && from[Setting::allow_experimental_analyzer];
9494
is_explain = false;
9595

9696
max_entries_for_hash_table_stats = max_entries_for_hash_table_stats_;

src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ struct QueryPlanOptimizationSettings
6161
bool optimize_sorting_by_input_stream_properties;
6262
bool aggregation_in_order;
6363
bool optimize_projection;
64+
bool use_query_condition_cache = false;
6465

6566
/// --- Third-pass optimizations (Processors/QueryPlan/QueryPlan.cpp)
6667
bool build_sets = true; /// this one doesn't have a corresponding setting
@@ -91,8 +92,6 @@ struct QueryPlanOptimizationSettings
9192

9293
bool keep_logical_steps;
9394

94-
/// If query condition cache is enabled, the query condition cache needs to be updated in the WHERE stage.
95-
bool use_query_condition_cache = false;
9695
bool is_explain;
9796
};
9897

src/Processors/QueryPlan/Optimizations/optimizeTree.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
124124
{
125125
optimizePrimaryKeyConditionAndLimit(stack);
126126

127-
tryUpdateQueryConditionCache(optimization_settings, stack);
127+
updateQueryConditionCache(stack, optimization_settings);
128128

129129
/// NOTE: optimizePrewhere can modify the stack.
130130
/// Prewhere optimization relies on PK optimization (getConditionSelectivityEstimatorByPredicate)

0 commit comments

Comments
 (0)