Skip to content

Commit e516076

Browse files
authored
Merge pull request ClickHouse#78671 from rschu1ze/qcc_plaintext_condition
`system.query_condition_cache`: Add field for plaintext condition
2 parents 8d46a90 + e6a34c9 commit e516076

20 files changed

+129
-32
lines changed

docs/en/operations/query-condition-cache.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ To clear the query condition cache, run [`SYSTEM DROP QUERY CONDITION CACHE`](..
5858

5959
The content of the cache is displayed in system table [system.query_condition_cache](system-tables/query_condition_cache.md).
6060
To calculate the current size of the query condition cache in MB, run `SELECT formatReadableSize(sum(entry_size)) FROM system.query_condition_cache`.
61+
If you would like to investigate individual filter conditions, you can check the field `condition` in `system.query_condition_cache`.
62+
Note that the field is only populated if the query runs with setting [query_condition_cache_store_conditions_as_plaintext](settings/settings#query_condition_cache_store_conditions_as_plaintext) enabled.
6163

6264
The numbers of query condition cache hits and misses since database start are shown as events "QueryConditionCacheHits" and "QueryConditionCacheMisses" in system table [system.events](system-tables/events.md).
6365
Both counters are only updated for `SELECT` queries which run with setting `use_query_condition_cache = true`; other queries do not affect "QueryConditionCacheHits" or "QueryConditionCacheMisses".

docs/en/operations/system-tables/query_condition_cache.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ Columns:
1717

1818
- `table_uuid` ([String](../../sql-reference/data-types/string.md)) — The table UUID.
1919
- `part_name` ([String](../../sql-reference/data-types/string.md)) — The part name.
20-
- `key_hash` ([String](/sql-reference/data-types/string.md)) — The hash of the filter condition.
20+
- `condition` ([String](/sql-reference/data-types/string.md)) — The plaintext (unhashed) filter condition. Only set if setting `query_condition_cache_store_conditions_as_plaintext = true`.
21+
- `condition_hash` ([String](/sql-reference/data-types/string.md)) — The hash of the filter condition.
2122
- `entry_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the entry in bytes.
2223
- `matching_marks` ([String](../../sql-reference/data-types/string.md)) — Matching marks.
2324

@@ -32,7 +33,8 @@ Row 1:
3233
──────
3334
table_uuid: 28270a24-ea27-49f6-99cd-97b9bee976ac
3435
part_name: all_1_1_0
35-
key_hash: 5456494897146899690 -- 5.46 quintillion
36+
condition: or(equals(b, 10000_UInt16), equals(c, 10000_UInt16))
37+
condition_hash: 5456494897146899690 -- 5.46 quintillion
3638
entry_size: 40
3739
matching_marks: 111111110000000000000000000000000000000000000000000000000111111110000000000000000
3840

src/Core/Settings.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4831,6 +4831,16 @@ and reuse this information as an ephemeral index for subsequent queries.
48314831
48324832
Possible values:
48334833
4834+
- 0 - Disabled
4835+
- 1 - Enabled
4836+
)", 0) \
4837+
DECLARE(Bool, query_condition_cache_store_conditions_as_plaintext, false, R"(
4838+
Stores the filter condition for the [query condition cache](/operations/query-condition-cache) in plaintext.
4839+
If enabled, system.query_condition_cache shows the verbatim filter condition which makes it easier to debug issues with the cache.
4840+
Disabled by default because plaintext filter conditions may expose sensitive information.
4841+
4842+
Possible values:
4843+
48344844
- 0 - Disabled
48354845
- 1 - Enabled
48364846
)", 0) \

src/Core/SettingsChangesHistory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ const VersionToSettingsChangesMap & getSettingsChangesHistory()
8383
{"allow_special_bool_values_inside_variant", true, false, "Don't allow special bool values during Variant type parsing"},
8484
{"cast_string_to_variant_use_inference", true, true, "New setting to enable/disable types inference during CAST from String to Variant"},
8585
{"distributed_cache_read_request_max_tries", 20, 20, "New setting"},
86+
{"query_condition_cache_store_conditions_as_plaintext", false, false, "New setting"},
8687
{"min_os_cpu_wait_time_ratio_to_throw", 0, 2, "New setting"},
8788
{"max_os_cpu_wait_time_ratio_to_throw", 0, 6, "New setting"},
8889
/// Release closed. Please use 25.5

src/Interpreters/Cache/QueryConditionCache.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ QueryConditionCache::QueryConditionCache(const String & cache_policy, size_t max
1919
}
2020

2121
void QueryConditionCache::write(
22-
const UUID & table_id, const String & part_name, size_t condition_hash,
22+
const UUID & table_id, const String & part_name, size_t condition_hash, const String & condition,
2323
const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark)
2424
{
25-
Key key = {table_id, part_name, condition_hash};
25+
Key key = {table_id, part_name, condition_hash, condition};
2626

2727
auto load_func = [&](){ return std::make_shared<Entry>(marks_count); };
2828
auto [entry, inserted] = cache.getOrSet(key, load_func);
@@ -40,19 +40,20 @@ void QueryConditionCache::write(
4040

4141
LOG_DEBUG(
4242
logger,
43-
"{} entry for table_id: {}, part_name: {}, condition_hash: {}, marks_count: {}, has_final_mark: {}, ranges: {}",
43+
"{} entry for table_id: {}, part_name: {}, condition_hash: {}, condition: {}, marks_count: {}, has_final_mark: {}, ranges: {}",
4444
inserted ? "Inserted" : "Updated",
4545
table_id,
4646
part_name,
4747
condition_hash,
48+
condition,
4849
marks_count,
4950
has_final_mark,
5051
toString(mark_ranges));
5152
}
5253

5354
std::optional<QueryConditionCache::MatchingMarks> QueryConditionCache::read(const UUID & table_id, const String & part_name, size_t condition_hash)
5455
{
55-
Key key = {table_id, part_name, condition_hash};
56+
Key key = {table_id, part_name, condition_hash, ""};
5657

5758
if (auto entry = cache.get(key))
5859
{
@@ -130,7 +131,7 @@ size_t QueryConditionCache::KeyHasher::operator()(const Key & key) const
130131
size_t QueryConditionCache::QueryConditionCacheEntryWeight::operator()(const Entry & entry) const
131132
{
132133
/// Estimate the memory size of `std::vector<bool>` (it uses bit-packing internally)
133-
size_t dynamic_memory = (entry.matching_marks.capacity() + 7) / 8; /// round up to bytes.
134-
return dynamic_memory + sizeof(decltype(entry.matching_marks));
134+
size_t memory = (entry.matching_marks.capacity() + 7) / 8; /// round up to bytes.
135+
return memory + sizeof(decltype(entry.matching_marks));
135136
}
136137
}

src/Interpreters/Cache/QueryConditionCache.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ class QueryConditionCache
3535
const String part_name;
3636
const size_t condition_hash;
3737

38+
/// -- Additional members, conceptually not part of the key. Only included for pretty-printing
39+
/// in system.query_condition_cache:
40+
const String condition;
41+
3842
bool operator==(const Key & other) const;
3943
};
4044

@@ -74,7 +78,7 @@ class QueryConditionCache
7478

7579
/// Add an entry to the cache. The passed marks represent ranges of the column with matches of the predicate.
7680
void write(
77-
const UUID & table_id, const String & part_name, size_t condition_hash,
81+
const UUID & table_id, const String & part_name, size_t condition_hash, const String & condition,
7882
const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark);
7983

8084
/// Check the cache if it contains an entry for the given table + part id and predicate hash.

src/Processors/QueryPlan/FilterStep.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
173173
pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type)
174174
{
175175
bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals;
176-
return std::make_shared<FilterTransform>(header, expression, filter_column_name, remove_filter_column, on_totals, nullptr, condition_hash);
176+
return std::make_shared<FilterTransform>(header, expression, filter_column_name, remove_filter_column, on_totals, nullptr, condition);
177177
});
178178

179179
if (!blocksHaveEqualStructure(pipeline.getHeader(), *output_header))
@@ -248,9 +248,9 @@ void FilterStep::updateOutputHeader()
248248
return;
249249
}
250250

251-
void FilterStep::setQueryConditionHash(size_t condition_hash_)
251+
void FilterStep::setConditionForQueryConditionCache(size_t condition_hash_, const String & condition_)
252252
{
253-
condition_hash = condition_hash_;
253+
condition = {condition_hash_, condition_};
254254
}
255255

256256
bool FilterStep::canUseType(const DataTypePtr & filter_type)

src/Processors/QueryPlan/FilterStep.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class FilterStep : public ITransformingStep
2626
const String & getFilterColumnName() const { return filter_column_name; }
2727
bool removesFilterColumn() const { return remove_filter_column; }
2828

29-
void setQueryConditionHash(size_t condition_hash_);
29+
void setConditionForQueryConditionCache(size_t condition_hash_, const String & condition_);
3030

3131
static bool canUseType(const DataTypePtr & type);
3232

@@ -42,7 +42,7 @@ class FilterStep : public ITransformingStep
4242
String filter_column_name;
4343
bool remove_filter_column;
4444

45-
std::optional<size_t> condition_hash;
45+
std::optional<std::pair<size_t, String>> condition; /// for query condition cache
4646
};
4747

4848
}

src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ namespace Setting
3535
extern const SettingsBool query_plan_try_use_vector_search;
3636
extern const SettingsBool query_plan_convert_join_to_in;
3737
extern const SettingsBool use_query_condition_cache;
38+
extern const SettingsBool query_condition_cache_store_conditions_as_plaintext;
3839
extern const SettingsBoolAuto query_plan_join_swap_table;
3940
extern const SettingsMaxThreads max_threads;
4041
extern const SettingsSeconds lock_acquire_timeout;
@@ -92,6 +93,7 @@ QueryPlanOptimizationSettings::QueryPlanOptimizationSettings(
9293
aggregation_in_order = from[Setting::query_plan_enable_optimizations] && from[Setting::optimize_aggregation_in_order] && from[Setting::query_plan_aggregation_in_order];
9394
optimize_projection = from[Setting::optimize_use_projections];
9495
use_query_condition_cache = from[Setting::use_query_condition_cache] && from[Setting::allow_experimental_analyzer];
96+
query_condition_cache_store_conditions_as_plaintext = from[Setting::query_condition_cache_store_conditions_as_plaintext];
9597

9698
optimize_use_implicit_projections = optimize_projection && from[Setting::optimize_use_implicit_projections];
9799
force_use_projection = optimize_projection && from[Setting::force_optimize_projection];

src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ struct QueryPlanOptimizationSettings
6767
bool optimize_sorting_by_input_stream_properties;
6868
bool aggregation_in_order;
6969
bool optimize_projection;
70-
bool use_query_condition_cache = false;
70+
bool use_query_condition_cache;
71+
bool query_condition_cache_store_conditions_as_plaintext;
7172

7273
/// --- Third-pass optimizations (Processors/QueryPlan/QueryPlan.cpp)
7374
bool build_sets = true; /// this one doesn't have a corresponding setting

0 commit comments

Comments
 (0)