Skip to content

Commit 5eec057

Browse files
rschu1zejiebinn
authored andcommitted
Cleanup
1 parent 81ed531 commit 5eec057

File tree

2 files changed

+25
-49
lines changed

2 files changed

+25
-49
lines changed

src/Interpreters/Cache/QueryConditionCache.cpp

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,34 +18,6 @@ QueryConditionCache::QueryConditionCache(const String & cache_policy, size_t max
1818
{
1919
}
2020

21-
inline bool QueryConditionCache::needUpdate(const std::shared_ptr<Entry> & entry, const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark) const
22-
{
23-
// If no marks to process, we may only need to update the final mark
24-
if (mark_ranges.empty()) {
25-
// Only acquire the lock and check final mark when has_final_mark is true
26-
std::shared_lock read_lock(entry->mutex);
27-
// Return true (update needed) only if final mark needs to be set to false
28-
return has_final_mark && entry->matching_marks[marks_count - 1];
29-
}
30-
else {
31-
// Acquire shared lock for read access to the matching_marks vector
32-
std::shared_lock read_lock(entry->mutex);
33-
34-
// Check if any mark within the ranges is still true
35-
for (const auto & mark_range : mark_ranges) {
36-
if (std::find(
37-
entry->matching_marks.begin() + mark_range.begin,
38-
entry->matching_marks.begin() + mark_range.end,
39-
true) != (entry->matching_marks.begin() + mark_range.end)) {
40-
return true; // Found at least one true mark in range, need update
41-
}
42-
}
43-
44-
// If all marks in ranges are already false, check if final mark needs update
45-
return has_final_mark && entry->matching_marks[marks_count - 1];
46-
}
47-
}
48-
4921
void QueryConditionCache::write(
5022
const UUID & table_id, const String & part_name, size_t condition_hash, const String & condition,
5123
const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark)
@@ -55,11 +27,29 @@ void QueryConditionCache::write(
5527
auto load_func = [&](){ return std::make_shared<Entry>(marks_count); };
5628
auto [entry, inserted] = cache.getOrSet(key, load_func);
5729

58-
// Skip update if not needed - optimization to avoid unnecessary locks and updates
59-
if (!needUpdate(entry, mark_ranges, marks_count, has_final_mark))
60-
return;
30+
/// Try to avoid acquiring the RW lock below (*) by early-ing out. Matters for systems with lots of cores.
31+
{
32+
std::shared_lock shared_lock(entry->mutex); /// cheap
33+
34+
bool need_not_update_marks = true;
35+
for (const auto & mark_range : mark_ranges)
36+
{
37+
/// If the bits are already in the desired state (false), we don't need to update them.
38+
need_not_update_marks = std::all_of(entry->matching_marks.begin() + mark_range.begin,
39+
entry->matching_marks.begin() + mark_range.end,
40+
[](auto b) { return b == false; });
41+
if (!need_not_update_marks)
42+
break;
43+
}
44+
45+
/// Do we either have no final mark or final mark is already in the desired state?
46+
bool need_not_update_final_mark = !has_final_mark || entry->matching_marks[marks_count - 1] == false;
6147

62-
std::lock_guard lock(entry->mutex);
48+
if (need_not_update_marks && need_not_update_final_mark)
49+
return;
50+
}
51+
52+
std::lock_guard lock(entry->mutex); /// (*)
6353

6454
chassert(marks_count == entry->matching_marks.size());
6555

@@ -164,4 +154,5 @@ size_t QueryConditionCache::QueryConditionCacheEntryWeight::operator()(const Ent
164154
size_t memory = (entry.matching_marks.capacity() + 7) / 8; /// round up to bytes.
165155
return memory + sizeof(decltype(entry.matching_marks));
166156
}
157+
167158
}

src/Interpreters/Cache/QueryConditionCache.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace DB
1616
///
1717
/// Note: The cache may store more than the minimal number of matching marks.
1818
/// For example, assume a very selective predicate that matches just a single row in a single mark.
19-
/// One would expect that the cache records just the single mark as potentially matching:
19+
/// One would expect that the cache records just a single mark as potentially matching:
2020
/// 000000010000000000000000000
2121
/// But it is equally correct for the cache to store this: (it is just less efficient for pruning)
2222
/// 000001111111110000000000000
@@ -51,14 +51,13 @@ class QueryConditionCache
5151

5252
/// (*) You might wonder why Entry has its own mutex considering that CacheBase locks internally already.
5353
/// The reason is that ClickHouse scans ranges within the same part in parallel. The first scan creates
54-
/// and inserts a new Key + Entry into the cache, the 2nd ... Nth scan find the existing Key and update
54+
/// and inserts a new Key + Entry into the cache, the 2nd ... Nth scans find the existing Key and update
5555
/// its Entry for the new ranges. This can only be done safely in a synchronized fashion.
5656

5757
/// (**) About error handling: There could be an exception after the i-th scan and cache entries could
5858
/// (theoretically) be left in a corrupt state. If we are not careful, future scans queries could then
5959
/// skip too many ranges. To prevent this, it is important to initialize all marks of each entry as
6060
/// non-matching. In case of an exception, future scans will then not skip them.
61-
6261
};
6362

6463
struct KeyHasher
@@ -71,20 +70,6 @@ class QueryConditionCache
7170
size_t operator()(const Entry & entry) const;
7271
};
7372

74-
/**
75-
* Check if we need to update the cache entry.
76-
* This function determines whether a cache update is necessary by examining:
77-
* 1. Whether the mark ranges are empty
78-
* 2. Whether marks in specified ranges are already set to false
79-
* 3. Whether the final mark (if applicable) is already set correctly
80-
*
81-
* @param entry The cache entry to check
82-
* @param mark_ranges The ranges of marks to potentially update
83-
* @param marks_count Total number of marks in the entry
84-
* @param has_final_mark Flag indicating if the final mark needs special handling
85-
* @return true if an update is needed, false if we can skip the update
86-
*/
87-
inline bool needUpdate(const std::shared_ptr<Entry> & entry, const MarkRanges & mark_ranges, size_t marks_count, bool has_final_mark) const;
8873

8974
public:
9075
using Cache = CacheBase<Key, Entry, KeyHasher, QueryConditionCacheEntryWeight>;

0 commit comments

Comments
 (0)