Commit 63058b4

Merge pull request ClickHouse#80247 from jiebinn/QueryConditionCache
Reduce lock contention in QueryConditionCache
2 parents: 593ee4f + 5eec057

File tree

2 files changed (+27 −4 lines)


src/Interpreters/Cache/QueryConditionCache.cpp

Lines changed: 24 additions & 1 deletion
@@ -27,7 +27,29 @@ void QueryConditionCache::write(
     auto load_func = [&](){ return std::make_shared<Entry>(marks_count); };
     auto [entry, inserted] = cache.getOrSet(key, load_func);
 
-    std::lock_guard lock(entry->mutex);
+    /// Try to avoid acquiring the RW lock below (*) by early-ing out. Matters for systems with lots of cores.
+    {
+        std::shared_lock shared_lock(entry->mutex); /// cheap
+
+        bool need_not_update_marks = true;
+        for (const auto & mark_range : mark_ranges)
+        {
+            /// If the bits are already in the desired state (false), we don't need to update them.
+            need_not_update_marks = std::all_of(entry->matching_marks.begin() + mark_range.begin,
+                                                entry->matching_marks.begin() + mark_range.end,
+                                                [](auto b) { return b == false; });
+            if (!need_not_update_marks)
+                break;
+        }
+
+        /// Do we either have no final mark or final mark is already in the desired state?
+        bool need_not_update_final_mark = !has_final_mark || entry->matching_marks[marks_count - 1] == false;
+
+        if (need_not_update_marks && need_not_update_final_mark)
+            return;
+    }
+
+    std::lock_guard lock(entry->mutex); /// (*)
 
     chassert(marks_count == entry->matching_marks.size());
 
@@ -132,4 +154,5 @@ size_t QueryConditionCache::QueryConditionCacheEntryWeight::operator()(const Ent
     size_t memory = (entry.matching_marks.capacity() + 7) / 8; /// round up to bytes.
     return memory + sizeof(decltype(entry.matching_marks));
 }
+
 }
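
The core of the change is a double-checked locking pattern over a std::shared_mutex: a cheap shared lock first checks whether the bits are already in the desired state, and only when a write is actually needed does the thread fall through to the exclusive lock marked (*). Below is a minimal, self-contained sketch of that pattern, not the actual ClickHouse code: MarkRange, Entry, and markRangesNonMatching are simplified stand-ins for the real types.

#include <algorithm>
#include <cstddef>
#include <mutex>
#include <shared_mutex>
#include <vector>

struct MarkRange { size_t begin; size_t end; };

struct Entry
{
    std::shared_mutex mutex;
    std::vector<bool> matching_marks; /// true = mark must still be read

    explicit Entry(size_t marks_count) : matching_marks(marks_count, true) {}
};

void markRangesNonMatching(Entry & entry, const std::vector<MarkRange> & mark_ranges)
{
    {
        /// Fast path: a shared lock, which any number of threads can hold at once.
        std::shared_lock shared_lock(entry.mutex);

        bool already_clear = std::all_of(mark_ranges.begin(), mark_ranges.end(),
            [&](const MarkRange & range)
            {
                return std::all_of(entry.matching_marks.begin() + range.begin,
                                   entry.matching_marks.begin() + range.end,
                                   [](bool bit) { return !bit; });
            });

        if (already_clear)
            return; /// bits already in the desired state, skip the exclusive lock
    }

    /// Slow path: the exclusive lock, taken only when a write is actually needed.
    std::lock_guard lock(entry.mutex);
    for (const auto & range : mark_ranges)
        std::fill(entry.matching_marks.begin() + range.begin,
                  entry.matching_marks.begin() + range.end,
                  false);
}

The window between dropping the shared lock and acquiring the exclusive one is benign: a concurrent writer can only clear more bits in the meantime, and clearing is idempotent, so the slow path at worst repeats work. The win comes from the fast path, since after a short warm-up most writers find their ranges already cleared and never contend for the exclusive lock.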

src/Interpreters/Cache/QueryConditionCache.h

Lines changed: 3 additions & 3 deletions
@@ -16,7 +16,7 @@ namespace DB
 ///
 /// Note: The cache may store more than the minimal number of matching marks.
 /// For example, assume a very selective predicate that matches just a single row in a single mark.
-/// One would expect that the cache records just the single mark as potentially matching:
+/// One would expect that the cache records just a single mark as potentially matching:
 /// 000000010000000000000000000
 /// But it is equally correct for the cache to store this: (it is just less efficient for pruning)
 /// 000001111111110000000000000
@@ -51,14 +51,13 @@ class QueryConditionCache
 
     /// (*) You might wonder why Entry has its own mutex considering that CacheBase locks internally already.
     /// The reason is that ClickHouse scans ranges within the same part in parallel. The first scan creates
-    /// and inserts a new Key + Entry into the cache, the 2nd ... Nth scan find the existing Key and update
+    /// and inserts a new Key + Entry into the cache, the 2nd ... Nth scans find the existing Key and update
     /// its Entry for the new ranges. This can only be done safely in a synchronized fashion.
 
     /// (**) About error handling: There could be an exception after the i-th scan and cache entries could
     /// (theoretically) be left in a corrupt state. If we are not careful, future scans queries could then
     /// skip too many ranges. To prevent this, it is important to initialize all marks of each entry as
     /// non-matching. In case of an exception, future scans will then not skip them.
-
 };
 
     struct KeyHasher
@@ -71,6 +70,7 @@ class QueryConditionCache
         size_t operator()(const Entry & entry) const;
     };
 
+
 public:
     using Cache = CacheBase<Key, Entry, KeyHasher, QueryConditionCacheEntryWeight>;
 
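A hypothetical sketch of why the per-entry mutex (*) matters, separate from CacheBase's internal locking (again simplified types, not the ClickHouse code): parallel scans of the same part update one shared bitmap, and because std::vector<bool> packs bits, writes to even disjoint mark ranges can land in the same byte, which would be a data race without synchronization. The bitmap here is initialized so that unwritten ranges are never skipped, the conservative default the error-handling note (**) asks for.

#include <cstddef>
#include <mutex>
#include <shared_mutex>
#include <thread>
#include <vector>

struct Entry
{
    std::shared_mutex mutex;
    /// Conservative initial state: every mark must still be read, so a scan
    /// that throws halfway cannot cause future queries to over-prune.
    std::vector<bool> matching_marks = std::vector<bool>(1024, true);
};

int main()
{
    Entry entry; /// one cache entry, shared by all scans of the same part

    /// Four "scans" clear disjoint quarters of the bitmap concurrently.
    std::vector<std::thread> scans;
    for (size_t i = 0; i < 4; ++i)
        scans.emplace_back([&entry, i]
        {
            std::lock_guard lock(entry.mutex); /// serializes the bitmap writes
            for (size_t mark = i * 256; mark < (i + 1) * 256; ++mark)
                entry.matching_marks[mark] = false;
        });

    for (auto & scan : scans)
        scan.join();
}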