Skip to content

Commit ecc6016

Browse files
committed
graphql: Simplify herd cache
1 parent fff620c commit ecc6016

File tree

6 files changed

+76
-183
lines changed

6 files changed

+76
-183
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

graphql/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ uuid = { version = "0.8.1", features = ["v4"] }
1414
lru_time_cache = "0.10"
1515
stable-hash = { git = "https://github.com/graphprotocol/stable-hash" }
1616
once_cell = "1.4.0"
17+
defer = "0.1"
1718

1819
[dev-dependencies]
1920
pretty_assertions = "0.6.1"

graphql/src/execution/cache.rs

Lines changed: 32 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -3,73 +3,16 @@ use once_cell::sync::OnceCell;
33
use stable_hash::crypto::SetHasher;
44
use stable_hash::prelude::*;
55
use std::collections::hash_map::Entry;
6-
use std::collections::{HashMap, VecDeque};
7-
use std::ops::Deref;
8-
use std::sync::{Arc, Condvar, Mutex, Weak};
6+
use std::collections::HashMap;
7+
use std::sync::{Arc, Condvar, Mutex};
98

109
type Hash = <SetHasher as StableHasher>::Out;
1110

12-
/// A queue of items which (may) have expired from the cache.
13-
/// This is kept separate to avoid circular references. The way
14-
/// the code is implemented ensures that this does not grow without
15-
/// bound, and generally cleanup stays ahead of insertion.
16-
#[derive(Default, Clone, Debug)]
17-
struct CleanupQueue {
18-
inner: Arc<Mutex<VecDeque<Hash>>>,
19-
}
20-
21-
impl CleanupQueue {
22-
/// Schedule an item for cleanup later
23-
fn push(&self, value: Hash) {
24-
let mut inner = self.inner.lock().unwrap();
25-
inner.push_back(value);
26-
}
27-
/// Take an item to clean up. The consumer MUST
28-
/// deal with this without fail or memory will leak.
29-
fn pop(&self) -> Option<Hash> {
30-
let mut inner = self.inner.lock().unwrap();
31-
inner.pop_front()
32-
}
33-
}
34-
35-
// Implemented on top of Arc, so this is ok.
36-
impl CheapClone for CleanupQueue {}
37-
38-
/// A handle to a cached item. As long as this handle is kept alive,
39-
/// the value remains in the cache.
40-
///
41-
/// The cached value may not be immediately available when used.
42-
/// In this case this will block until the value is available.
43-
#[derive(Debug)]
44-
pub struct CachedResponse<R> {
45-
inner: Arc<CacheEntryInner<R>>,
46-
}
47-
48-
impl<R> Deref for CachedResponse<R> {
49-
type Target = R;
50-
fn deref(&self) -> &R {
51-
self.inner.wait()
52-
}
53-
}
54-
55-
// Manual impl required because of generic parameter.
56-
impl<R> Clone for CachedResponse<R> {
57-
fn clone(&self) -> Self {
58-
Self {
59-
inner: self.inner.clone(),
60-
}
61-
}
62-
}
63-
64-
// Ok, because implemented on top of Arc
65-
impl<R> CheapClone for CachedResponse<R> {}
66-
6711
/// The 'true' cache entry that lives inside the Arc.
6812
/// When the last Arc is dropped, this is dropped,
6913
/// and the cache is removed.
7014
#[derive(Debug)]
7115
struct CacheEntryInner<R> {
72-
cleanup: CleanupQueue,
7316
hash: Hash,
7417
// Considered using once_cell::sync::Lazy,
7518
// but that quickly becomes a mess of generics
@@ -83,9 +26,8 @@ struct CacheEntryInner<R> {
8326
}
8427

8528
impl<R> CacheEntryInner<R> {
86-
fn new(hash: Hash, cleanup: &CleanupQueue) -> Arc<Self> {
29+
fn new(hash: Hash) -> Arc<Self> {
8730
Arc::new(Self {
88-
cleanup: cleanup.cheap_clone(),
8931
hash,
9032
result: OnceCell::new(),
9133
condvar: Condvar::new(),
@@ -133,13 +75,6 @@ impl<R> CacheEntryInner<R> {
13375
}
13476
}
13577

136-
/// Once the last reference is removed, schedule for cleanup in the cache.
137-
impl<R> Drop for CacheEntryInner<R> {
138-
fn drop(&mut self) {
139-
self.cleanup.push(self.hash);
140-
}
141-
}
142-
14378
/// On drop, call set_panic on self.value,
14479
/// unless set was called.
14580
struct PanicHelper<R> {
@@ -165,79 +100,60 @@ impl<R> PanicHelper<R> {
165100
}
166101
}
167102

168-
/// Cache that keeps a result around as long as it is still in use somewhere.
103+
/// Cache that keeps a result around as long as it is still being processed.
169104
/// The cache ensures that the query is not re-entrant, so multiple consumers
170105
/// of identical queries will not execute them in parallel.
171106
///
172107
/// This has a lot in common with AsyncCache in the network-services repo,
173108
/// but is sync instead of async, and more specialized.
174109
pub struct QueryCache<R> {
175-
cleanup: CleanupQueue,
176-
cache: Arc<Mutex<HashMap<Hash, Weak<CacheEntryInner<R>>>>>,
110+
cache: Arc<Mutex<HashMap<Hash, Arc<CacheEntryInner<R>>>>>,
177111
}
178112

179-
impl<R> QueryCache<R> {
113+
impl<R: Clone> QueryCache<R> {
180114
pub fn new() -> Self {
181115
Self {
182-
cleanup: CleanupQueue::default(),
183116
cache: Arc::new(Mutex::new(HashMap::new())),
184117
}
185118
}
119+
186120
/// Assumption: Whatever F is passed in consistently returns the same
187121
/// value for any input - for all values of F used with this Cache.
188-
pub fn cached_query<F: FnOnce() -> R>(&self, hash: Hash, f: F) -> CachedResponse<R> {
189-
// This holds its own lock so make sure that this happens outside of
190-
// holding any other lock.
191-
let cleanup = self.cleanup.pop();
192-
193-
let mut cache = self.cache.lock().unwrap();
194-
195-
// Execute the amortized cleanup step, checking that the content is
196-
// still missing since it may have been re-inserted. By always cleaning
197-
// up one item before potentially inserting another item we ensure that
198-
// the memory usage stays bounded. There is no need to stay ahead of
199-
// this work, because this step doesn't actually free any real memory,
200-
// it just ensures the memory doesn't grow unnecessarily when inserting.
201-
if let Some(cleanup) = cleanup {
202-
if let Entry::Occupied(entry) = cache.entry(cleanup) {
203-
if entry.get().strong_count() == 0 {
204-
entry.remove_entry();
122+
pub fn cached_query<F: FnOnce() -> R>(&self, hash: Hash, f: F) -> R {
123+
let work = {
124+
let mut cache = self.cache.lock().unwrap();
125+
126+
// Try to pull the item out of the cache and return it.
127+
// If we get past this expr, it means this thread will do
128+
// the work and fulfill that 'promise' in this work variable.
129+
match cache.entry(hash) {
130+
Entry::Occupied(entry) => {
131+
// Another thread is doing the work, release the lock and wait for it.
132+
let entry = entry.get().cheap_clone();
133+
drop(cache);
134+
return entry.wait().clone();
205135
}
206-
}
207-
}
208-
209-
// Try to pull the item out of the cache and return it.
210-
// If we get past this expr, it means this thread will do
211-
// the work and fulfill that 'promise' in this work variable.
212-
let work = match cache.entry(hash) {
213-
Entry::Occupied(mut entry) => {
214-
// Cache hit!
215-
if let Some(cached) = entry.get().upgrade() {
216-
return CachedResponse { inner: cached };
136+
Entry::Vacant(entry) => {
137+
let uncached = CacheEntryInner::new(hash);
138+
entry.insert(uncached.clone());
139+
uncached
217140
}
218-
// Need to re-add to cache
219-
let uncached = CacheEntryInner::new(hash, &self.cleanup);
220-
*entry.get_mut() = Arc::downgrade(&uncached);
221-
uncached
222-
}
223-
Entry::Vacant(entry) => {
224-
let uncached = CacheEntryInner::new(hash, &self.cleanup);
225-
entry.insert(Arc::downgrade(&uncached));
226-
uncached
227141
}
228142
};
229143

230-
// Don't hold the lock.
231-
drop(cache);
144+
defer::defer(|| {
145+
// Remove this from the list of in-flight work.
146+
self.cache.lock().unwrap().remove(&hash);
147+
});
232148

233149
// Now that we have taken on the responsibility, propagate panics to
234150
// make sure that no threads wait forever on a result that will never
235151
// come.
236152
let work = PanicHelper::new(work);
237153

238-
// After all that ceremony, this part is easy enough.
239-
CachedResponse {
240-
inner: work.set(f()),
241-
}
154+
// Actually compute the value and then share it with waiters.
155+
let value = f();
156+
work.set(value.clone());
157+
value
242158
}
243159
}

graphql/src/execution/execution.rs

Lines changed: 41 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::cache::{CachedResponse, QueryCache};
1+
use super::cache::QueryCache;
22
use graph::prelude::CheapClone;
33
use graphql_parser::query as q;
44
use graphql_parser::schema as s;
@@ -9,7 +9,6 @@ use stable_hash::prelude::*;
99
use stable_hash::utils::stable_hash;
1010
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
1111
use std::iter;
12-
use std::ops::Deref;
1312
use std::sync::atomic::{AtomicBool, Ordering};
1413
use std::sync::RwLock;
1514
use std::time::Instant;
@@ -33,7 +32,7 @@ struct CacheByBlock {
3332
block: EthereumBlockPointer,
3433
max_weight: usize,
3534
weight: usize,
36-
cache: HashMap<QueryHash, CachedResponse<QueryResponse>>,
35+
cache: HashMap<QueryHash, QueryResponse>,
3736
}
3837

3938
impl CacheByBlock {
@@ -47,14 +46,13 @@ impl CacheByBlock {
4746
}
4847

4948
/// Returns `true` if the insert was successful or `false` if the cache was full.
50-
fn insert(&mut self, key: QueryHash, value: &CachedResponse<QueryResponse>) -> bool {
49+
fn insert(&mut self, key: QueryHash, value: QueryResponse) -> bool {
5150
// Unwrap: We never try to insert errors into this cache.
52-
let weight = value.deref().as_ref().ok().unwrap().weight();
53-
51+
let weight = value.as_ref().unwrap().weight();
5452
let fits_in_cache = self.weight + weight <= self.max_weight;
5553
if fits_in_cache {
5654
self.weight += weight;
57-
self.cache.insert(key, value.cheap_clone());
55+
self.cache.insert(key, value);
5856
}
5957
fits_in_cache
6058
}
@@ -100,21 +98,6 @@ lazy_static! {
10098
static ref QUERY_HERD_CACHE: QueryCache<QueryResponse> = QueryCache::new();
10199
}
102100

103-
pub enum MaybeCached<T> {
104-
NotCached(T),
105-
Cached(CachedResponse<T>),
106-
}
107-
108-
impl<T: Clone> MaybeCached<T> {
109-
// Note that this drops any handle to the cache that may exist.
110-
pub fn to_inner(self) -> T {
111-
match self {
112-
MaybeCached::NotCached(t) => t,
113-
MaybeCached::Cached(t) => t.deref().clone(),
114-
}
115-
}
116-
}
117-
118101
struct HashableQuery<'a> {
119102
query_schema_id: &'a SubgraphDeploymentId,
120103
query_variables: &'a HashMap<q::Name, q::Value>,
@@ -317,7 +300,7 @@ pub fn execute_root_selection_set(
317300
selection_set: &q::SelectionSet,
318301
root_type: &s::ObjectType,
319302
block_ptr: Option<EthereumBlockPointer>,
320-
) -> MaybeCached<QueryResponse> {
303+
) -> QueryResponse {
321304
// Cache the cache key to not have to calculate it twice - once for lookup
322305
// and once for insert.
323306
let mut key: Option<QueryHash> = None;
@@ -337,7 +320,7 @@ pub fn execute_root_selection_set(
337320
// Iterate from the most recent block looking for a block that matches.
338321
if let Some(cache_by_block) = cache.iter().find(|c| c.block == block_ptr) {
339322
if let Some(response) = cache_by_block.cache.get(&cache_key) {
340-
return MaybeCached::Cached(response.cheap_clone());
323+
return response.clone();
341324
}
342325
}
343326

@@ -347,51 +330,47 @@ pub fn execute_root_selection_set(
347330
}
348331

349332
let result = if let Some(key) = key {
350-
let cached = QUERY_HERD_CACHE.cached_query(key, || {
333+
QUERY_HERD_CACHE.cached_query(key, || {
351334
execute_root_selection_set_uncached(ctx, selection_set, root_type)
352-
});
353-
MaybeCached::Cached(cached)
335+
})
354336
} else {
355-
let not_cached = execute_root_selection_set_uncached(ctx, selection_set, root_type);
356-
MaybeCached::NotCached(not_cached)
337+
execute_root_selection_set_uncached(ctx, selection_set, root_type)
357338
};
358339

359340
// Check if this query should be cached.
360-
if let (MaybeCached::Cached(cached), Some(key), Some(block_ptr)) = (&result, key, block_ptr) {
361-
// Share errors from the herd cache, but don't store them in generational cache.
362-
// In particular, there is a problem where asking for a block pointer beyond the chain
363-
// head can cause the legitimate cache to be thrown out.
364-
if cached.is_ok() {
365-
let mut cache = QUERY_CACHE.write().unwrap();
366-
367-
// If there is already a cache by the block of this query, just add it there.
368-
if let Some(cache_by_block) = cache.iter_mut().find(|c| c.block == block_ptr) {
369-
let cache_insert = cache_by_block.insert(key, cached);
370-
ctx.cache_insert.store(cache_insert, Ordering::SeqCst);
371-
} else if *QUERY_CACHE_BLOCKS > 0 {
372-
// We're creating a new `CacheByBlock` if:
373-
// - There are none yet, this is the first query being cached, or
374-
// - `block_ptr` is of higher or equal number than the most recent block in the cache.
375-
// Otherwise this is a historical query which will not be cached.
376-
let should_insert = match cache.iter().next() {
377-
None => true,
378-
Some(highest) if highest.block.number <= block_ptr.number => true,
379-
Some(_) => false,
380-
};
381-
382-
if should_insert {
383-
if cache.len() == *QUERY_CACHE_BLOCKS {
384-
// At capacity, so pop the oldest block.
385-
cache.pop_back();
386-
}
341+
// Share errors from the herd cache, but don't store them in generational cache.
342+
// In particular, there is a problem where asking for a block pointer beyond the chain
343+
// head can cause the legitimate cache to be thrown out.
344+
if let (Ok(_), Some(key), Some(block_ptr)) = (&result, key, block_ptr) {
345+
let mut cache = QUERY_CACHE.write().unwrap();
346+
347+
// If there is already a cache by the block of this query, just add it there.
348+
if let Some(cache_by_block) = cache.iter_mut().find(|c| c.block == block_ptr) {
349+
let cache_insert = cache_by_block.insert(key, result.clone());
350+
ctx.cache_insert.store(cache_insert, Ordering::SeqCst);
351+
} else if *QUERY_CACHE_BLOCKS > 0 {
352+
// We're creating a new `CacheByBlock` if:
353+
// - There are none yet, this is the first query being cached, or
354+
// - `block_ptr` is of higher or equal number than the most recent block in the cache.
355+
// Otherwise this is a historical query which will not be cached.
356+
let should_insert = match cache.iter().next() {
357+
None => true,
358+
Some(highest) if highest.block.number <= block_ptr.number => true,
359+
Some(_) => false,
360+
};
387361

388-
// Create a new cache by block, insert this entry, and add it to the QUERY_CACHE.
389-
let max_weight = *QUERY_CACHE_MAX_MEM / *QUERY_CACHE_BLOCKS;
390-
let mut cache_by_block = CacheByBlock::new(block_ptr, max_weight);
391-
let cache_insert = cache_by_block.insert(key, cached);
392-
ctx.cache_insert.store(cache_insert, Ordering::SeqCst);
393-
cache.push_front(cache_by_block);
362+
if should_insert {
363+
if cache.len() == *QUERY_CACHE_BLOCKS {
364+
// At capacity, so pop the oldest block.
365+
cache.pop_back();
394366
}
367+
368+
// Create a new cache by block, insert this entry, and add it to the QUERY_CACHE.
369+
let max_weight = *QUERY_CACHE_MAX_MEM / *QUERY_CACHE_BLOCKS;
370+
let mut cache_by_block = CacheByBlock::new(block_ptr, max_weight);
371+
let cache_insert = cache_by_block.insert(key, result.clone());
372+
ctx.cache_insert.store(cache_insert, Ordering::SeqCst);
373+
cache.push_front(cache_by_block);
395374
}
396375
}
397376
}

graphql/src/query/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,5 @@ where
102102
"complexity" => &query.complexity
103103
);
104104
}
105-
result.to_inner()
105+
result
106106
}

0 commit comments

Comments
 (0)