import enum
import logging
import threading
-from typing import Any, Dict, Generic, Iterable, Optional, Set, TypeVar
+from typing import Any, Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union

import attr
+from typing_extensions import Literal

from synapse.util.caches.lrucache import LruCache
+from synapse.util.caches.treecache import TreeCache

logger = logging.getLogger(__name__)


# This class can't be generic because it uses slots with attrs.
# See: https://github.com/python-attrs/attrs/issues/313
-@attr.s(slots=True, auto_attribs=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
class DictionaryEntry:  # should be: Generic[DKT, DV].
    """Returned when getting an entry from the cache

+    If `full` is true then `known_absent` will be the empty set.
+
    Attributes:
        full: Whether the cache has the full dict or just some keys.
            If not full then not all requested keys will necessarily be present
@@ -53,20 +57,90 @@ def __len__(self) -> int:
        return len(self.value)


+class _FullCacheKey(enum.Enum):
+    """The key we use to cache the full dict."""
+
+    KEY = object()
+
+
class _Sentinel(enum.Enum):
    # defining a sentinel in this way allows mypy to correctly handle the
    # type of a dictionary lookup.
    sentinel = object()


+class _PerKeyValue(Generic[DV]):
+    """The cached value of a dictionary key. If `value` is the sentinel,
+    indicates that the requested key is known to *not* be in the full dict.
+    """
+
+    __slots__ = ["value"]
+
+    def __init__(self, value: Union[DV, Literal[_Sentinel.sentinel]]) -> None:
+        self.value = value
+
+    def __len__(self) -> int:
+        # We add a `__len__` implementation as we use this class in a cache
+        # where the values are variable length.
+        return 1
+
+
class DictionaryCache(Generic[KT, DKT, DV]):
    """Caches key -> dictionary lookups, supporting caching partial dicts, i.e.
    fetching a subset of dictionary keys for a particular key.
+
+    This cache has two levels of key. First there is the "cache key" (of type
+    `KT`), which maps to a dict. The keys to that dict are the "dict key" (of
+    type `DKT`). The overall structure is therefore `KT->DKT->DV`. For
+    example, it might look like:
+
+        {
+            1: { 1: "a", 2: "b" },
+            2: { 1: "c" },
+        }
+
+    It is possible to look up either individual dict keys, or the *complete*
+    dict for a given cache key.
+
+    Each dict item, and the complete dict, is treated as a separate LRU
+    entry for the purpose of cache expiry. For example, given:
+        dict_cache.get(1, None)  -> DictionaryEntry({1: "a", 2: "b"})
+        dict_cache.get(1, [1])   -> DictionaryEntry({1: "a"})
+        dict_cache.get(1, [2])   -> DictionaryEntry({2: "b"})
+
+    ... then the cache entry for the complete dict will expire first,
+    followed by the cache entry for the '1' dict key, and finally that
+    for the '2' dict key.
    """

    def __init__(self, name: str, max_entries: int = 1000):
-        self.cache: LruCache[KT, DictionaryEntry] = LruCache(
-            max_size=max_entries, cache_name=name, size_callback=len
+        # We use a single LruCache to store two different types of entries:
+        #   1. Map from (key, dict_key) -> dict value (or sentinel, indicating
+        #      the key doesn't exist in the dict); and
+        #   2. Map from (key, _FullCacheKey.KEY) -> full dict.
+        #
+        # The former is used when explicit keys of the dictionary are looked up,
+        # and the latter when the full dictionary is requested.
+        #
+        # When explicit keys are requested and not in the cache, we then look
+        # to see if we have the full dict and use that if we do. If found in the
+        # full dict, each key is added into the cache.
+        #
+        # This setup allows the `LruCache` to prune the full dict entries if
+        # they haven't been used in a while, even when there have been recent
+        # queries for subsets of the dict.
+        #
+        # Typing:
+        #   * A key of `(KT, DKT)` has a value of `_PerKeyValue`
+        #   * A key of `(KT, _FullCacheKey.KEY)` has a value of `Dict[DKT, DV]`
+        self.cache: LruCache[
+            Tuple[KT, Union[DKT, Literal[_FullCacheKey.KEY]]],
+            Union[_PerKeyValue, Dict[DKT, DV]],
+        ] = LruCache(
+            max_size=max_entries,
+            cache_name=name,
+            cache_type=TreeCache,
+            size_callback=len,
        )

        self.name = name
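
Editor's note: to make the two entry types described in the `__init__` comments concrete, here is a small illustrative sketch (not part of the diff) of how the nested example from the class docstring would be laid out as flat `LruCache` keys once fully populated. The names `FULL` and `flat_cache` are hypothetical stand-ins used only for this sketch.

    # Stand-in for the docstring example {1: {1: "a", 2: "b"}, 2: {1: "c"}},
    # using a plain dict and a string in place of _FullCacheKey.KEY.
    FULL = "<_FullCacheKey.KEY>"
    flat_cache = {
        (1, 1): "a",                   # _PerKeyValue("a") in the real cache
        (1, 2): "b",                   # _PerKeyValue("b")
        (1, FULL): {1: "a", 2: "b"},   # the complete dict for cache key 1
        (2, 1): "c",                   # _PerKeyValue("c")
        (2, FULL): {1: "c"},           # the complete dict for cache key 2
    }

Each of these flat entries is an independent LRU entry, which is what lets the full-dict entry expire separately from the per-key entries.
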
@@ -91,23 +165,83 @@ def get(
        Args:
            key
            dict_keys: If given a set of keys then return only those keys
-                that exist in the cache.
+                that exist in the cache. If None then returns the full dict
+                if it is in the cache.

        Returns:
-            DictionaryEntry
+            DictionaryEntry: If `dict_keys` is not None then `DictionaryEntry`
+                will contain the keys that are in the cache. If None then
+                will either return the full dict if in the cache, or the empty
+                dict (with `full` set to False) if it isn't.
        """
-        entry = self.cache.get(key, _Sentinel.sentinel)
-        if entry is not _Sentinel.sentinel:
-            if dict_keys is None:
-                return DictionaryEntry(
-                    entry.full, entry.known_absent, dict(entry.value)
-                )
+        if dict_keys is None:
+            # The caller wants the full set of dictionary keys for this cache key
+            return self._get_full_dict(key)
+
+        # We are being asked for a subset of keys.
+
+        # First go and check for each requested dict key in the cache, tracking
+        # which we couldn't find.
+        values = {}
+        known_absent = set()
+        missing = []
+        for dict_key in dict_keys:
+            entry = self.cache.get((key, dict_key), _Sentinel.sentinel)
+            if entry is _Sentinel.sentinel:
+                missing.append(dict_key)
+                continue
+
+            assert isinstance(entry, _PerKeyValue)
+
+            if entry.value is _Sentinel.sentinel:
+                known_absent.add(dict_key)
            else:
-                return DictionaryEntry(
-                    entry.full,
-                    entry.known_absent,
-                    {k: entry.value[k] for k in dict_keys if k in entry.value},
-                )
+                values[dict_key] = entry.value
+
+        # If we found everything we can return immediately.
+        if not missing:
+            return DictionaryEntry(False, known_absent, values)
+
+        # We are missing some keys, so check if we happen to have the full dict in
+        # the cache.
+        #
+        # We don't update the last access time for this cache fetch, as we
+        # aren't explicitly interested in the full dict and so we don't want
+        # requests for explicit dict keys to keep the full dict in the cache.
+        entry = self.cache.get(
+            (key, _FullCacheKey.KEY),
+            _Sentinel.sentinel,
+            update_last_access=False,
+        )
+        if entry is _Sentinel.sentinel:
+            # Not in the cache, return the subset of keys we found.
+            return DictionaryEntry(False, known_absent, values)
+
+        # We have the full dict!
+        assert isinstance(entry, dict)
+
+        for dict_key in missing:
+            # We explicitly add each dict key to the cache, so that cache hit
+            # rates and LRU times for each key can be tracked separately.
+            value = entry.get(dict_key, _Sentinel.sentinel)  # type: ignore[arg-type]
+            self.cache[(key, dict_key)] = _PerKeyValue(value)
+
+            if value is not _Sentinel.sentinel:
+                values[dict_key] = value
+
+        return DictionaryEntry(True, set(), values)
+
+    def _get_full_dict(
+        self,
+        key: KT,
+    ) -> DictionaryEntry:
+        """Fetch the full dict for the given key."""
+
+        # First we check if we have cached the full dict.
+        entry = self.cache.get((key, _FullCacheKey.KEY), _Sentinel.sentinel)
+        if entry is not _Sentinel.sentinel:
+            assert isinstance(entry, dict)
+            return DictionaryEntry(True, set(), entry)

        return DictionaryEntry(False, set(), {})

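Editor's note: a rough usage sketch of the subset-lookup path above, assuming the `DictionaryCache` API as shown in this diff (the cache name and the concrete key/value types are illustrative only):

    cache = DictionaryCache("example")                      # KT=int, DKT=str, DV=str here
    cache.update(cache.sequence, 1, {"a": "x", "b": "y"})   # caches the full dict only
    partial = cache.get(1, ["a", "z"])
    # The per-key entries miss, so the full dict is consulted (without bumping
    # its LRU position): partial.full is True and partial.value == {"a": "x"}.
    # Both (1, "a") and (1, "z") are now cached individually; "z" is stored as
    # a known-absent sentinel for future lookups.
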
@@ -117,7 +251,13 @@ def invalidate(self, key: KT) -> None:
        # Increment the sequence number so that any SELECT statements that
        # raced with the INSERT don't update the cache (SYN-369)
        self.sequence += 1
-        self.cache.pop(key, None)
+
+        # We want to drop all information about the dict for the given key, so
+        # we use `del_multi` to delete it all in one go.
+        #
+        # We ignore the type error here: `del_multi` accepts a truncated key
+        # (when the key type is a tuple).
+        self.cache.del_multi((key,))  # type: ignore[arg-type]

    def invalidate_all(self) -> None:
        self.check_thread()
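
Editor's note: continuing the illustrative sketch above, the truncated-key deletion works because the `LruCache` is backed by a `TreeCache`, so deleting the prefix `(key,)` is intended to drop every entry stored under that cache key, both the per-dict-key entries and the full-dict entry. A hedged example of the intent:

    cache.invalidate(1)
    # All of (1, "a"), (1, "z") and (1, _FullCacheKey.KEY) are gone in one go:
    assert cache.get(1, None).full is False
    assert cache.get(1, ["a"]).value == {}
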
@@ -131,7 +271,16 @@ def update(
        value: Dict[DKT, DV],
        fetched_keys: Optional[Iterable[DKT]] = None,
    ) -> None:
-        """Updates the entry in the cache
+        """Updates the entry in the cache.
+
+        Note: This does *not* invalidate any existing entries for the `key`.
+        In particular, if we add an entry for the cached "full dict" with
+        `fetched_keys=None`, existing entries for individual dict keys are
+        not invalidated. Likewise, adding entries for individual keys does
+        not invalidate any cached value for the full dict.
+
+        In other words: if the underlying data is *changed*, the cache must
+        be explicitly invalidated via `.invalidate()`.

        Args:
            sequence
@@ -149,20 +298,27 @@ def update(
        # Only update the cache if the caches sequence number matches the
        # number that the cache had before the SELECT was started (SYN-369)
        if fetched_keys is None:
-            self._insert(key, value, set())
+            self.cache[(key, _FullCacheKey.KEY)] = value
        else:
-            self._update_or_insert(key, value, fetched_keys)
+            self._update_subset(key, value, fetched_keys)

-    def _update_or_insert(
-        self, key: KT, value: Dict[DKT, DV], known_absent: Iterable[DKT]
+    def _update_subset(
+        self, key: KT, value: Dict[DKT, DV], fetched_keys: Iterable[DKT]
    ) -> None:
-        # We pop and reinsert as we need to tell the cache the size may have
-        # changed
+        """Add the given dictionary values as explicit keys in the cache.
+
+        Args:
+            key: top-level cache key
+            value: The dictionary with all the values that we should cache
+            fetched_keys: The full set of dict keys that were looked up. Any keys
+                here not in `value` should be marked as "known absent".
+        """
+
+        for dict_key, dict_value in value.items():
+            self.cache[(key, dict_key)] = _PerKeyValue(dict_value)

-        entry: DictionaryEntry = self.cache.pop(key, DictionaryEntry(False, set(), {}))
-        entry.value.update(value)
-        entry.known_absent.update(known_absent)
-        self.cache[key] = entry
+        for dict_key in fetched_keys:
+            if dict_key in value:
+                continue

-    def _insert(self, key: KT, value: Dict[DKT, DV], known_absent: Set[DKT]) -> None:
-        self.cache[key] = DictionaryEntry(True, known_absent, value)
+            self.cache[(key, dict_key)] = _PerKeyValue(_Sentinel.sentinel)
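
Editor's note: a final hedged sketch of the `_update_subset` semantics, continuing the illustrative cache from the earlier sketches. A partial fetch that looked up two keys but only found one caches the other as known-absent; per the `update` docstring above, any previously cached full dict for that key is deliberately left untouched.

    cache.update(cache.sequence, 2, {"a": "x"}, fetched_keys=["a", "b"])
    entry = cache.get(2, ["a", "b"])
    # entry.full is False, entry.value == {"a": "x"}, entry.known_absent == {"b"}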