Skip to content

Commit 854c12f

Browse files
committed
Avoid eviction on entry replacement
`*Result` objects are immutable, thus if a `PartialResult` gets filled further it has to be re-set into the cache. This does not change the cache size, but because the current S3 and SIEVE implementations unconditionally check the cache size on `__setitem__` they may evict an entry unnecessarily. Fix that: if there is already a valid cache entry for the key, just update it in place instead of trying to evict then creating a brand new entry. Also update the LRU to pre-check for size (and presence as well), this may make setting a bit more expensive than post-check but it avoids "wronging" the user by bypassing the limit they set. Fixes #201
1 parent a270fe2 commit 854c12f

File tree

2 files changed

+20
-24
lines changed

2 files changed

+20
-24
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ select = ["F", "E", "W", "I", "RET", "RUF", "PT"]
4848
ignore = [
4949
"RET505", # elif after return
5050
"E501", # line too long, formatter should take care of the fixable ones
51+
"E721", # I'll compare types with `is` if I want
5152
]
5253

5354
[tool.ruff.lint.isort]

src/ua_parser/caching.py

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@
2626

2727

2828
class Cache(Protocol):
29-
"""Cache abstract protocol. The :class:`CachingResolver` will look
29+
"""Cache()
30+
31+
Cache abstract protocol. The :class:`CachingResolver` will look
3032
values up, merge what was returned (possibly nothing) with what it
3133
got from its actual parser, and *re-set the result*.
32-
33-
A :class:`Cache` is responsible for its own replacement policy.
3434
"""
3535

3636
@abc.abstractmethod
@@ -55,12 +55,6 @@ class Lru:
5555
full of popular items then all of them being replaced at once:
5656
:class:`S3Fifo` and :class:`Sieve` are FIFO-based caches and have
5757
worst-case O(n) eviction.
58-
59-
60-
.. note:: The cache size is adjusted after inserting the new
61-
entry, so the cache will temporarily contain ``maxsize +
62-
1`` items.
63-
6458
"""
6559

6660
def __init__(self, maxsize: int):
@@ -77,10 +71,9 @@ def __getitem__(self, key: str) -> Optional[PartialResult]:
7771

7872
def __setitem__(self, key: str, value: PartialResult) -> None:
7973
with self.lock:
80-
self.cache[key] = value
81-
self.cache.move_to_end(key)
82-
while len(self.cache) > self.maxsize:
74+
if len(self.cache) >= self.maxsize and key not in self.cache:
8375
self.cache.popitem(last=False)
76+
self.cache[key] = value
8477

8578

8679
@dataclasses.dataclass
@@ -92,14 +85,12 @@ class CacheEntry:
9285

9386

9487
class S3Fifo:
95-
"""FIFO-based quick-demotion lazy-promotion cache by Yang, Zhang,
96-
Qiu, Yue, Vinayak.
88+
"""FIFO-based quick-demotion lazy-promotion cache [S3-FIFO]_.
9789
9890
Experimentally provides excellent hit rate at lower cache sizes,
9991
for a relatively simple and efficient implementation. Notably
10092
excellent at handling "one hit wonders", aka entries seen only
10193
once during a work-set (or reasonable work window).
102-
10394
"""
10495

10596
def __init__(self, maxsize: int):
@@ -113,16 +104,19 @@ def __init__(self, maxsize: int):
113104
self.lock = threading.Lock()
114105

115106
def __getitem__(self, key: str) -> Optional[PartialResult]:
116-
e = self.index.get(key)
117-
if e and isinstance(e, CacheEntry):
118-
# small race here, we could bump the freq above the limit
107+
if (e := self.index.get(key)) and type(e) is CacheEntry:
108+
# small race here, we could miss an increment
119109
e.freq = min(e.freq + 1, 3)
120110
return e.value
121111

122112
return None
123113

124114
def __setitem__(self, key: str, r: PartialResult) -> None:
125115
with self.lock:
116+
if (e := self.index.get(key)) and type(e) is CacheEntry:
117+
e.value = r
118+
return
119+
126120
if len(self.small) + len(self.main) >= self.maxsize:
127121
# if main is not overcapacity, resize small
128122
if len(self.main) < self.main_target:
@@ -133,7 +127,7 @@ def __setitem__(self, key: str, r: PartialResult) -> None:
133127
self._evict_main()
134128

135129
entry = CacheEntry(key, r, 0)
136-
if isinstance(self.index.get(key), str):
130+
if type(self.index.get(key)) is str:
137131
self.main.appendleft(entry)
138132
else:
139133
self.small.appendleft(entry)
@@ -175,8 +169,7 @@ class SieveNode:
175169

176170

177171
class Sieve:
178-
"""FIFO-based quick-demotion cache by Zhang, Yang, Yue, Vigfusson,
179-
Rashmi.
172+
"""FIFO-based quick-demotion cache [SIEVE]_.
180173
181174
Simpler FIFO-based cache, cousin of :class:`S3Fifo`.
182175
Experimentally slightly lower hit rates than :class:`S3Fifo` (if
@@ -187,7 +180,6 @@ class Sieve:
187180
Can be an interesting candidate when trying to save on memory,
188181
although the contained entries will generally be much larger than
189182
the cache itself.
190-
191183
"""
192184

193185
def __init__(self, maxsize: int) -> None:
@@ -200,15 +192,18 @@ def __init__(self, maxsize: int) -> None:
200192
self.lock = threading.Lock()
201193

202194
def __getitem__(self, key: str) -> Optional[PartialResult]:
203-
entry = self.cache.get(key)
204-
if entry:
195+
if entry := self.cache.get(key):
205196
entry.visited = True
206197
return entry.value
207198

208199
return None
209200

210201
def __setitem__(self, key: str, value: PartialResult) -> None:
211202
with self.lock:
203+
if e := self.cache.get(key):
204+
e.value = value
205+
return
206+
212207
if len(self.cache) >= self.maxsize:
213208
self._evict()
214209

0 commit comments

Comments
 (0)