Skip to content

Commit 30324bd

Browse files
authored
chore(deps): change custom LRUCache to cachetools.LRUCache (#988)
### Description - Change the custom `LRUCache` to `cachetools.LRUCache`. In my opinion, the logic of `functools.lru_cache` isn't well suited to this use case. Therefore, if we want to modify our caching approach, `cachetools` appears to be the better option. ### Issues - Closes: #86
1 parent 0e6685e commit 30324bd

File tree

6 files changed

+83
-152
lines changed

6 files changed

+83
-152
lines changed

poetry.lock

Lines changed: 60 additions & 36 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ keywords = [
4444
python = "^3.9"
4545
beautifulsoup4 = { version = ">=4.12.0", optional = true }
4646
browserforge = { version = ">=1.2.3", optional = true }
47+
cachetools = ">=5.5.1"
4748
colorama = ">=0.4.0"
4849
cookiecutter = ">=2.6.0"
4950
curl-cffi = { version = ">=0.7.2", optional = true }
@@ -93,6 +94,7 @@ ruff = "~0.8.0"
9394
setuptools = "~75.8.0" # setuptools are used by pytest, but not explicitly required
9495
sortedcontainers-stubs = "~2.4.0"
9596
types-beautifulsoup4 = "~4.12.0.20240229"
97+
types-cachetools = "~=5.5.0.20240820"
9698
types-colorama = "~0.4.15.20240106"
9799
types-psutil = "~5.9.5.20240205"
98100
types-python-dateutil = "~2.9.0.20240316"

src/crawlee/_utils/lru_cache.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

src/crawlee/storages/_request_queue.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,12 @@
77
from logging import getLogger
88
from typing import TYPE_CHECKING, Any, Generic, TypedDict, TypeVar
99

10+
from cachetools import LRUCache
1011
from typing_extensions import override
1112

1213
from crawlee import service_locator
1314
from crawlee._utils.crypto import crypto_random_object_id
1415
from crawlee._utils.docs import docs_group
15-
from crawlee._utils.lru_cache import LRUCache
1616
from crawlee._utils.requests import unique_key_to_request_id
1717
from crawlee._utils.wait import wait_for_all_tasks_for_finish
1818
from crawlee.events import Event
@@ -135,7 +135,7 @@ def __init__(self, id: str, name: str | None, storage_client: StorageClient) ->
135135
self._in_progress: set[str] = set()
136136
self._last_activity = datetime.now(timezone.utc)
137137
self._recently_handled: BoundedSet[str] = BoundedSet(max_length=self._RECENTLY_HANDLED_CACHE_SIZE)
138-
self._requests_cache: LRUCache[CachedRequest] = LRUCache(max_length=self._MAX_CACHED_REQUESTS)
138+
self._requests_cache: LRUCache[str, CachedRequest] = LRUCache(maxsize=self._MAX_CACHED_REQUESTS)
139139

140140
@property
141141
@override

tests/unit/_utils/test_lru_cache.py

Lines changed: 0 additions & 59 deletions
This file was deleted.

tests/unit/storages/test_request_queue.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,3 +246,22 @@ async def test_deduplication_of_requests_with_valid_custom_unique_key() -> None:
246246
assert await rq.get_total_count() == 1
247247

248248
assert await rq.fetch_next_request() == request_1
249+
250+
251+
async def test_cache_requests(request_queue: RequestQueue) -> None:
252+
request_1 = Request.from_url('https://apify.com')
253+
request_2 = Request.from_url('https://crawlee.dev')
254+
255+
await request_queue.add_request(request_1)
256+
await request_queue.add_request(request_2)
257+
258+
assert request_queue._requests_cache.currsize == 2
259+
260+
fetched_request = await request_queue.fetch_next_request()
261+
262+
assert fetched_request is not None
263+
assert fetched_request.id == request_1.id
264+
265+
# After calling fetch_next_request, request_1 has been moved to the end of the cache store.
266+
cached_items = [request_queue._requests_cache.popitem()[0] for _ in range(2)]
267+
assert cached_items == [request_2.id, request_1.id]

0 commit comments

Comments
 (0)