Commit 10cb7cb

polishment

1 parent bf7703c commit 10cb7cb
5 files changed: +7 -80 lines changed

src/crawlee/storage_clients/_memory/_request_queue_client.py

Lines changed: 0 additions & 3 deletions
@@ -35,9 +35,6 @@ class MemoryRequestQueueClient(RequestQueueClient):
     does not support data sharing across different processes.
     """
 
-    # Class variable to store request queue instances by ID
-    _instances: ClassVar[dict[str, MemoryRequestQueueClient]] = {}
-
     def __init__(
         self,
         *,
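For context, the deleted `_instances` attribute was a class-level cache keyed by queue ID. A minimal standalone sketch of that pattern (the `CachedClient` class and `open` classmethod are invented for illustration, not crawlee's API):

from __future__ import annotations

from typing import ClassVar


class CachedClient:
    # Class-level cache shared by all instances, keyed by ID.
    _instances: ClassVar[dict[str, CachedClient]] = {}

    def __init__(self, id: str) -> None:
        self.id = id

    @classmethod
    def open(cls, id: str) -> CachedClient:
        # Return the cached instance for this ID, creating it on first use.
        if id not in cls._instances:
            cls._instances[id] = cls(id)
        return cls._instances[id]


assert CachedClient.open('q1') is CachedClient.open('q1')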

src/crawlee/storages/_base.py

Lines changed: 0 additions & 14 deletions
@@ -58,17 +58,3 @@ async def purge(self) -> None:
         This method does not remove the storage itself, e.g. don't remove the metadata,
         but clears all items within it.
         """
-
-    @classmethod
-    def compute_cache_key(
-        cls,
-        id: str | None = None,
-        name: str | None = None,
-        configuration: Configuration | None = None,
-        storage_client: StorageClient | None = None,
-    ) -> str:
-        """Compute the cache key for the storage.
-
-        The cache key computed based on the storage ID, name, configuration fields, and storage client class.
-        """
-        return f'{id}|{name}|{configuration}|{storage_client.__class__}'
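The removed helper joined four fields with `|`; note that with all-default arguments every storage collapses to the same key. A standalone rework of the deleted formula (not crawlee's API) makes this visible:

def compute_cache_key(
    id: str | None = None,
    name: str | None = None,
    configuration: object | None = None,
    storage_client: object | None = None,
) -> str:
    # Same format as the deleted classmethod: four '|'-joined fields.
    return f'{id}|{name}|{configuration}|{storage_client.__class__}'


print(compute_cache_key(id='abc123', name='results'))
# abc123|results|None|<class 'NoneType'>
print(compute_cache_key())  # None|None|None|<class 'NoneType'> for every storage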

src/crawlee/storages/_key_value_store.py

Lines changed: 2 additions & 7 deletions
@@ -73,13 +73,13 @@ class KeyValueStore(Storage):
     _default_instance: ClassVar[KeyValueStore | None] = None
     """Cache for the default key-value store instance."""
 
-    # Cache for recoverable (auto-saved) values
     _autosaved_values: ClassVar[
         dict[
             str,
             dict[str, RecoverableState[AutosavedValue]],
         ]
     ] = {}
+    """Cache for recoverable (auto-saved) values."""
 
     def __init__(self, client: KeyValueStoreClient) -> None:
         """Initialize a new instance.
@@ -155,17 +155,12 @@ async def open(
 
     @override
     async def drop(self) -> None:
-        # Remove from cache before dropping
        if self.id in self._cache_by_id:
             del self._cache_by_id[self.id]
-
         if self.name is not None and self.name in self._cache_by_name:
             del self._cache_by_name[self.name]
 
-        # Clear cache with persistent values
-        await self._clear_cache()
-
-        # Drop the key-value store client
+        await self._clear_cache()  # Clear cache with persistent values.
         await self._client.drop()
 
     @override
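After this change `drop()` still evicts the store from both the ID and name caches before clearing persisted values and dropping the client, so re-opening yields a fresh store. A hedged usage sketch against the public API (the store name is invented for illustration):

import asyncio

from crawlee.storages import KeyValueStore


async def main() -> None:
    kvs = await KeyValueStore.open(name='example-store')
    await kvs.set_value('greeting', 'hello')

    # drop() evicts the instance from _cache_by_id / _cache_by_name,
    # clears the autosaved-value cache, and drops the backing client.
    await kvs.drop()

    # Re-opening therefore creates a fresh, empty store.
    reopened = await KeyValueStore.open(name='example-store')
    assert await reopened.get_value('greeting') is None


asyncio.run(main())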

tests/unit/crawlers/_adaptive_playwright/test_adaptive_playwright_crawler.py

Lines changed: 5 additions & 5 deletions
@@ -500,10 +500,10 @@ async def test_adaptive_playwright_crawler_timeout_in_sub_crawler(test_urls: lis
     """Tests that timeout in static sub crawler forces fall back to browser sub crawler.
 
     Create situation where static sub crawler blocks(should time out), such error should start browser sub
-    crawler."""
-
+    crawler.
+    """
     static_only_predictor_no_detection = _SimpleRenderingTypePredictor(detection_probability_recommendation=cycle([0]))
-    request_handler_timeout = timedelta(seconds=0.1)
+    request_handler_timeout = timedelta(seconds=1)
 
     crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
         max_request_retries=1,
@@ -522,9 +522,9 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
         except AdaptiveContextError:
             mocked_static_handler()
             # Relax timeout for the fallback browser request to avoid flakiness in test
-            crawler._request_handler_timeout = timedelta(seconds=5)
+            crawler._request_handler_timeout = timedelta(seconds=10)
         # Sleep for time obviously larger than top crawler timeout.
-        await asyncio.sleep(request_handler_timeout.total_seconds() * 2)
+        await asyncio.sleep(request_handler_timeout.total_seconds() * 3)
 
     await crawler.run(test_urls[:1])

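The new numbers keep a clear separation of scales: the handler sleeps 3 s (three times the 1 s static timeout), so the static pass reliably times out while the relaxed 10 s browser budget reliably does not. A plain-asyncio sketch of that timeout-then-fallback shape, independent of the crawler internals:

import asyncio
from datetime import timedelta

request_handler_timeout = timedelta(seconds=1)


async def handler() -> None:
    # Sleep well past the tight timeout, so the first attempt must fail.
    await asyncio.sleep(request_handler_timeout.total_seconds() * 3)


async def main() -> None:
    try:
        # "Static" pass: bounded by the 1 s timeout, guaranteed to time out.
        await asyncio.wait_for(handler(), timeout=request_handler_timeout.total_seconds())
    except asyncio.TimeoutError:
        # "Browser" fallback pass: the 10 s budget comfortably covers the 3 s sleep.
        await asyncio.wait_for(handler(), timeout=10)


asyncio.run(main())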

tests/unit/storages/test_request_queue.py

Lines changed: 0 additions & 51 deletions
@@ -259,57 +259,6 @@ async def test_add_requests_batch_with_forefront(rq: RequestQueue) -> None:
     assert next_request6 is None
 
 
-async def test_add_requests_mixed_forefront(rq: RequestQueue) -> None:
-    """Test the ordering when adding requests with mixed forefront values."""
-    # Add normal requests
-    await rq.add_request('https://example.com/normal1')
-    await rq.add_request('https://example.com/normal2')
-
-    # Add a batch with forefront=True
-    await rq.add_requests(
-        ['https://example.com/priority1', 'https://example.com/priority2'],
-        forefront=True,
-    )
-
-    # Add another normal request
-    await rq.add_request('https://example.com/normal3')
-
-    # Add another priority request
-    await rq.add_request('https://example.com/priority3', forefront=True)
-
-    # Wait for background tasks
-    await asyncio.sleep(0.1)
-
-    # The expected order should be:
-    # 1. priority3 (most recent forefront)
-    # 2. priority1 (from batch, forefront)
-    # 3. priority2 (from batch, forefront)
-    # 4. normal1 (oldest normal)
-    # 5. normal2
-    # 6. normal3 (newest normal)
-
-    requests = []
-    while True:
-        req = await rq.fetch_next_request()
-        if req is None:
-            break
-        requests.append(req)
-        await rq.mark_request_as_handled(req)
-
-    assert len(requests) == 6
-    assert requests[0].url == 'https://example.com/priority3'
-
-    # The next two should be from the forefront batch (exact order within batch may vary)
-    batch_urls = {requests[1].url, requests[2].url}
-    assert 'https://example.com/priority1' in batch_urls
-    assert 'https://example.com/priority2' in batch_urls
-
-    # Then the normal requests in order
-    assert requests[3].url == 'https://example.com/normal1'
-    assert requests[4].url == 'https://example.com/normal2'
-    assert requests[5].url == 'https://example.com/normal3'
-
-
 async def test_add_requests_with_forefront(rq: RequestQueue) -> None:
     """Test adding requests to the front of the queue."""
     # Add some initial requests
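The deleted test's ordering rules (newest forefront request first, normal requests in FIFO order) are still exercised by the surrounding tests. A condensed sketch using only the calls that appeared in the removed code, with a standalone `RequestQueue.open()` in place of the `rq` fixture:

import asyncio

from crawlee.storages import RequestQueue


async def main() -> None:
    rq = await RequestQueue.open()

    await rq.add_request('https://example.com/normal1')
    await rq.add_request('https://example.com/priority1', forefront=True)

    # The forefront request jumps ahead of the earlier normal request.
    first = await rq.fetch_next_request()
    assert first is not None and first.url == 'https://example.com/priority1'
    await rq.mark_request_as_handled(first)

    second = await rq.fetch_next_request()
    assert second is not None and second.url == 'https://example.com/normal1'
    await rq.mark_request_as_handled(second)

    await rq.drop()


asyncio.run(main())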
