|
15 | 15 | from crawlee._utils.lru_cache import LRUCache
|
16 | 16 | from crawlee._utils.requests import unique_key_to_request_id
|
17 | 17 | from crawlee._utils.wait import wait_for_all_tasks_for_finish
|
18 |
| -from crawlee.base_storage_client._models import ProcessedRequest, RequestQueueMetadata |
| 18 | +from crawlee.base_storage_client import BaseStorageClient, ProcessedRequest, RequestQueueMetadata |
19 | 19 | from crawlee.events._types import Event
|
20 | 20 | from crawlee.storages._base_storage import BaseStorage
|
21 | 21 | from crawlee.storages._request_provider import RequestProvider
|
|
24 | 24 | from collections.abc import Sequence
|
25 | 25 |
|
26 | 26 | from crawlee._request import Request
|
27 |
| - |
| 27 | + from crawlee.configuration import Configuration |
28 | 28 |
|
29 | 29 | logger = getLogger(__name__)
|
30 | 30 |
|
@@ -104,10 +104,9 @@ class RequestQueue(BaseStorage, RequestProvider):
|
104 | 104 | _STORAGE_CONSISTENCY_DELAY = timedelta(seconds=3)
|
105 | 105 | """Expected delay for storage to achieve consistency, guiding the timing of subsequent read operations."""
|
106 | 106 |
|
107 |
| - def __init__(self, id: str, name: str | None) -> None: |
| 107 | + def __init__(self, id: str, name: str | None, storage_client: BaseStorageClient) -> None: |
108 | 108 | config = service_container.get_configuration()
|
109 | 109 | event_manager = service_container.get_event_manager()
|
110 |
| - storage_client = service_container.get_storage_client() |
111 | 110 |
|
112 | 111 | self._id = id
|
113 | 112 | self._name = name
|
@@ -148,10 +147,26 @@ def name(self) -> str | None:
|
148 | 147 |
|
149 | 148 | @override
|
150 | 149 | @classmethod
|
151 |
| - async def open(cls, *, id: str | None = None, name: str | None = None) -> RequestQueue: |
| 150 | + async def open( |
| 151 | + cls, |
| 152 | + *, |
| 153 | + id: str | None = None, |
| 154 | + name: str | None = None, |
| 155 | + configuration: Configuration | None = None, |
| 156 | + storage_client: BaseStorageClient | None = None, |
| 157 | + ) -> RequestQueue: |
152 | 158 | from crawlee.storages._creation_management import open_storage
|
153 | 159 |
|
154 |
| - return await open_storage(storage_class=cls, id=id, name=name) |
| 160 | + configuration = configuration or service_container.get_configuration() |
| 161 | + storage_client = storage_client or service_container.get_storage_client() |
| 162 | + |
| 163 | + return await open_storage( |
| 164 | + storage_class=cls, |
| 165 | + id=id, |
| 166 | + name=name, |
| 167 | + configuration=configuration, |
| 168 | + storage_client=storage_client, |
| 169 | + ) |
155 | 170 |
|
156 | 171 | @override
|
157 | 172 | async def drop(self, *, timeout: timedelta | None = None) -> None:
|
@@ -204,7 +219,7 @@ async def add_request(
|
204 | 219 | ):
|
205 | 220 | self._assumed_total_count += 1
|
206 | 221 |
|
207 |
| - return processed_request # type: ignore[no-any-return] # Mypy is broken |
| 222 | + return processed_request |
208 | 223 |
|
209 | 224 | @override
|
210 | 225 | async def add_requests_batched(
|
@@ -260,7 +275,7 @@ async def get_request(self, request_id: str) -> Request | None:
|
260 | 275 | Returns:
|
261 | 276 | The retrieved request, or `None`, if it does not exist.
|
262 | 277 | """
|
263 |
| - return await self._resource_client.get_request(request_id) # type: ignore[no-any-return] # Mypy is broken |
| 278 | + return await self._resource_client.get_request(request_id) |
264 | 279 |
|
265 | 280 | async def fetch_next_request(self) -> Request | None:
|
266 | 281 | """Return the next request in the queue to be processed.
|
@@ -373,7 +388,7 @@ async def mark_request_as_handled(self, request: Request) -> ProcessedRequest |
|
373 | 388 | self._assumed_handled_count += 1
|
374 | 389 |
|
375 | 390 | self._cache_request(unique_key_to_request_id(request.unique_key), processed_request)
|
376 |
| - return processed_request # type: ignore[no-any-return] # Mypy is broken |
| 391 | + return processed_request |
377 | 392 |
|
378 | 393 | async def reclaim_request(
|
379 | 394 | self,
|
@@ -417,7 +432,7 @@ async def reclaim_request(
|
417 | 432 | except Exception as err:
|
418 | 433 | logger.debug(f'Failed to delete request lock for request {request.id}', exc_info=err)
|
419 | 434 |
|
420 |
| - return processed_request # type: ignore[no-any-return] # Mypy is broken |
| 435 | + return processed_request |
421 | 436 |
|
422 | 437 | async def is_empty(self) -> bool:
|
423 | 438 | """Check whether the queue is empty.
|
@@ -483,7 +498,7 @@ async def is_finished(self) -> bool:
|
483 | 498 |
|
484 | 499 | async def get_info(self) -> RequestQueueMetadata | None:
|
485 | 500 | """Get an object containing general information about the request queue."""
|
486 |
| - return await self._resource_client.get() # type: ignore[no-any-return] # Mypy is broken |
| 501 | + return await self._resource_client.get() |
487 | 502 |
|
488 | 503 | @override
|
489 | 504 | async def get_handled_count(self) -> int:
|
@@ -658,7 +673,7 @@ async def _prolong_request_lock(self, request_id: str) -> datetime | None:
|
658 | 673 | )
|
659 | 674 | return None
|
660 | 675 | else:
|
661 |
| - return res.lock_expires_at # type: ignore[no-any-return] # Mypy is broken |
| 676 | + return res.lock_expires_at |
662 | 677 |
|
663 | 678 | async def _clear_possible_locks(self) -> None:
|
664 | 679 | self._queue_paused_for_migration = True
|
|
0 commit comments