10
10
from apify_client import ApifyClientAsync
11
11
from apify_client .clients import RequestQueueClientAsync
12
12
13
- from .._utils import LRUCache , _crypto_random_object_id , _unique_key_to_request_id
13
+ from .._utils import LRUCache , _budget_ow , _crypto_random_object_id , _unique_key_to_request_id
14
14
from ..config import Configuration
15
15
from ..consts import REQUEST_QUEUE_HEAD_MAX_LIMIT
16
16
from ..memory_storage import MemoryStorage
@@ -126,7 +126,7 @@ async def _create_instance(cls, request_queue_id_or_name: str, client: Union[Api
126
126
def _get_default_name(cls, config: Configuration) -> str:
    """Return the default request queue identifier stored in the given configuration.

    Args:
        config (Configuration): Configuration object to read `default_request_queue_id` from.

    Returns:
        str: The value of `config.default_request_queue_id`.
    """
    default_queue_id = config.default_request_queue_id
    return default_queue_id
128
128
129
- async def add_request (self , request : Dict , * , forefront : bool = False ) -> Dict : # TODO: Validate request with pydantic
129
+ async def add_request (self , request : Dict , * , forefront : bool = False ) -> Dict :
130
130
"""Add a request to the queue.
131
131
132
132
Args:
@@ -136,8 +136,14 @@ async def add_request(self, request: Dict, *, forefront: bool = False) -> Dict:
136
136
Returns:
137
137
dict: Information about the queue operation with keys `requestId`, `uniqueKey`, `wasAlreadyPresent`, `wasAlreadyHandled`.
138
138
"""
139
+ _budget_ow (request , {
140
+ 'url' : (str , True ),
141
+ })
139
142
self ._last_activity = datetime .utcnow ()
140
143
144
+ if request .get ('uniqueKey' ) is None :
145
+ request ['uniqueKey' ] = request ['url' ] # TODO: Check Request class in crawlee and replicate uniqueKey generation logic...
146
+
141
147
cache_key = _unique_key_to_request_id (request ['uniqueKey' ])
142
148
cached_info = self ._requests_cache .get (cache_key )
143
149
@@ -174,7 +180,8 @@ async def get_request(self, request_id: str) -> Optional[Dict]:
174
180
Returns:
175
181
dict, optional: The retrieved request, or `None`, if it does not exist.
176
182
"""
177
- return await self ._client .get_request (request_id ) # TODO: Maybe create a Request class?
183
+ _budget_ow (request_id , (str , True ), 'request_id' )
184
+ return await self ._client .get_request (request_id ) # TODO: Maybe create a Request dataclass?
178
185
179
186
async def fetch_next_request (self ) -> Optional [Dict ]:
180
187
"""Return the next request in the queue to be processed.
@@ -241,7 +248,7 @@ async def fetch_next_request(self) -> Optional[Dict]:
241
248
242
249
return request
243
250
244
- async def mark_request_as_handled (self , request : Dict ) -> Optional [Dict ]: # TODO: Validate request with pydantic
251
+ async def mark_request_as_handled (self , request : Dict ) -> Optional [Dict ]:
245
252
"""Mark a request as handled after successful processing.
246
253
247
254
Handled requests will never again be returned by the `RequestQueue.fetch_next_request` method.
@@ -253,6 +260,11 @@ async def mark_request_as_handled(self, request: Dict) -> Optional[Dict]: # TOD
253
260
dict, optional: Information about the queue operation with keys `requestId`, `uniqueKey`, `wasAlreadyPresent`, `wasAlreadyHandled`.
254
261
`None` if the given request was not in progress.
255
262
"""
263
+ _budget_ow (request , {
264
+ 'id' : (str , True ),
265
+ 'uniqueKey' : (str , True ),
266
+ 'handledAt' : (datetime , False ),
267
+ })
256
268
self ._last_activity = datetime .utcnow ()
257
269
if request ['id' ] not in self ._in_progress :
258
270
logging .debug (f'Cannot mark request { request ["id" ]} as handled, because it is not in progress!' )
@@ -272,7 +284,7 @@ async def mark_request_as_handled(self, request: Dict) -> Optional[Dict]: # TOD
272
284
273
285
return queue_operation_info
274
286
275
- async def reclaim_request (self , request : Dict , forefront : bool = False ) -> Optional [Dict ]: # TODO: Validate request with pydantic
287
+ async def reclaim_request (self , request : Dict , forefront : bool = False ) -> Optional [Dict ]:
276
288
"""Reclaim a failed request back to the queue.
277
289
278
290
The request will be returned for processing later again
@@ -285,6 +297,10 @@ async def reclaim_request(self, request: Dict, forefront: bool = False) -> Optio
285
297
dict, optional: Information about the queue operation with keys `requestId`, `uniqueKey`, `wasAlreadyPresent`, `wasAlreadyHandled`.
286
298
`None` if the given request was not in progress.
287
299
"""
300
+ _budget_ow (request , {
301
+ 'id' : (str , True ),
302
+ 'uniqueKey' : (str , True ),
303
+ })
288
304
self ._last_activity = datetime .utcnow ()
289
305
290
306
if request ['id' ] not in self ._in_progress :
0 commit comments