51
51
52
52
from crawlee ._types import ConcurrencySettings , HttpMethod , JsonSerializable
53
53
from crawlee .base_storage_client ._models import DatasetItemsListPage
54
- from crawlee .configuration import Configuration
55
54
from crawlee .events ._event_manager import EventManager
56
55
from crawlee .http_clients import BaseHttpClient , HttpResponse
57
56
from crawlee .proxy_configuration import ProxyConfiguration , ProxyInfo
@@ -96,9 +95,6 @@ class BasicCrawlerOptions(TypedDict, Generic[TCrawlingContext]):
96
95
"""Maximum number of session rotations per request. The crawler rotates the session if a proxy error occurs
97
96
or if the website blocks the request."""
98
97
99
- configuration : NotRequired [Configuration ]
100
- """Crawler configuration."""
101
-
102
98
request_handler_timeout : NotRequired [timedelta ]
103
99
"""Maximum duration allowed for a single request handler to run."""
104
100
@@ -176,7 +172,6 @@ def __init__(
176
172
max_request_retries : int = 3 ,
177
173
max_requests_per_crawl : int | None = None ,
178
174
max_session_rotations : int = 10 ,
179
- configuration : Configuration | None = None ,
180
175
request_handler_timeout : timedelta = timedelta (minutes = 1 ),
181
176
session_pool : SessionPool | None = None ,
182
177
use_session_pool : bool = True ,
@@ -205,7 +200,6 @@ def __init__(
205
200
this value.
206
201
max_session_rotations: Maximum number of session rotations per request. The crawler rotates the session
207
202
if a proxy error occurs or if the website blocks the request.
208
- configuration: Crawler configuration.
209
203
request_handler_timeout: Maximum duration allowed for a single request handler to run.
210
204
use_session_pool: Enable the use of a session pool for managing sessions during crawling.
211
205
session_pool: A custom `SessionPool` instance, allowing the use of non-default configuration.
@@ -241,12 +235,13 @@ def __init__(
241
235
self ._max_session_rotations = max_session_rotations
242
236
243
237
self ._request_provider = request_provider
244
- self ._configuration = configuration or service_container .get_configuration ()
238
+
239
+ config = service_container .get_configuration ()
245
240
246
241
self ._request_handler_timeout = request_handler_timeout
247
242
self ._internal_timeout = (
248
- self . _configuration .internal_timeout
249
- if self . _configuration .internal_timeout is not None
243
+ config .internal_timeout
244
+ if config .internal_timeout is not None
250
245
else max (2 * request_handler_timeout , timedelta (minutes = 5 ))
251
246
)
252
247
@@ -255,10 +250,8 @@ def __init__(
255
250
self ._event_manager = event_manager or service_container .get_event_manager ()
256
251
self ._snapshotter = Snapshotter (
257
252
self ._event_manager ,
258
- max_memory_size = ByteSize .from_mb (self ._configuration .memory_mbytes )
259
- if self ._configuration .memory_mbytes
260
- else None ,
261
- available_memory_ratio = self ._configuration .available_memory_ratio ,
253
+ max_memory_size = ByteSize .from_mb (config .memory_mbytes ) if config .memory_mbytes else None ,
254
+ available_memory_ratio = config .available_memory_ratio ,
262
255
)
263
256
self ._autoscaled_pool = AutoscaledPool (
264
257
system_status = SystemStatus (self ._snapshotter ),
@@ -275,13 +268,11 @@ def __init__(
275
268
276
269
if configure_logging :
277
270
root_logger = logging .getLogger ()
278
- configure_logger (root_logger , self . _configuration , remove_old_handlers = True )
271
+ configure_logger (root_logger , remove_old_handlers = True )
279
272
280
273
# Silence HTTPX logger
281
274
httpx_logger = logging .getLogger ('httpx' )
282
- httpx_logger .setLevel (
283
- logging .DEBUG if get_configured_log_level (self ._configuration ) <= logging .DEBUG else logging .WARNING
284
- )
275
+ httpx_logger .setLevel (logging .DEBUG if get_configured_log_level () <= logging .DEBUG else logging .WARNING )
285
276
286
277
if not _logger :
287
278
_logger = logging .getLogger (__name__ )
@@ -369,7 +360,7 @@ async def get_request_provider(
369
360
) -> RequestProvider :
370
361
"""Return the configured request provider. If none is configured, open and return the default request queue."""
371
362
if not self ._request_provider :
372
- self ._request_provider = await RequestQueue .open (id = id , name = name , configuration = self . _configuration )
363
+ self ._request_provider = await RequestQueue .open (id = id , name = name )
373
364
374
365
return self ._request_provider
375
366
@@ -380,7 +371,7 @@ async def get_dataset(
380
371
name : str | None = None ,
381
372
) -> Dataset :
382
373
"""Return the dataset with the given ID or name. If none is provided, return the default dataset."""
383
- return await Dataset .open (id = id , name = name , configuration = self . _configuration )
374
+ return await Dataset .open (id = id , name = name )
384
375
385
376
async def get_key_value_store (
386
377
self ,
@@ -389,7 +380,7 @@ async def get_key_value_store(
389
380
name : str | None = None ,
390
381
) -> KeyValueStore :
391
382
"""Return the key-value store with the given ID or name. If none is provided, return the default KVS."""
392
- return await KeyValueStore .open (id = id , name = name , configuration = self . _configuration )
383
+ return await KeyValueStore .open (id = id , name = name )
393
384
394
385
def error_handler (
395
386
self , handler : ErrorHandler [TCrawlingContext | BasicCrawlingContext ]
@@ -434,7 +425,7 @@ async def run(
434
425
request_provider = await self .get_request_provider ()
435
426
if purge_request_queue and isinstance (request_provider , RequestQueue ):
436
427
await request_provider .drop ()
437
- self ._request_provider = await RequestQueue .open (configuration = self . _configuration )
428
+ self ._request_provider = await RequestQueue .open ()
438
429
439
430
if requests is not None :
440
431
await self .add_requests (requests )
0 commit comments