@@ -51,7 +51,6 @@
 from crawlee._types import ConcurrencySettings, HttpMethod, JsonSerializable
 from crawlee.base_storage_client._models import DatasetItemsListPage
-from crawlee.configuration import Configuration
 from crawlee.events._event_manager import EventManager
 from crawlee.http_clients import BaseHttpClient, HttpResponse
 from crawlee.proxy_configuration import ProxyConfiguration, ProxyInfo
@@ -96,9 +95,6 @@ class BasicCrawlerOptions(TypedDict, Generic[TCrawlingContext]):
     """Maximum number of session rotations per request. The crawler rotates the session if a proxy error occurs
     or if the website blocks the request."""

-    configuration: NotRequired[Configuration]
-    """Crawler configuration."""
-
     request_handler_timeout: NotRequired[timedelta]
     """Maximum duration allowed for a single request handler to run."""

@@ -173,7 +169,6 @@ def __init__(
         max_request_retries: int = 3,
         max_requests_per_crawl: int | None = None,
         max_session_rotations: int = 10,
-        configuration: Configuration | None = None,
         request_handler_timeout: timedelta = timedelta(minutes=1),
         session_pool: SessionPool | None = None,
         use_session_pool: bool = True,
@@ -201,7 +196,6 @@ def __init__(
                 this value.
             max_session_rotations: Maximum number of session rotations per request. The crawler rotates the session
                 if a proxy error occurs or if the website blocks the request.
-            configuration: Crawler configuration.
             request_handler_timeout: Maximum duration allowed for a single request handler to run.
             use_session_pool: Enable the use of a session pool for managing sessions during crawling.
             session_pool: A custom `SessionPool` instance, allowing the use of non-default configuration.
@@ -236,12 +230,13 @@ def __init__(
         self._max_session_rotations = max_session_rotations

         self._request_provider = request_provider
-        self._configuration = configuration or service_container.get_configuration()
+
+        config = service_container.get_configuration()

         self._request_handler_timeout = request_handler_timeout
         self._internal_timeout = (
-            self._configuration.internal_timeout
-            if self._configuration.internal_timeout is not None
+            config.internal_timeout
+            if config.internal_timeout is not None
             else max(2 * request_handler_timeout, timedelta(minutes=5))
         )
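With the `configuration` parameter removed, `BasicCrawler` always resolves its settings from the global service container. A minimal sketch of the new caller-side pattern, assuming the service container exposes a `set_configuration` counterpart to the `get_configuration` call above (the `internal_timeout` field name is taken from this diff; import paths are assumptions):

```python
from datetime import timedelta

from crawlee import service_container
from crawlee.basic_crawler import BasicCrawler
from crawlee.configuration import Configuration

# Register a custom configuration globally before constructing the crawler;
# __init__ now picks it up via service_container.get_configuration().
service_container.set_configuration(Configuration(internal_timeout=timedelta(minutes=10)))

crawler = BasicCrawler()  # no configuration= argument anymore
```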
@@ -250,10 +245,8 @@ def __init__(
         self._event_manager = event_manager or service_container.get_event_manager()
         self._snapshotter = Snapshotter(
             self._event_manager,
-            max_memory_size=ByteSize.from_mb(self._configuration.memory_mbytes)
-            if self._configuration.memory_mbytes
-            else None,
-            available_memory_ratio=self._configuration.available_memory_ratio,
+            max_memory_size=ByteSize.from_mb(config.memory_mbytes) if config.memory_mbytes else None,
+            available_memory_ratio=config.available_memory_ratio,
         )
         self._autoscaled_pool = AutoscaledPool(
             system_status=SystemStatus(self._snapshotter),
@@ -270,13 +263,11 @@ def __init__(

         if configure_logging:
             root_logger = logging.getLogger()
-            configure_logger(root_logger, self._configuration, remove_old_handlers=True)
+            configure_logger(root_logger, remove_old_handlers=True)

             # Silence HTTPX logger
             httpx_logger = logging.getLogger('httpx')
-            httpx_logger.setLevel(
-                logging.DEBUG if get_configured_log_level(self._configuration) <= logging.DEBUG else logging.WARNING
-            )
+            httpx_logger.setLevel(logging.DEBUG if get_configured_log_level() <= logging.DEBUG else logging.WARNING)

         if not _logger:
             _logger = logging.getLogger(__name__)
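The logging helpers likewise drop their configuration argument and read the global configuration internally. A sketch of the resulting call pattern, mirroring the hunk above (the `crawlee._log_config` import path is an assumption):

```python
import logging

from crawlee._log_config import configure_logger, get_configured_log_level  # assumed module path

root_logger = logging.getLogger()
configure_logger(root_logger, remove_old_handlers=True)  # level/format now come from the global config

# Silence HTTPX unless the configured level is DEBUG, exactly as the crawler does above.
httpx_logger = logging.getLogger('httpx')
httpx_logger.setLevel(logging.DEBUG if get_configured_log_level() <= logging.DEBUG else logging.WARNING)
```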
@@ -361,7 +352,7 @@ async def get_request_provider(
     ) -> RequestProvider:
         """Return the configured request provider. If none is configured, open and return the default request queue."""
         if not self._request_provider:
-            self._request_provider = await RequestQueue.open(id=id, name=name, configuration=self._configuration)
+            self._request_provider = await RequestQueue.open(id=id, name=name)

         return self._request_provider

@@ -372,7 +363,7 @@ async def get_dataset(
         name: str | None = None,
     ) -> Dataset:
         """Return the dataset with the given ID or name. If none is provided, return the default dataset."""
-        return await Dataset.open(id=id, name=name, configuration=self._configuration)
+        return await Dataset.open(id=id, name=name)

     async def get_key_value_store(
         self,
@@ -381,7 +372,7 @@ async def get_key_value_store(
         name: str | None = None,
     ) -> KeyValueStore:
         """Return the key-value store with the given ID or name. If none is provided, return the default KVS."""
-        return await KeyValueStore.open(id=id, name=name, configuration=self._configuration)
+        return await KeyValueStore.open(id=id, name=name)

     def error_handler(
         self, handler: ErrorHandler[TCrawlingContext | BasicCrawlingContext]
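All three storage accessors now call `open()` without an explicit configuration, leaving each storage class to resolve it from the service container itself. A short usage sketch of the simplified helpers (`push_data` and `set_value` reflect the public storage API):

```python
# Somewhere with a crawler instance, e.g. inside a request handler:
dataset = await crawler.get_dataset()  # default dataset, no configuration= needed
await dataset.push_data({'url': 'https://example.com', 'title': 'Example'})

kvs = await crawler.get_key_value_store()  # default key-value store
await kvs.set_value('state', {'processed': 1})
```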
@@ -426,7 +417,7 @@ async def run(
         request_provider = await self.get_request_provider()
         if purge_request_queue and isinstance(request_provider, RequestQueue):
             await request_provider.drop()
-            self._request_provider = await RequestQueue.open(configuration=self._configuration)
+            self._request_provider = await RequestQueue.open()

         if requests is not None:
             await self.add_requests(requests)
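The purge path in `run()` likewise reopens the default request queue without threading a configuration through. A hedged end-to-end sketch of the caller-facing behaviour (import paths are assumptions):

```python
import asyncio

from crawlee._types import BasicCrawlingContext  # assumed import path
from crawlee.basic_crawler import BasicCrawler

async def main() -> None:
    crawler = BasicCrawler()

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url}')

    # purge_request_queue=True drops the existing queue and reopens the default
    # one via the now configuration-free RequestQueue.open().
    await crawler.run(['https://example.com'], purge_request_queue=True)

asyncio.run(main())
```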