 from crawlee._types import ConcurrencySettings, HttpMethod, JsonSerializable
 from crawlee.base_storage_client._models import DatasetItemsListPage
-from crawlee.configuration import Configuration
 from crawlee.events._event_manager import EventManager
 from crawlee.http_clients import BaseHttpClient, HttpResponse
 from crawlee.proxy_configuration import ProxyConfiguration, ProxyInfo
@@ -95,9 +94,6 @@ class BasicCrawlerOptions(TypedDict, Generic[TCrawlingContext]):
     """Maximum number of session rotations per request. The crawler rotates the session if a proxy error occurs
     or if the website blocks the request."""

-    configuration: NotRequired[Configuration]
-    """Crawler configuration."""
-
     request_handler_timeout: NotRequired[timedelta]
     """Maximum duration allowed for a single request handler to run."""

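With this hunk, `configuration` is no longer an accepted key of `BasicCrawlerOptions`. A minimal sketch of the remaining call pattern; the import path and the exact exported names are assumptions, not taken from this diff:

```python
# Sketch only: import path assumed; passing 'configuration' here would now be
# flagged by type checkers as an unknown TypedDict key.
from datetime import timedelta

from crawlee.basic_crawler import BasicCrawler, BasicCrawlerOptions

options: BasicCrawlerOptions = {
    'max_session_rotations': 10,  # still supported, per the context lines above
    'request_handler_timeout': timedelta(minutes=1),
}
crawler = BasicCrawler(**options)
```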
@@ -172,7 +168,6 @@ def __init__(
         max_request_retries: int = 3,
         max_requests_per_crawl: int | None = None,
         max_session_rotations: int = 10,
-        configuration: Configuration | None = None,
         request_handler_timeout: timedelta = timedelta(minutes=1),
         session_pool: SessionPool | None = None,
         use_session_pool: bool = True,
@@ -200,7 +195,6 @@ def __init__(
                 this value.
             max_session_rotations: Maximum number of session rotations per request. The crawler rotates the session
                 if a proxy error occurs or if the website blocks the request.
-            configuration: Crawler configuration.
             request_handler_timeout: Maximum duration allowed for a single request handler to run.
             use_session_pool: Enable the use of a session pool for managing sessions during crawling.
             session_pool: A custom `SessionPool` instance, allowing the use of non-default configuration.
@@ -235,12 +229,13 @@ def __init__(
         self._max_session_rotations = max_session_rotations

         self._request_provider = request_provider
-        self._configuration = configuration or service_container.get_configuration()
+
+        config = service_container.get_configuration()

         self._request_handler_timeout = request_handler_timeout
         self._internal_timeout = (
-            self._configuration.internal_timeout
-            if self._configuration.internal_timeout is not None
+            config.internal_timeout
+            if config.internal_timeout is not None
             else max(2 * request_handler_timeout, timedelta(minutes=5))
         )

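Since the crawler no longer accepts a `configuration` argument, a caller that previously passed one would now register it globally before construction. A sketch of that pattern, assuming the service container exposes a `set_configuration` setter mirroring the `get_configuration` call used above (the setter name is not confirmed by this diff, and import paths are assumed):

```python
from datetime import timedelta

from crawlee import service_container
from crawlee.basic_crawler import BasicCrawler
from crawlee.configuration import Configuration

# Assumed setter, mirroring service_container.get_configuration() above.
service_container.set_configuration(Configuration(internal_timeout=timedelta(minutes=10)))

# The crawler now reads internal_timeout (and the memory limits below) from
# the container-provided configuration instead of a constructor argument.
crawler = BasicCrawler(request_handler_timeout=timedelta(minutes=2))
```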
@@ -249,10 +244,8 @@ def __init__(
         self._event_manager = event_manager or service_container.get_event_manager()
         self._snapshotter = Snapshotter(
             self._event_manager,
-            max_memory_size=ByteSize.from_mb(self._configuration.memory_mbytes)
-            if self._configuration.memory_mbytes
-            else None,
-            available_memory_ratio=self._configuration.available_memory_ratio,
+            max_memory_size=ByteSize.from_mb(config.memory_mbytes) if config.memory_mbytes else None,
+            available_memory_ratio=config.available_memory_ratio,
         )
         self._pool = AutoscaledPool(
             system_status=SystemStatus(self._snapshotter),
@@ -269,13 +262,11 @@ def __init__(

         if configure_logging:
             root_logger = logging.getLogger()
-            configure_logger(root_logger, self._configuration, remove_old_handlers=True)
+            configure_logger(root_logger, remove_old_handlers=True)

             # Silence HTTPX logger
             httpx_logger = logging.getLogger('httpx')
-            httpx_logger.setLevel(
-                logging.DEBUG if get_configured_log_level(self._configuration) <= logging.DEBUG else logging.WARNING
-            )
+            httpx_logger.setLevel(logging.DEBUG if get_configured_log_level() <= logging.DEBUG else logging.WARNING)

         if not _logger:
             _logger = logging.getLogger(__name__)
@@ -360,7 +351,7 @@ async def get_request_provider(
     ) -> RequestProvider:
         """Return the configured request provider. If none is configured, open and return the default request queue."""
         if not self._request_provider:
-            self._request_provider = await RequestQueue.open(id=id, name=name, configuration=self._configuration)
+            self._request_provider = await RequestQueue.open(id=id, name=name)

         return self._request_provider

@@ -371,7 +362,7 @@ async def get_dataset(
         name: str | None = None,
     ) -> Dataset:
         """Return the dataset with the given ID or name. If none is provided, return the default dataset."""
-        return await Dataset.open(id=id, name=name, configuration=self._configuration)
+        return await Dataset.open(id=id, name=name)

     async def get_key_value_store(
         self,
@@ -380,7 +371,7 @@ async def get_key_value_store(
         name: str | None = None,
     ) -> KeyValueStore:
         """Return the key-value store with the given ID or name. If none is provided, return the default KVS."""
-        return await KeyValueStore.open(id=id, name=name, configuration=self._configuration)
+        return await KeyValueStore.open(id=id, name=name)

     def error_handler(
         self, handler: ErrorHandler[TCrawlingContext | BasicCrawlingContext]
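All three storage accessors now open their storages without threading `self._configuration` through, which implies the `open()` calls fall back to the container-level configuration themselves. A usage sketch under that assumption (import path assumed, as above):

```python
from crawlee.basic_crawler import BasicCrawler

async def save_results(crawler: BasicCrawler) -> None:
    # Both calls open the default storages; configuration is resolved
    # internally via the service container, not passed by the caller.
    dataset = await crawler.get_dataset()
    kvs = await crawler.get_key_value_store()
    await dataset.push_data({'url': 'https://example.com', 'status': 'ok'})
    await kvs.set_value('crawl-state', {'finished': True})
```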
@@ -425,7 +416,7 @@ async def run(
         request_provider = await self.get_request_provider()
         if purge_request_queue and isinstance(request_provider, RequestQueue):
             await request_provider.drop()
-            self._request_provider = await RequestQueue.open(configuration=self._configuration)
+            self._request_provider = await RequestQueue.open()

         if requests is not None:
             await self.add_requests(requests)
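End to end, the change should be invisible to ordinary callers of `run()`; only the removed `configuration=` plumbing differs. A runnable sketch, with import paths assumed for the pre-1.0 layout this diff appears to target:

```python
import asyncio

from crawlee.basic_crawler import BasicCrawler, BasicCrawlingContext

async def main() -> None:
    crawler = BasicCrawler()

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        context.log.info(f'Processing {context.request.url}')

    # Purging drops the default queue and reopens it, now without an explicit
    # configuration argument (see the hunk above).
    await crawler.run(['https://example.com'], purge_request_queue=True)

asyncio.run(main())
```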