Skip to content

Commit bf427bd

Browse files
committed
Rebase & fix tests
1 parent 931def3 commit bf427bd

File tree

10 files changed

+20
-22
lines changed

10 files changed

+20
-22
lines changed

src/crawlee/basic_crawler/_basic_crawler.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -478,12 +478,14 @@ def sigint_handler() -> None:
478478
return final_statistics
479479

480480
async def _run_crawler(self) -> None:
481+
event_manager = service_container.get_event_manager()
482+
481483
# Collect the context managers to be entered. Context managers that are already active are excluded,
482484
# as they were likely entered by the caller, who will also be responsible for exiting them.
483485
contexts_to_enter = [
484486
cm
485487
for cm in (
486-
self._event_manager,
488+
event_manager,
487489
self._snapshotter,
488490
self._statistics,
489491
self._session_pool if self._use_session_pool else None,

src/crawlee/errors.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
'HttpStatusCodeError',
1515
'ProxyError',
1616
'RequestHandlerError',
17-
'ServiceConflictError',
1817
'SessionError',
1918
'UserDefinedErrorHandlerError',
2019
]

src/crawlee/events/_event_manager.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -189,9 +189,6 @@ def emit(self, *, event: Event, event_data: EventData) -> None:
189189
event: The event which will be emitted.
190190
event_data: The data which will be passed to the event listeners.
191191
"""
192-
if not self._initialized:
193-
raise RuntimeError('EventManager is not initialized. Please use it within async context manager.')
194-
195192
self._event_emitter.emit(event.value, event_data)
196193

197194
@ensure_context
@@ -202,8 +199,6 @@ async def wait_for_all_listeners_to_complete(self, *, timeout: timedelta | None
202199
timeout: The maximum time to wait for the event listeners to finish. If they do not complete within
203200
the specified timeout, they will be canceled.
204201
"""
205-
if not self._initialized:
206-
raise RuntimeError('EventManager is not initialized. Please use it within async context manager.')
207202

208203
async def wait_for_listeners() -> None:
209204
"""Gathers all listener tasks and awaits their completion, logging any exceptions encountered."""

src/crawlee/service_container.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,11 @@
1212

1313
__all__ = [
1414
'get_configuration',
15-
'get_configuration_if_set',
1615
'get_event_manager',
1716
'get_storage_client',
18-
'set_cloud_storage_client',
1917
'set_configuration',
20-
'set_default_storage_client_type',
2118
'set_event_manager',
22-
'set_local_storage_client',
19+
'set_storage_client',
2320
]
2421

2522

src/crawlee/sessions/_session_pool.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from logging import getLogger
77
from typing import TYPE_CHECKING, Callable, Literal, overload
88

9+
from crawlee import service_container
910
from crawlee._utils.context import ensure_context
1011
from crawlee._utils.docs import docs_group
1112
from crawlee.events._types import Event, EventPersistStateData

src/crawlee/statistics/_statistics.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
from typing_extensions import Self, TypeVar
1010

11-
import crawlee.service_container
11+
from crawlee import service_container
1212
from crawlee._utils.context import ensure_context
1313
from crawlee._utils.docs import docs_group
1414
from crawlee._utils.recurring_task import RecurringTask
@@ -131,7 +131,7 @@ async def __aenter__(self) -> Self:
131131
self._key_value_store = await KeyValueStore.open(name=self._persist_state_kvs_name)
132132

133133
await self._maybe_load_statistics()
134-
self._events.on(event=Event.PERSIST_STATE, listener=self._persist_state)
134+
self._event_manager.on(event=Event.PERSIST_STATE, listener=self._persist_state)
135135
self._periodic_logger.start()
136136

137137
return self

src/crawlee/storages/_key_value_store.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from collections.abc import AsyncIterator
34
from typing import TYPE_CHECKING, Any, TypeVar, overload
45

56
from typing_extensions import override
@@ -12,8 +13,6 @@
1213
if TYPE_CHECKING:
1314
from collections.abc import AsyncIterator
1415

15-
from crawlee.base_storage_client import BaseStorageClient
16-
1716
T = TypeVar('T')
1817

1918

tests/unit/_autoscaling/test_snapshotter.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,12 @@ def event_system_data_info() -> EventSystemInfoData:
3333

3434

3535
async def test_start_stop_lifecycle() -> None:
36-
async with LocalEventManager() as event_manager, Snapshotter(
37-
event_manager=event_manager,
38-
available_memory_ratio=0.25,
36+
async with (
37+
LocalEventManager() as event_manager,
38+
Snapshotter(
39+
event_manager=event_manager,
40+
available_memory_ratio=0.25,
41+
),
3942
):
4043
pass
4144

@@ -98,7 +101,7 @@ async def test_get_cpu_sample(snapshotter: Snapshotter) -> None:
98101

99102
async def test_methods_raise_error_when_not_active() -> None:
100103
event_manager = AsyncMock(spec=EventManager)
101-
snapshotter = Snapshotter(event_manager, available_memory_ratio=0.25)
104+
snapshotter = Snapshotter(event_manager=event_manager, available_memory_ratio=0.25)
102105

103106
assert snapshotter.active is False
104107

tests/unit/_autoscaling/test_system_status.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
async def snapshotter() -> AsyncGenerator[Snapshotter, None]:
2626
async with (
2727
LocalEventManager() as event_manager,
28-
Snapshotter(event_manager, available_memory_ratio=0.25) as snapshotter,
28+
Snapshotter(event_manager=event_manager, available_memory_ratio=0.25) as snapshotter,
2929
):
3030
yield snapshotter
3131

@@ -38,7 +38,7 @@ def now() -> datetime:
3838
async def test_start_stop_lifecycle() -> None:
3939
async with (
4040
LocalEventManager() as event_manager,
41-
Snapshotter(event_manager, available_memory_ratio=0.25) as snapshotter,
41+
Snapshotter(event_manager=event_manager, available_memory_ratio=0.25) as snapshotter,
4242
):
4343
system_status = SystemStatus(snapshotter)
4444
system_status.get_current_system_info()

tests/unit/_memory_storage_client/test_memory_storage_client.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
import pytest
1010

11-
from crawlee import Request
11+
from crawlee import Request, service_container
1212
from crawlee._consts import METADATA_FILENAME
1313
from crawlee.configuration import Configuration
1414
from crawlee.memory_storage_client import MemoryStorageClient
@@ -224,13 +224,15 @@ async def test_not_implemented_method(tmp_path: Path) -> None:
224224
async def test_default_storage_path_used(monkeypatch: pytest.MonkeyPatch) -> None:
225225
# We expect the default value to be used
226226
monkeypatch.delenv('CRAWLEE_STORAGE_DIR', raising=False)
227+
service_container.set_configuration(Configuration())
227228
ms = MemoryStorageClient()
228229
assert ms.storage_dir == './storage'
229230

230231

231232
async def test_storage_path_from_env_var_overrides_default(monkeypatch: pytest.MonkeyPatch) -> None:
232233
# We expect the env var to override the default value
233234
monkeypatch.setenv('CRAWLEE_STORAGE_DIR', './env_var_storage_dir')
235+
service_container.set_configuration(Configuration())
234236
ms = MemoryStorageClient()
235237
assert ms.storage_dir == './env_var_storage_dir'
236238

0 commit comments

Comments
 (0)