Skip to content

Commit 6ee1190

Browse files
committed
better service locator
1 parent 23ead53 commit 6ee1190

File tree

9 files changed

+134
-188
lines changed

9 files changed

+134
-188
lines changed

src/crawlee/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from importlib import metadata
22

33
from ._request import Request
4+
from ._service_locator import service_locator
45
from ._types import ConcurrencySettings, EnqueueStrategy, HttpHeaders
56
from ._utils.globs import Glob
67

78
__version__ = metadata.version('crawlee')
89

9-
__all__ = ['ConcurrencySettings', 'EnqueueStrategy', 'Glob', 'HttpHeaders', 'Request']
10+
__all__ = ['ConcurrencySettings', 'EnqueueStrategy', 'Glob', 'HttpHeaders', 'Request', 'service_locator']

src/crawlee/_service_locator.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from __future__ import annotations
2+
3+
from crawlee._utils.docs import docs_group
4+
from crawlee.base_storage_client._base_storage_client import BaseStorageClient
5+
from crawlee.configuration import Configuration
6+
from crawlee.errors import ServiceConflictError
7+
from crawlee.events._event_manager import EventManager
8+
9+
10+
@docs_group('Classes')
class ServiceLocator:
    """Central registry of the services used by Crawlee.

    It holds the configuration, the event manager and the storage client. A service that has not
    been provided is created with its default implementation the first time it is requested.
    Each setter accepts a service only once and raises `ServiceConflictError` on a repeated call.
    """

    def __init__(self) -> None:
        # Markers flipped only by the setters; lazily created defaults leave them untouched.
        self._configuration_was_set = False
        self._event_manager_was_set = False
        self._storage_client_was_set = False

        # Service instances; `None` until explicitly set or lazily created by a getter.
        self._configuration: Configuration | None = None
        self._event_manager: EventManager | None = None
        self._storage_client: BaseStorageClient | None = None

    def get_configuration(self) -> Configuration:
        """Return the configuration, creating a default `Configuration` if none is stored yet."""
        configuration = self._configuration
        if configuration is None:
            configuration = self._configuration = Configuration()

        return configuration

    def set_configuration(self, configuration: Configuration) -> None:
        """Store the configuration to be returned by `get_configuration`.

        Args:
            configuration: The configuration to set.

        Raises:
            ServiceConflictError: If the configuration was already set.
        """
        if not self._configuration_was_set:
            self._configuration = configuration
            self._configuration_was_set = True
            return

        raise ServiceConflictError(Configuration, configuration, self._configuration)

    def get_event_manager(self) -> EventManager:
        """Return the event manager, creating a `LocalEventManager` if none is stored yet."""
        event_manager = self._event_manager
        if event_manager is None:
            # Imported at call time rather than at module import time.
            from crawlee.events import LocalEventManager

            event_manager = self._event_manager = LocalEventManager()

        return event_manager

    def set_event_manager(self, event_manager: EventManager) -> None:
        """Store the event manager to be returned by `get_event_manager`.

        Args:
            event_manager: The event manager to set.

        Raises:
            ServiceConflictError: If the event manager was already set.
        """
        if not self._event_manager_was_set:
            self._event_manager = event_manager
            self._event_manager_was_set = True
            return

        raise ServiceConflictError(EventManager, event_manager, self._event_manager)

    def get_storage_client(self) -> BaseStorageClient:
        """Return the storage client, creating a `MemoryStorageClient` if none is stored yet."""
        storage_client = self._storage_client
        if storage_client is None:
            # Imported at call time rather than at module import time.
            from crawlee.memory_storage_client import MemoryStorageClient

            storage_client = self._storage_client = MemoryStorageClient()

        return storage_client

    def set_storage_client(self, storage_client: BaseStorageClient) -> None:
        """Store the storage client to be returned by `get_storage_client`.

        Args:
            storage_client: The storage client to set.

        Raises:
            ServiceConflictError: If the storage client was already set.
        """
        if not self._storage_client_was_set:
            self._storage_client = storage_client
            self._storage_client_was_set = True
            return

        raise ServiceConflictError(BaseStorageClient, storage_client, self._storage_client)
96+
97+
98+
# Module-level `ServiceLocator` instance; re-exported from the `crawlee` package as `service_locator`.
service_locator = ServiceLocator()

src/crawlee/basic_crawler/_basic_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ async def _run_crawler(self) -> None:
522522

523523
async with AsyncExitStack() as exit_stack:
524524
for context in contexts_to_enter:
525-
await exit_stack.enter_async_context(context)
525+
await exit_stack.enter_async_context(context) # type: ignore[arg-type]
526526

527527
await self._autoscaled_pool.run()
528528

src/crawlee/configuration.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,11 +235,12 @@ class Configuration(BaseSettings):
235235
def get_global_configuration(cls) -> Self:
236236
"""Retrieve the global instance of the configuration.
237237
238-
TODO: Can we remove this?
238+
Mostly for backwards compatibility. It is recommended to use `service_locator.get_configuration()`
239+
instead.
239240
"""
240-
from crawlee.service_locator import get_configuration
241+
from crawlee import service_locator
241242

242-
config = get_configuration()
243+
config = service_locator.get_configuration()
243244

244245
if not isinstance(config, cls):
245246
raise TypeError(f'Requested global configuration object of type {cls}, but {config.__class__} was found')

src/crawlee/service_locator.py

Lines changed: 0 additions & 147 deletions
This file was deleted.

tests/unit/_memory_storage_client/test_memory_storage_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,8 @@ async def test_not_implemented_method(tmp_path: Path) -> None:
223223

224224
async def test_default_storage_path_used(monkeypatch: pytest.MonkeyPatch) -> None:
225225
# Reset the configuration in service locator
226-
service_locator._service_locator._configuration = None
227-
service_locator._service_locator._configuration_was_set = False
226+
service_locator._configuration = None
227+
service_locator._configuration_was_set = False
228228

229229
# Remove the env var for setting the storage directory
230230
monkeypatch.delenv('CRAWLEE_STORAGE_DIR', raising=False)

tests/unit/conftest.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ def _prepare_test_env() -> None:
4141

4242
# Reset the flags in the service locator to indicate that no services are explicitly set. This ensures
4343
# a clean state, as services might have been set during a previous test and not reset properly.
44-
service_locator._service_locator._configuration_was_set = False
45-
service_locator._service_locator._storage_client_was_set = False
46-
service_locator._service_locator._event_manager_was_set = False
44+
service_locator._configuration_was_set = False
45+
service_locator._storage_client_was_set = False
46+
service_locator._event_manager_was_set = False
4747

4848
# Reset the services in the service locator.
49-
service_locator._service_locator._configuration = None
50-
service_locator._service_locator._event_manager = None
51-
service_locator._service_locator._storage_client = None
49+
service_locator._configuration = None
50+
service_locator._event_manager = None
51+
service_locator._storage_client = None
5252

5353
# Clear creation-related caches to ensure no state is carried over between tests.
5454
monkeypatch.setattr(_creation_management, '_cache_dataset_by_id', {})
@@ -60,9 +60,9 @@ def _prepare_test_env() -> None:
6060

6161
# Verify that the test environment was set up correctly.
6262
assert os.environ.get('CRAWLEE_STORAGE_DIR') == str(tmp_path)
63-
assert service_locator._service_locator.configuration_was_set is False
64-
assert service_locator._service_locator.storage_client_was_set is False
65-
assert service_locator._service_locator.event_manager_was_set is False
63+
assert service_locator._configuration_was_set is False
64+
assert service_locator._storage_client_was_set is False
65+
assert service_locator._event_manager_was_set is False
6666

6767
return _prepare_test_env
6868

tests/unit/test_configuration.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55

66
from typing import TYPE_CHECKING
77

8+
from crawlee import service_locator
89
from crawlee.configuration import Configuration
910
from crawlee.http_crawler import HttpCrawler, HttpCrawlingContext
1011
from crawlee.memory_storage_client._memory_storage_client import MemoryStorageClient
11-
from crawlee.service_locator import get_configuration, set_storage_client
1212

1313
if TYPE_CHECKING:
1414
from pathlib import Path
@@ -20,8 +20,8 @@ def test_global_configuration_works() -> None:
2020
assert (
2121
Configuration.get_global_configuration()
2222
is Configuration.get_global_configuration()
23-
is get_configuration()
24-
is get_configuration()
23+
is service_locator.get_configuration()
24+
is service_locator.get_configuration()
2525
)
2626

2727

@@ -32,7 +32,7 @@ async def test_storage_not_persisted_when_disabled(tmp_path: Path, httpbin: URL)
3232
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
3333
)
3434
storage_client = MemoryStorageClient(config)
35-
set_storage_client(storage_client)
35+
service_locator.set_storage_client(storage_client)
3636

3737
crawler = HttpCrawler()
3838

@@ -54,7 +54,7 @@ async def test_storage_persisted_when_enabled(tmp_path: Path, httpbin: URL) -> N
5454
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
5555
)
5656
storage_client = MemoryStorageClient(config)
57-
set_storage_client(storage_client)
57+
service_locator.set_storage_client(storage_client)
5858

5959
crawler = HttpCrawler()
6060

0 commit comments

Comments
 (0)