Skip to content

Commit 234b04c

Browse files
committed
chore: adapt to use service locator from crawlee
1 parent c4805dc commit 234b04c

File tree

6 files changed

+104
-76
lines changed

6 files changed

+104
-76
lines changed

pyproject.toml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ keywords = [
4545
python = "^3.9"
4646
apify-client = ">=1.8.1"
4747
apify-shared = ">=1.1.2"
48-
crawlee = "~0.4.0"
48+
crawlee = { git = "https://github.com/apify/crawlee-python.git", branch = "refactor-service-container" }
4949
cryptography = ">=42.0.0"
5050
# TODO: relax the upper bound once the issue is resolved:
5151
# https://github.com/apify/apify-sdk-python/issues/348
@@ -188,11 +188,7 @@ typeCheckingMode = "standard"
188188
include = ["src", "tests"]
189189

190190
[tool.coverage.report]
191-
exclude_lines = [
192-
"pragma: no cover",
193-
"if TYPE_CHECKING:",
194-
"assert_never()",
195-
]
191+
exclude_lines = ["pragma: no cover", "if TYPE_CHECKING:", "assert_never()"]
196192

197193
[tool.ipdb]
198194
context = 7

src/apify/_actor.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
from apify_client import ApifyClientAsync
1313
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
1414
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
15-
from crawlee import service_container
15+
from crawlee import service_locator
1616
from crawlee.events._types import Event, EventPersistStateData
17+
from crawlee.memory_storage_client import MemoryStorageClient
1718

1819
from apify._configuration import Configuration
1920
from apify._consts import EVENT_LISTENERS_TIMEOUT
@@ -69,17 +70,31 @@ def __init__(
6970
self._configure_logging = configure_logging
7071
self._apify_client = self.new_client()
7172

72-
self._event_manager: EventManager
73-
if self._configuration.is_at_home:
74-
self._event_manager = PlatformEventManager(
73+
# We need to keep both local & cloud storage clients because of the `force_cloud` option.
74+
self._local_storage_client = MemoryStorageClient()
75+
self._cloud_storage_client = ApifyStorageClient(configuration=self._configuration)
76+
77+
# Set the event manager based on whether the Actor is running on the platform or locally.
78+
self._event_manager = (
79+
PlatformEventManager(
7580
config=self._configuration,
7681
persist_state_interval=self._configuration.persist_state_interval,
7782
)
78-
else:
79-
self._event_manager = LocalEventManager(
83+
if self.is_at_home()
84+
else LocalEventManager(
8085
system_info_interval=self._configuration.system_info_interval,
8186
persist_state_interval=self._configuration.persist_state_interval,
8287
)
88+
)
89+
90+
# Register services in the service locator.
91+
if self.is_at_home():
92+
service_locator.set_storage_client(self._cloud_storage_client)
93+
else:
94+
service_locator.set_storage_client(self._local_storage_client)
95+
96+
service_locator.set_event_manager(self.event_manager)
97+
service_locator.set_configuration(self.configuration)
8398

8499
self._is_initialized = False
85100

@@ -93,7 +108,7 @@ async def __aenter__(self) -> Self:
93108
executing the block code, the `Actor.fail` method is called.
94109
"""
95110
if self._configure_logging:
96-
_configure_logging(self._configuration)
111+
_configure_logging()
97112

98113
await self.init()
99114
return self
@@ -172,16 +187,6 @@ async def init(self) -> None:
172187
if self._is_initialized:
173188
raise RuntimeError('The Actor was already initialized!')
174189

175-
if self._configuration.token:
176-
service_container.set_cloud_storage_client(ApifyStorageClient(configuration=self._configuration))
177-
178-
if self._configuration.is_at_home:
179-
service_container.set_default_storage_client_type('cloud')
180-
else:
181-
service_container.set_default_storage_client_type('local')
182-
183-
service_container.set_event_manager(self._event_manager)
184-
185190
self._is_exiting = False
186191
self._was_final_persist_state_emitted = False
187192

@@ -233,7 +238,6 @@ async def finalize() -> None:
233238
await self._event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
234239

235240
await self._event_manager.__aexit__(None, None, None)
236-
cast(dict, service_container._services).clear() # noqa: SLF001
237241

238242
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
239243
self._is_initialized = False
@@ -335,12 +339,13 @@ async def open_dataset(
335339
An instance of the `Dataset` class for the given ID or name.
336340
"""
337341
self._raise_if_not_initialized()
342+
storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client()
338343

339344
return await Dataset.open(
340345
id=id,
341346
name=name,
342347
configuration=self._configuration,
343-
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
348+
storage_client=storage_client,
344349
)
345350

346351
async def open_key_value_store(
@@ -367,12 +372,13 @@ async def open_key_value_store(
367372
An instance of the `KeyValueStore` class for the given ID or name.
368373
"""
369374
self._raise_if_not_initialized()
375+
storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client()
370376

371377
return await KeyValueStore.open(
372378
id=id,
373379
name=name,
374380
configuration=self._configuration,
375-
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
381+
storage_client=storage_client,
376382
)
377383

378384
async def open_request_queue(
@@ -401,12 +407,13 @@ async def open_request_queue(
401407
An instance of the `RequestQueue` class for the given ID or name.
402408
"""
403409
self._raise_if_not_initialized()
410+
storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client()
404411

405412
return await RequestQueue.open(
406413
id=id,
407414
name=name,
408415
configuration=self._configuration,
409-
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
416+
storage_client=storage_client,
410417
)
411418

412419
async def push_data(self, data: dict | list[dict]) -> None:

src/apify/_configuration.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from datetime import datetime, timedelta
4+
from logging import getLogger
45
from typing import Annotated
56

67
from pydantic import AliasChoices, BeforeValidator, Field
@@ -12,6 +13,8 @@
1213

1314
from apify._utils import docs_group
1415

16+
logger = getLogger(__name__)
17+
1518

1619
@docs_group('Classes')
1720
class Configuration(CrawleeConfiguration):
@@ -321,6 +324,11 @@ class Configuration(CrawleeConfiguration):
321324
),
322325
] = None
323326

327+
@classmethod
328+
def get_global_configuration(cls) -> Configuration:
329+
"""Retrieve the global instance of the configuration.
324330
325-
# Monkey-patch the base class so that it works with the extended configuration
326-
CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration # type: ignore[method-assign]
331+
Mostly for the backwards compatibility. It is recommended to use the `service_locator.get_configuration()`
332+
instead.
333+
"""
334+
return cls()

src/apify/log.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,10 @@
11
from __future__ import annotations
22

33
import logging
4-
from typing import TYPE_CHECKING
54

65
from apify_shared.utils import ignore_docs
76
from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level
87

9-
if TYPE_CHECKING:
10-
from apify import Configuration
11-
128
# Name of the logger used throughout the library (resolves to 'apify')
139
logger_name = __name__.split('.')[0]
1410

@@ -21,11 +17,11 @@ class ActorLogFormatter(CrawleeLogFormatter): # noqa: D101 (Inherited from pare
2117
pass
2218

2319

24-
def _configure_logging(configuration: Configuration) -> None:
20+
def _configure_logging() -> None:
2521
apify_client_logger = logging.getLogger('apify_client')
26-
configure_logger(apify_client_logger, configuration, remove_old_handlers=True)
22+
configure_logger(apify_client_logger, remove_old_handlers=True)
2723

28-
level = get_configured_log_level(configuration)
24+
level = get_configured_log_level()
2925

3026
# Keep apify_client logger quiet unless debug logging is requested
3127
if level > logging.DEBUG:
@@ -42,4 +38,4 @@ def _configure_logging(configuration: Configuration) -> None:
4238

4339
# Use configured log level for apify logger
4440
apify_logger = logging.getLogger('apify')
45-
configure_logger(apify_logger, configuration, remove_old_handlers=True)
41+
configure_logger(apify_logger, remove_old_handlers=True)

tests/integration/conftest.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import sys
88
import textwrap
99
from pathlib import Path
10-
from typing import TYPE_CHECKING, Any, Callable, Protocol, cast
10+
from typing import TYPE_CHECKING, Any, Callable, Protocol
1111

1212
import pytest
1313
from filelock import FileLock
@@ -36,9 +36,6 @@ def _reset_and_patch_default_instances() -> None:
3636
To isolate the tests, we need to reset the used singletons before each test case. We also patch the default
3737
storage client with a tmp_path.
3838
"""
39-
from crawlee import service_container
40-
41-
cast(dict, service_container._services).clear()
4239
delattr(apify._actor.Actor, '__wrapped__')
4340

4441
# TODO: StorageClientManager local storage client purge # noqa: TD003

tests/unit/conftest.py

Lines changed: 59 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,19 @@
22

33
import asyncio
44
import inspect
5+
import os
56
from collections import defaultdict
67
from copy import deepcopy
7-
from typing import TYPE_CHECKING, Any, Callable, cast, get_type_hints
8+
from typing import TYPE_CHECKING, Any, Callable, get_type_hints
89

910
import pytest
1011

1112
from apify_client.client import ApifyClientAsync
1213
from apify_shared.consts import ApifyEnvVars
14+
from crawlee import service_locator
1315
from crawlee.configuration import Configuration as CrawleeConfiguration
1416
from crawlee.memory_storage_client import MemoryStorageClient
17+
from crawlee.storages import _creation_management
1518

1619
import apify._actor
1720

@@ -20,45 +23,66 @@
2023

2124

2225
@pytest.fixture
23-
def reset_default_instances() -> Callable[[], None]:
24-
def reset() -> None:
25-
from crawlee.storages._creation_management import (
26-
_cache_dataset_by_id,
27-
_cache_dataset_by_name,
28-
_cache_kvs_by_id,
29-
_cache_kvs_by_name,
30-
_cache_rq_by_id,
31-
_cache_rq_by_name,
32-
)
33-
34-
_cache_dataset_by_id.clear()
35-
_cache_dataset_by_name.clear()
36-
_cache_kvs_by_id.clear()
37-
_cache_kvs_by_name.clear()
38-
_cache_rq_by_id.clear()
39-
_cache_rq_by_name.clear()
40-
41-
from crawlee import service_container
42-
43-
cast(dict, service_container._services).clear()
26+
def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]:
27+
"""Prepare the testing environment by resetting the global state before each test.
28+
29+
This fixture ensures that the global state of the package is reset to a known baseline before each test runs.
30+
It also configures a temporary storage directory for test isolation.
31+
32+
Args:
33+
monkeypatch: Test utility provided by pytest for patching.
34+
tmp_path: A unique temporary directory path provided by pytest for test isolation.
35+
36+
Returns:
37+
A callable that prepares the test environment.
38+
"""
39+
40+
def _prepare_test_env() -> None:
4441
delattr(apify._actor.Actor, '__wrapped__')
45-
# TODO: local storage client purge # noqa: TD003
4642

47-
return reset
43+
# Set the environment variable for the local storage directory to the temporary path.
44+
monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path))
45+
46+
# Reset the flags in the service locator to indicate that no services are explicitly set. This ensures
47+
# a clean state, as services might have been set during a previous test and not reset properly.
48+
service_locator._configuration_was_set = False
49+
service_locator._storage_client_was_set = False
50+
service_locator._event_manager_was_set = False
51+
52+
# Reset the services in the service locator.
53+
service_locator._configuration = None
54+
service_locator._event_manager = None
55+
service_locator._storage_client = None
56+
57+
# Clear creation-related caches to ensure no state is carried over between tests.
58+
monkeypatch.setattr(_creation_management, '_cache_dataset_by_id', {})
59+
monkeypatch.setattr(_creation_management, '_cache_dataset_by_name', {})
60+
monkeypatch.setattr(_creation_management, '_cache_kvs_by_id', {})
61+
monkeypatch.setattr(_creation_management, '_cache_kvs_by_name', {})
62+
monkeypatch.setattr(_creation_management, '_cache_rq_by_id', {})
63+
monkeypatch.setattr(_creation_management, '_cache_rq_by_name', {})
64+
65+
# Verify that the test environment was set up correctly.
66+
assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path)
67+
assert service_locator._configuration_was_set is False
68+
assert service_locator._storage_client_was_set is False
69+
assert service_locator._event_manager_was_set is False
70+
71+
return _prepare_test_env
4872

4973

50-
# To isolate the tests, we need to reset the used singletons before each test case
51-
# We also set the MemoryStorageClient to use a temp path
5274
@pytest.fixture(autouse=True)
53-
def _reset_and_patch_default_instances(
54-
monkeypatch: pytest.MonkeyPatch,
55-
tmp_path: Path,
56-
reset_default_instances: Callable[[], None],
57-
) -> None:
58-
# This forces the MemoryStorageClient to use tmp_path for its storage dir
59-
monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path))
60-
61-
reset_default_instances()
75+
def _isolate_test_environment(prepare_test_env: Callable[[], None]) -> None:
76+
"""Isolate the testing environment by resetting global state before and after each test.
77+
78+
This fixture ensures that each test starts with a clean slate and that any modifications during the test
79+
do not affect subsequent tests. It runs automatically for all tests.
80+
81+
Args:
82+
prepare_test_env: Fixture to prepare the environment before each test.
83+
"""
84+
85+
prepare_test_env()
6286

6387

6488
# This class is used to patch the ApifyClientAsync methods to return a fixed value or be replaced with another method.

0 commit comments

Comments
 (0)