Skip to content

Commit 9edd205

Browse files
committed
rm force flag
1 parent 49403b5 commit 9edd205

File tree

8 files changed

+93
-62
lines changed

8 files changed

+93
-62
lines changed

src/crawlee/service_container.py

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -95,21 +95,16 @@ def get_configuration() -> Configuration:
9595

9696

9797
@docs_group('Functions')
98-
def set_configuration(
99-
configuration: Configuration,
100-
*,
101-
force: bool = False,
102-
) -> None:
98+
def set_configuration(configuration: Configuration) -> None:
10399
"""Set the configuration.
104100
105101
Args:
106102
configuration: The configuration to set.
107-
force: If True, the configuration will be set even if it was already set.
108103
109104
Raises:
110105
ServiceConflictError: If the configuration was already set.
111106
"""
112-
if _service_locator.configuration_was_set and not force:
107+
if _service_locator.configuration_was_set:
113108
raise ServiceConflictError(Configuration, configuration, _service_locator.configuration)
114109

115110
_service_locator.configuration = configuration
@@ -122,21 +117,16 @@ def get_event_manager() -> EventManager:
122117

123118

124119
@docs_group('Functions')
125-
def set_event_manager(
126-
event_manager: EventManager,
127-
*,
128-
force: bool = False,
129-
) -> None:
120+
def set_event_manager(event_manager: EventManager) -> None:
130121
"""Set the event manager.
131122
132123
Args:
133124
event_manager: The event manager to set.
134-
force: If True, the event manager will be set even if it was already set.
135125
136126
Raises:
137127
ServiceConflictError: If the event manager was already set.
138128
"""
139-
if _service_locator.event_manager_was_set and not force:
129+
if _service_locator.event_manager_was_set:
140130
raise ServiceConflictError(EventManager, event_manager, _service_locator.event_manager)
141131

142132
_service_locator.event_manager = event_manager
@@ -149,21 +139,16 @@ def get_storage_client() -> BaseStorageClient:
149139

150140

151141
@docs_group('Functions')
152-
def set_storage_client(
153-
storage_client: BaseStorageClient,
154-
*,
155-
force: bool = False,
156-
) -> None:
142+
def set_storage_client(storage_client: BaseStorageClient) -> None:
157143
"""Set the storage client.
158144
159145
Args:
160146
storage_client: The storage client to set.
161-
force: If True, the storage client will be set even if it was already set.
162147
163148
Raises:
164149
ServiceConflictError: If the storage client was already set.
165150
"""
166-
if _service_locator.storage_client_was_set and not force:
151+
if _service_locator.storage_client_was_set:
167152
raise ServiceConflictError(BaseStorageClient, storage_client, _service_locator.storage_client)
168153

169154
_service_locator.storage_client = storage_client

tests/unit/_memory_storage_client/test_memory_storage_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,15 +224,15 @@ async def test_not_implemented_method(tmp_path: Path) -> None:
224224
async def test_default_storage_path_used(monkeypatch: pytest.MonkeyPatch) -> None:
225225
# We expect the default value to be used
226226
monkeypatch.delenv('CRAWLEE_STORAGE_DIR', raising=False)
227-
service_container.set_configuration(Configuration(), force=True)
227+
service_container.set_configuration(Configuration())
228228
ms = MemoryStorageClient()
229229
assert ms.storage_dir == './storage'
230230

231231

232232
async def test_storage_path_from_env_var_overrides_default(monkeypatch: pytest.MonkeyPatch) -> None:
233233
# We expect the env var to override the default value
234234
monkeypatch.setenv('CRAWLEE_STORAGE_DIR', './env_var_storage_dir')
235-
service_container.set_configuration(Configuration(), force=True)
235+
service_container.set_configuration(Configuration())
236236
ms = MemoryStorageClient()
237237
assert ms.storage_dir == './env_var_storage_dir'
238238

tests/unit/_memory_storage_client/test_memory_storage_e2e.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
async def test_actor_memory_storage_client_key_value_store_e2e(
1515
monkeypatch: pytest.MonkeyPatch,
1616
purge_on_start: bool, # noqa: FBT001
17-
reset_globals: Callable[[], None],
17+
prepare_test_env: Callable[[], None],
1818
) -> None:
1919
"""This test simulates two clean runs using memory storage.
2020
The second run attempts to access data created by the first one.
@@ -32,7 +32,7 @@ async def test_actor_memory_storage_client_key_value_store_e2e(
3232

3333
# We simulate another clean run, we expect the memory storage to read from the local data directory
3434
# Default storages are purged based on purge_on_start parameter.
35-
reset_globals()
35+
prepare_test_env()
3636

3737
# Check if we're using a different memory storage instance
3838
assert old_client is not service_container.get_storage_client()
@@ -54,7 +54,7 @@ async def test_actor_memory_storage_client_key_value_store_e2e(
5454
async def test_actor_memory_storage_client_request_queue_e2e(
5555
monkeypatch: pytest.MonkeyPatch,
5656
purge_on_start: bool, # noqa: FBT001
57-
reset_globals: Callable[[], None],
57+
prepare_test_env: Callable[[], None],
5858
) -> None:
5959
"""This test simulates two clean runs using memory storage.
6060
The second run attempts to access data created by the first one.
@@ -82,7 +82,7 @@ async def test_actor_memory_storage_client_request_queue_e2e(
8282

8383
# We simulate another clean run, we expect the memory storage to read from the local data directory
8484
# Default storages are purged based on purge_on_start parameter.
85-
reset_globals()
85+
prepare_test_env()
8686

8787
# Add some more requests to the default queue
8888
default_queue = await RequestQueue.open()

tests/unit/basic_crawler/test_basic_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -798,7 +798,7 @@ async def handler(context: BasicCrawlingContext) -> None:
798798

799799
async def test_respects_no_persist_storage() -> None:
800800
config = Configuration(persist_storage=False)
801-
service_container.set_configuration(config, force=True)
801+
service_container.set_configuration(config)
802802
crawler = BasicCrawler()
803803

804804
@crawler.router.default_handler

tests/unit/conftest.py

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,47 +18,96 @@
1818
from crawlee.storages import _creation_management
1919

2020
if TYPE_CHECKING:
21-
from collections.abc import AsyncGenerator
21+
from collections.abc import AsyncGenerator, Generator
2222
from pathlib import Path
2323

2424

2525
@pytest.fixture
26-
def reset_globals(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]:
27-
def reset() -> None:
28-
# Set the environment variable for the local storage directory to the temporary path
26+
def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]:
27+
"""Prepare the testing environment by resetting the global state before each test.
28+
29+
This fixture ensures that the global state of the package is reset to a known baseline before each test runs.
30+
It also configures a temporary storage directory for test isolation.
31+
32+
Args:
33+
monkeypatch: Test utility provided by pytest for patching.
34+
tmp_path: A unique temporary directory path provided by pytest for test isolation.
35+
36+
Returns:
37+
A callable that prepares the test environment.
38+
"""
39+
40+
def _prepare_test_env() -> None:
41+
# Set the environment variable for the local storage directory to the temporary path.
2942
monkeypatch.setenv('CRAWLEE_STORAGE_DIR', str(tmp_path))
3043

31-
# Reset services in crawlee.service_container
32-
service_container.set_configuration(Configuration(), force=True)
33-
service_container.set_storage_client(MemoryStorageClient(), force=True)
34-
service_container.set_event_manager(LocalEventManager(), force=True)
44+
# Initialize services in the service container with default values.
45+
service_container.set_configuration(Configuration())
46+
service_container.set_storage_client(MemoryStorageClient())
47+
service_container.set_event_manager(LocalEventManager())
48+
49+
# Reset the global state flags in the service locator.
50+
service_container._service_locator._configuration_was_set = False
51+
service_container._service_locator._storage_client_was_set = False
52+
service_container._service_locator._event_manager_was_set = False
3553

36-
# Clear creation-related caches to ensure no state is carried over between tests
54+
# Clear creation-related caches to ensure no state is carried over between tests.
3755
monkeypatch.setattr(_creation_management, '_cache_dataset_by_id', {})
3856
monkeypatch.setattr(_creation_management, '_cache_dataset_by_name', {})
3957
monkeypatch.setattr(_creation_management, '_cache_kvs_by_id', {})
4058
monkeypatch.setattr(_creation_management, '_cache_kvs_by_name', {})
4159
monkeypatch.setattr(_creation_management, '_cache_rq_by_id', {})
4260
monkeypatch.setattr(_creation_management, '_cache_rq_by_name', {})
4361

44-
# Verify that the environment variable is set correctly
62+
# Verify that the test environment was set up correctly.
4563
assert os.environ.get('CRAWLEE_STORAGE_DIR') == str(tmp_path)
64+
assert service_container._service_locator.configuration_was_set is False
65+
assert service_container._service_locator.storage_client_was_set is False
66+
assert service_container._service_locator.event_manager_was_set is False
4667

47-
return reset
68+
return _prepare_test_env
69+
70+
71+
@pytest.fixture
72+
def cleanup_test_env() -> Callable[[], None]:
73+
"""Clean up and reset global state after a test has completed.
74+
75+
This fixture ensures that any modifications to the global state during a test are undone
76+
after the test finishes, restoring the environment for subsequent tests.
77+
78+
Returns:
79+
A callable that cleans up the test environment.
80+
"""
81+
82+
def _cleanup_test_env() -> None:
83+
# Reset the flags in the service locator to indicate no services are explicitly set.
84+
service_container._service_locator._configuration_was_set = False
85+
service_container._service_locator._storage_client_was_set = False
86+
service_container._service_locator._event_manager_was_set = False
87+
88+
return _cleanup_test_env
4889

4990

5091
@pytest.fixture(autouse=True)
51-
def _isolate_test_environment(reset_globals: Callable[[], None]) -> None:
52-
"""Isolate tests by resetting the storage clients, clearing caches, and setting the environment variables.
92+
def _isolate_test_environment(
93+
prepare_test_env: Callable[[], None],
94+
cleanup_test_env: Callable[[], None],
95+
) -> Generator[None, None, None]:
96+
"""Isolate the testing environment by resetting global state before and after each test.
5397
54-
The fixture is applied automatically to all test cases.
98+
This fixture ensures that each test starts with a clean slate and that any modifications during the test
99+
do not affect subsequent tests. It runs automatically for all tests.
55100
56101
Args:
57-
monkeypatch: Test utility provided by pytest.
58-
tmp_path: A unique temporary directory path provided by pytest for test isolation.
59-
"""
102+
prepare_test_env: Fixture to prepare the environment before each test.
103+
cleanup_test_env: Fixture to clean up the environment after each test.
60104
61-
reset_globals()
105+
Yields:
106+
None. This fixture works as a setup and teardown mechanism.
107+
"""
108+
prepare_test_env()
109+
yield
110+
cleanup_test_env()
62111

63112

64113
@pytest.fixture

tests/unit/sessions/test_session_pool.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ async def test_create_session_function() -> None:
114114

115115
async def test_session_pool_persist(event_manager: EventManager, kvs: KeyValueStore) -> None:
116116
"""Test persistence of session pool state to KVS and validate stored data integrity."""
117-
service_container.set_event_manager(event_manager, force=True)
117+
service_container.set_event_manager(event_manager)
118118

119119
async with SessionPool(
120120
max_pool_size=MAX_POOL_SIZE,
@@ -145,7 +145,7 @@ async def test_session_pool_persist(event_manager: EventManager, kvs: KeyValueSt
145145

146146
async def test_session_pool_persist_and_restore(event_manager: EventManager, kvs: KeyValueStore) -> None:
147147
"""Check session pool's ability to persist its state and then restore it accurately after reset."""
148-
service_container.set_event_manager(event_manager, force=True)
148+
service_container.set_event_manager(event_manager)
149149

150150
async with SessionPool(
151151
max_pool_size=MAX_POOL_SIZE,

tests/unit/test_configuration.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ async def test_storage_not_persisted_when_disabled(tmp_path: Path, httpbin: URL)
3232
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
3333
)
3434
storage_client = MemoryStorageClient(config)
35-
set_storage_client(storage_client, force=True)
35+
set_storage_client(storage_client)
3636

3737
crawler = HttpCrawler()
3838

@@ -54,7 +54,7 @@ async def test_storage_persisted_when_enabled(tmp_path: Path, httpbin: URL) -> N
5454
crawlee_storage_dir=str(tmp_path), # type: ignore[call-arg]
5555
)
5656
storage_client = MemoryStorageClient(config)
57-
set_storage_client(storage_client, force=True)
57+
set_storage_client(storage_client)
5858

5959
crawler = HttpCrawler()
6060

tests/unit/test_service_container.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,38 +22,35 @@ def test_configuration() -> None:
2222
assert config == default_config
2323

2424
custom_config = Configuration(default_browser_path='custom_path')
25+
set_configuration(custom_config)
26+
config = get_configuration()
27+
assert config == custom_config
2528

2629
with pytest.raises(ServiceConflictError, match='Configuration has already been set.'):
2730
set_configuration(custom_config)
2831

29-
set_configuration(custom_config, force=True)
30-
config = get_configuration()
31-
assert config == custom_config
32-
3332

3433
def test_event_manager() -> None:
3534
default_event_manager = get_event_manager()
3635
assert isinstance(default_event_manager, LocalEventManager)
3736

3837
custom_event_manager = LocalEventManager()
38+
set_event_manager(custom_event_manager)
39+
event_manager = get_event_manager()
40+
assert event_manager == custom_event_manager
3941

4042
with pytest.raises(ServiceConflictError, match='EventManager has already been set.'):
4143
set_event_manager(custom_event_manager)
4244

43-
set_event_manager(custom_event_manager, force=True)
44-
event_manager = get_event_manager()
45-
assert event_manager == custom_event_manager
46-
4745

4846
def test_storage_client() -> None:
4947
default_storage_client = get_storage_client()
5048
assert isinstance(default_storage_client, MemoryStorageClient)
5149

5250
custom_storage_client = MemoryStorageClient()
51+
set_storage_client(custom_storage_client)
52+
storage_client = get_storage_client()
53+
assert storage_client == custom_storage_client
5354

5455
with pytest.raises(ServiceConflictError, match='StorageClient has already been set.'):
5556
set_storage_client(custom_storage_client)
56-
57-
set_storage_client(custom_storage_client, force=True)
58-
storage_client = get_storage_client()
59-
assert storage_client == custom_storage_client

0 commit comments

Comments
 (0)