Skip to content

Commit d6b85ac

Browse files
committed
Add many configuration based tests
1 parent cf1ee6f commit d6b85ac

File tree

3 files changed

+165
-32
lines changed

3 files changed

+165
-32
lines changed

src/apify/_actor.py

Lines changed: 53 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from apify_client import ApifyClientAsync
1515
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
16+
from crawlee.errors import ServiceConflictError
1617
from crawlee.events import (
1718
Event,
1819
EventAbortingData,
@@ -118,9 +119,12 @@ def __init__(
118119
self._exit_process = self._get_default_exit_process() if exit_process is None else exit_process
119120
self._is_exiting = False
120121

121-
if configuration:
122-
service_locator.set_configuration(configuration)
123-
self._configuration = service_locator.get_configuration()
122+
# Actor state when this method is being executed is unpredictable.
123+
# Actor can be initialized by lazy object proxy or by user directly, or by both.
124+
# Until `init` method is run, this state of uncertainty remains. This is the reason why any setting done here in
125+
# `__init__` method should not be considered final.
126+
127+
self._configuration = configuration
124128
self._configure_logging = configure_logging
125129
self._apify_client = self.new_client()
126130

@@ -131,17 +135,17 @@ def __init__(
131135
# Set the event manager based on whether the Actor is running on the platform or locally.
132136
self._event_manager = (
133137
ApifyEventManager(
134-
configuration=self._configuration,
135-
persist_state_interval=self._configuration.persist_state_interval,
138+
configuration=self.config,
139+
persist_state_interval=self.config.persist_state_interval,
136140
)
137141
if self.is_at_home()
138142
else LocalEventManager(
139-
system_info_interval=self._configuration.system_info_interval,
140-
persist_state_interval=self._configuration.persist_state_interval,
143+
system_info_interval=self.config.system_info_interval,
144+
persist_state_interval=self.config.persist_state_interval,
141145
)
142146
)
143147

144-
self._charging_manager = ChargingManagerImplementation(self._configuration, self._apify_client)
148+
self._charging_manager = ChargingManagerImplementation(self.config, self._apify_client)
145149

146150
self._is_initialized = False
147151

@@ -204,12 +208,18 @@ def apify_client(self) -> ApifyClientAsync:
204208
@property
205209
def configuration(self) -> Configuration:
206210
"""The Configuration instance the Actor instance uses."""
207-
return self._configuration
211+
return self.config
208212

209213
@property
210214
def config(self) -> Configuration:
211215
"""The Configuration instance the Actor instance uses."""
212-
return self._configuration
216+
if self._configuration:
217+
return self._configuration
218+
self.log.debug(
219+
'Implicit configuration used.'
220+
"It's recommended to explicitly set the configuration to avoid unexpected behavior."
221+
)
222+
return Configuration()
213223

214224
@property
215225
def event_manager(self) -> EventManager:
@@ -251,6 +261,21 @@ async def init(self) -> None:
251261
This method should be called immediately before performing any additional Actor actions, and it should be
252262
called only once.
253263
"""
264+
if self._configuration:
265+
# Set explicitly the configuration in the service locator
266+
service_locator.set_configuration(self.configuration)
267+
else:
268+
try:
269+
# Set implicit default Apify configuration, unless configuration was already set.
270+
service_locator.set_configuration(self.configuration)
271+
except ServiceConflictError:
272+
self.log.info(
273+
'Configuration in service locator was set explicitly before Actor. '
274+
'Using the existing configuration.'
275+
)
276+
# Use the configuration from the service locator
277+
self._configuration = service_locator.get_configuration()
278+
254279
if self._is_initialized:
255280
raise RuntimeError('The Actor was already initialized!')
256281

@@ -268,7 +293,6 @@ async def init(self) -> None:
268293
service_locator.set_storage_client(self._cloud_storage_client)
269294

270295
service_locator.set_event_manager(self.event_manager)
271-
service_locator.set_configuration(self.configuration)
272296

273297
# The logging configuration has to be called after all service_locator set methods.
274298
if self._configure_logging:
@@ -386,8 +410,8 @@ def new_client(
386410
(increases exponentially from this value).
387411
timeout: The socket timeout of the HTTP requests sent to the Apify API.
388412
"""
389-
token = token or self._configuration.token
390-
api_url = api_url or self._configuration.api_base_url
413+
token = token or self.config.token
414+
api_url = api_url or self.config.api_base_url
391415
return ApifyClientAsync(
392416
token=token,
393417
api_url=api_url,
@@ -547,9 +571,9 @@ async def get_input(self) -> Any:
547571
"""Get the Actor input value from the default key-value store associated with the current Actor run."""
548572
self._raise_if_not_initialized()
549573

550-
input_value = await self.get_value(self._configuration.input_key)
551-
input_secrets_private_key = self._configuration.input_secrets_private_key_file
552-
input_secrets_key_passphrase = self._configuration.input_secrets_private_key_passphrase
574+
input_value = await self.get_value(self.config.input_key)
575+
input_secrets_private_key = self.config.input_secrets_private_key_file
576+
input_secrets_key_passphrase = self.config.input_secrets_private_key_passphrase
553577
if input_secrets_private_key and input_secrets_key_passphrase:
554578
private_key = load_private_key(
555579
input_secrets_private_key,
@@ -686,7 +710,7 @@ def off(self, event_name: Event, listener: Callable | None = None) -> None:
686710

687711
def is_at_home(self) -> bool:
688712
"""Return `True` when the Actor is running on the Apify platform, and `False` otherwise (e.g. local run)."""
689-
return self._configuration.is_at_home
713+
return self.config.is_at_home
690714

691715
def get_env(self) -> dict:
692716
"""Return a dictionary with information parsed from all the `APIFY_XXX` environment variables.
@@ -712,7 +736,7 @@ def get_env(self) -> dict:
712736
aliases = [field_name]
713737

714738
for alias in aliases:
715-
config[alias] = getattr(self._configuration, field_name)
739+
config[alias] = getattr(self.config, field_name)
716740

717741
env_vars = {env_var.value.lower(): env_var.name.lower() for env_var in [*ActorEnvVars, *ApifyEnvVars]}
718742
return {option_name: config[env_var] for env_var, option_name in env_vars.items() if env_var in config}
@@ -1000,13 +1024,13 @@ async def metamorph(
10001024
return
10011025

10021026
if not custom_after_sleep:
1003-
custom_after_sleep = self._configuration.metamorph_after_sleep
1027+
custom_after_sleep = self.config.metamorph_after_sleep
10041028

10051029
# If is_at_home() is True, config.actor_run_id is always set
1006-
if not self._configuration.actor_run_id:
1030+
if not self.config.actor_run_id:
10071031
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
10081032

1009-
await self._apify_client.run(self._configuration.actor_run_id).metamorph(
1033+
await self._apify_client.run(self.config.actor_run_id).metamorph(
10101034
target_actor_id=target_actor_id,
10111035
run_input=run_input,
10121036
target_actor_build=target_actor_build,
@@ -1043,7 +1067,7 @@ async def reboot(
10431067
_ActorType._is_rebooting = True
10441068

10451069
if not custom_after_sleep:
1046-
custom_after_sleep = self._configuration.metamorph_after_sleep
1070+
custom_after_sleep = self.config.metamorph_after_sleep
10471071

10481072
# Call all the listeners for the PERSIST_STATE and MIGRATING events, and wait for them to finish.
10491073
# PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot.
@@ -1063,10 +1087,10 @@ async def reboot(
10631087
*[listener(EventMigratingData()) for listener in migrating_listeners],
10641088
)
10651089

1066-
if not self._configuration.actor_run_id:
1090+
if not self.config.actor_run_id:
10671091
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
10681092

1069-
await self._apify_client.run(self._configuration.actor_run_id).reboot()
1093+
await self._apify_client.run(self.config.actor_run_id).reboot()
10701094

10711095
if custom_after_sleep:
10721096
await asyncio.sleep(custom_after_sleep.total_seconds())
@@ -1105,11 +1129,11 @@ async def add_webhook(
11051129
return
11061130

11071131
# If is_at_home() is True, config.actor_run_id is always set
1108-
if not self._configuration.actor_run_id:
1132+
if not self.config.actor_run_id:
11091133
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
11101134

11111135
await self._apify_client.webhooks().create(
1112-
actor_run_id=self._configuration.actor_run_id,
1136+
actor_run_id=self.config.actor_run_id,
11131137
event_types=webhook.event_types,
11141138
request_url=webhook.request_url,
11151139
payload_template=webhook.payload_template,
@@ -1141,10 +1165,10 @@ async def set_status_message(
11411165
return None
11421166

11431167
# If is_at_home() is True, config.actor_run_id is always set
1144-
if not self._configuration.actor_run_id:
1168+
if not self.config.actor_run_id:
11451169
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
11461170

1147-
api_result = await self._apify_client.run(self._configuration.actor_run_id).update(
1171+
api_result = await self._apify_client.run(self.config.actor_run_id).update(
11481172
status_message=status_message, is_status_message_terminal=is_terminal
11491173
)
11501174

@@ -1199,7 +1223,7 @@ async def create_proxy_configuration(
11991223
country_code=country_code,
12001224
proxy_urls=proxy_urls,
12011225
new_url_function=new_url_function,
1202-
_actor_config=self._configuration,
1226+
_actor_config=self.config,
12031227
_apify_client=self._apify_client,
12041228
)
12051229

src/apify/_configuration.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,13 +440,21 @@ class ApifyServiceLocator(ServiceLocator):
440440
def get_configuration(self) -> Configuration:
441441
# ApifyServiceLocator can store any children of Crawlee Configuration, but in Apify context it is desired to
442442
# return Apify Configuration.
443+
if isinstance(self._configuration, Configuration):
444+
# If Apify configuration was already stored in service locator, return it.
445+
return self._configuration
446+
443447
stored_configuration = super().get_configuration()
448+
444449
# Ensure the returned configuration is of type Apify Configuration.
450+
# Most likely crawlee configuration was already set. Create Apify configuration from it.
451+
# Env vars will have lower priority than the stored configuration.
445452
model_dump = stored_configuration.model_dump()
446453
# The configuration will read env variables first and overridden with stored_configuration
447454
_config = Configuration()
448455
for key in model_dump:
449-
setattr(_config, key, model_dump[key])
456+
if model_dump[key]:
457+
setattr(_config, key, model_dump[key])
450458
return _config
451459

452460

tests/unit/actor/test_configuration.py

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import pytest
22

33
from crawlee.configuration import Configuration as CrawleeConfiguration
4+
from crawlee.crawlers import BasicCrawler
5+
from crawlee.errors import ServiceConflictError
46

7+
from apify import Actor
58
from apify import Configuration as ApifyConfiguration
69
from apify._configuration import service_locator
710

@@ -28,10 +31,108 @@ def test_disable_browser_sandbox(
2831

2932
def test_apify_configuration_is_always_used() -> None:
3033
"""Set Crawlee Configuration in service_locator and verify that Apify Configuration is returned."""
31-
# Some value to verify
32-
max_used_cpu_ratio = 0.123456
34+
max_used_cpu_ratio = 0.123456 # Some unique value to verify configuration
3335
service_locator.set_configuration(CrawleeConfiguration(max_used_cpu_ratio=max_used_cpu_ratio))
3436

3537
returned_config = service_locator.get_configuration()
3638
assert returned_config.max_used_cpu_ratio == max_used_cpu_ratio
3739
assert isinstance(returned_config, ApifyConfiguration)
40+
41+
42+
async def test_existing_apify_config_respected_by_actor() -> None:
43+
"""Set Apify Configuration in service_locator and verify that Actor respects it."""
44+
max_used_cpu_ratio = 0.123456 # Some unique value to verify configuration
45+
apify_config = ApifyConfiguration(max_used_cpu_ratio=max_used_cpu_ratio)
46+
service_locator.set_configuration(apify_config)
47+
async with Actor:
48+
pass
49+
50+
returned_config = service_locator.get_configuration()
51+
assert returned_config is apify_config
52+
53+
54+
async def test_existing_crawlee_config_respected_by_actor() -> None:
55+
"""Set Crawlee Configuration in service_locator and verify that Actor respects it."""
56+
max_used_cpu_ratio = 0.123456 # Some unique value to verify configuration
57+
crawlee_config = CrawleeConfiguration(max_used_cpu_ratio=max_used_cpu_ratio)
58+
service_locator.set_configuration(crawlee_config)
59+
async with Actor:
60+
pass
61+
62+
returned_config = service_locator.get_configuration()
63+
assert returned_config is not crawlee_config
64+
assert isinstance(returned_config, ApifyConfiguration)
65+
# Make sure the Crawlee Configuration was used to create returned Apify Configuration
66+
assert returned_config.max_used_cpu_ratio == max_used_cpu_ratio
67+
68+
69+
async def test_existing_apify_config_throws_error_when_set_in_actor() -> None:
70+
"""Test that passing explicit configuration to actor after service locator configuration was already set,
71+
raises exception."""
72+
service_locator.set_configuration(ApifyConfiguration())
73+
with pytest.raises(ServiceConflictError):
74+
async with Actor(configuration=ApifyConfiguration()):
75+
pass
76+
77+
78+
async def test_setting_config_after_actor_raises_exception() -> None:
79+
"""Test that passing setting configuration in service locator after actor wa created raises an exception."""
80+
async with Actor():
81+
with pytest.raises(ServiceConflictError):
82+
service_locator.set_configuration(ApifyConfiguration())
83+
84+
85+
async def test_actor_using_input_configuration() -> None:
86+
"""Test that passing setting configuration in service locator after actor wa created raises an exception."""
87+
apify_config = ApifyConfiguration()
88+
async with Actor(configuration=apify_config):
89+
pass
90+
91+
assert service_locator.get_configuration() is apify_config
92+
93+
94+
async def test_crawler_implicit_configuration_through_actor() -> None:
95+
"""Test that crawler uses Actor configuration unless explicit configuration was passed to it."""
96+
apify_config = ApifyConfiguration()
97+
async with Actor(configuration=apify_config):
98+
crawler = BasicCrawler()
99+
100+
assert crawler._service_locator.get_configuration() is apify_config
101+
assert service_locator.get_configuration() is apify_config
102+
103+
104+
async def test_crawler_implicit_configuration() -> None:
105+
"""Test that crawler and Actor use implicit service_locator based configuration unless explicit configuration
106+
was passed to them."""
107+
async with Actor():
108+
crawler_1 = BasicCrawler()
109+
110+
assert service_locator.get_configuration() is crawler_1._service_locator.get_configuration()
111+
112+
113+
async def test_crawlers_own_configuration() -> None:
114+
"""Test that crawlers can use own configurations without crashing."""
115+
config_actor = ApifyConfiguration()
116+
apify_crawler_1 = ApifyConfiguration()
117+
apify_crawler_2 = ApifyConfiguration()
118+
119+
async with Actor(configuration=config_actor):
120+
crawler_1 = BasicCrawler(configuration=apify_crawler_1)
121+
crawler_2 = BasicCrawler(configuration=apify_crawler_2)
122+
123+
assert service_locator.get_configuration() is config_actor
124+
assert crawler_1._service_locator.get_configuration() is apify_crawler_1
125+
assert crawler_2._service_locator.get_configuration() is apify_crawler_2
126+
127+
128+
async def test_crawler_global_configuration() -> None:
129+
"""Test that crawler and Actor use explicit service_locator based configuration unless explicit configuration
130+
was passed to them."""
131+
config_global = ApifyConfiguration()
132+
service_locator.set_configuration(config_global)
133+
134+
async with Actor():
135+
crawler_1 = BasicCrawler()
136+
137+
assert service_locator.get_configuration() is config_global
138+
assert crawler_1._service_locator.get_configuration() is config_global

0 commit comments

Comments
 (0)