Skip to content

Commit a2e4ff6

Browse files
authored
Allow overriding Configuration values in constructor, unify setting its properties (#51)
This PR: (1) unifies how all the `Configuration` values are set (only `max_used_cpu_ratio` did not have an env var, so one was added; it is only really useful when running locally), and (2) adds an option to override the non-constant `Configuration` values directly in the `Configuration` constructor, for somewhat easier use.
1 parent 1843f48 commit a2e4ff6

File tree

4 files changed

+111
-26
lines changed

4 files changed

+111
-26
lines changed

src/apify/_utils.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@
3131
from .consts import (
3232
_BOOL_ENV_VARS_TYPE,
3333
_DATETIME_ENV_VARS_TYPE,
34+
_FLOAT_ENV_VARS_TYPE,
3435
_INTEGER_ENV_VARS_TYPE,
3536
_STRING_ENV_VARS_TYPE,
3637
BOOL_ENV_VARS,
3738
DATETIME_ENV_VARS,
39+
FLOAT_ENV_VARS,
3840
INTEGER_ENV_VARS,
3941
REQUEST_ID_LENGTH,
4042
ApifyEnvVars,
@@ -113,6 +115,16 @@ def _fetch_and_parse_env_var(env_var: _DATETIME_ENV_VARS_TYPE, default: datetime
113115
...
114116

115117

118+
@overload
119+
def _fetch_and_parse_env_var(env_var: _FLOAT_ENV_VARS_TYPE) -> Optional[float]: # noqa: U100
120+
...
121+
122+
123+
@overload
124+
def _fetch_and_parse_env_var(env_var: _FLOAT_ENV_VARS_TYPE, default: float) -> float: # noqa: U100
125+
...
126+
127+
116128
@overload
117129
def _fetch_and_parse_env_var(env_var: _INTEGER_ENV_VARS_TYPE) -> Optional[int]: # noqa: U100
118130
...
@@ -147,11 +159,16 @@ def _fetch_and_parse_env_var(env_var: Any, default: Any = None) -> Any:
147159

148160
if env_var in BOOL_ENV_VARS:
149161
return _maybe_parse_bool(val)
162+
if env_var in FLOAT_ENV_VARS:
163+
parsed_float = _maybe_parse_float(val)
164+
if parsed_float is None:
165+
return default
166+
return parsed_float
150167
if env_var in INTEGER_ENV_VARS:
151-
res = _maybe_parse_int(val)
152-
if res is None:
168+
parsed_int = _maybe_parse_int(val)
169+
if parsed_int is None:
153170
return default
154-
return res
171+
return parsed_int
155172
if env_var in DATETIME_ENV_VARS:
156173
return _maybe_parse_datetime(val)
157174
return val
@@ -185,6 +202,13 @@ def _maybe_parse_datetime(val: str) -> Union[datetime, str]:
185202
return val
186203

187204

205+
def _maybe_parse_float(val: str) -> Optional[float]:
206+
try:
207+
return float(val)
208+
except ValueError:
209+
return None
210+
211+
188212
def _maybe_parse_int(val: str) -> Optional[int]:
189213
try:
190214
return int(val)

src/apify/config.py

Lines changed: 71 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,48 +13,97 @@ class Configuration:
1313

1414
_default_instance: Optional['Configuration'] = None
1515

16-
def __init__(self) -> None:
17-
"""Create a `Configuration` instance."""
16+
def __init__(
17+
self,
18+
*,
19+
api_base_url: Optional[str] = None,
20+
api_public_base_url: Optional[str] = None,
21+
container_port: Optional[int] = None,
22+
container_url: Optional[str] = None,
23+
default_dataset_id: Optional[str] = None,
24+
default_key_value_store_id: Optional[str] = None,
25+
default_request_queue_id: Optional[str] = None,
26+
input_key: Optional[str] = None,
27+
max_used_cpu_ratio: Optional[float] = None,
28+
metamorph_after_sleep_millis: Optional[int] = None,
29+
persist_state_interval_millis: Optional[int] = None,
30+
persist_storage: Optional[bool] = None,
31+
proxy_hostname: Optional[str] = None,
32+
proxy_password: Optional[str] = None,
33+
proxy_port: Optional[int] = None,
34+
proxy_status_url: Optional[str] = None,
35+
purge_on_start: Optional[bool] = None,
36+
token: Optional[str] = None,
37+
system_info_interval_millis: Optional[int] = None,
38+
) -> None:
39+
"""Create a `Configuration` instance.
40+
41+
All the parameters are loaded by default from environment variables when running on the Apify platform.
42+
You can override them here in the Configuration constructor, which might be useful for local testing of your actors.
43+
44+
Args:
45+
api_base_url (str, optional): The URL of the Apify API.
46+
This is the URL actually used for connecting to the API, so it can contain an IP address when running in a container on the platform.
47+
api_public_base_url (str, optional): The public URL of the Apify API.
48+
This will always contain the public URL of the API, even when running in a container on the platform.
49+
Useful for generating shareable URLs to key-value store records or datasets.
50+
container_port (int, optional): The port on which the container can listen for HTTP requests.
51+
container_url (str, optional): The URL on which the container can listen for HTTP requests.
52+
default_dataset_id (str, optional): The ID of the default dataset for the actor.
53+
default_key_value_store_id (str, optional): The ID of the default key-value store for the actor.
54+
default_request_queue_id (str, optional): The ID of the default request queue for the actor.
55+
input_key (str, optional): The key of the input record in the actor's default key-value store.
56+
max_used_cpu_ratio (float, optional): The CPU usage above which the SYSTEM_INFO event will report the CPU is overloaded.
57+
metamorph_after_sleep_millis (int, optional): How long should the actor sleep after calling metamorph.
58+
persist_state_interval_millis (int, optional): How often should the actor emit the PERSIST_STATE event.
59+
persist_storage (bool, optional): Whether the actor should persist its used storages to the filesystem when running locally.
60+
proxy_hostname (str, optional): The hostname of Apify Proxy.
61+
proxy_password (str, optional): The password for Apify Proxy.
62+
proxy_port (int, optional): The port of Apify Proxy.
63+
proxy_status_url (str, optional): The URL on which the Apify Proxy status page is available.
64+
purge_on_start (bool, optional): Whether the actor should purge its default storages on startup, when running locally.
65+
token (str, optional): The API token for the Apify API this actor should use.
66+
system_info_interval_millis (int, optional): How often should the actor emit the SYSTEM_INFO event when running locally.
67+
"""
1868
self.actor_build_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_BUILD_ID)
1969
self.actor_build_number = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_BUILD_NUMBER)
2070
self.actor_events_ws_url = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_EVENTS_WS_URL)
2171
self.actor_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_ID)
2272
self.actor_run_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_RUN_ID)
2373
self.actor_task_id = _fetch_and_parse_env_var(ApifyEnvVars.ACTOR_TASK_ID)
24-
self.api_base_url = _fetch_and_parse_env_var(ApifyEnvVars.API_BASE_URL, 'https://api.apify.com')
25-
self.api_public_base_url = _fetch_and_parse_env_var(ApifyEnvVars.API_PUBLIC_BASE_URL, 'https://api.apify.com')
74+
self.api_base_url = api_base_url or _fetch_and_parse_env_var(ApifyEnvVars.API_BASE_URL, 'https://api.apify.com')
75+
self.api_public_base_url = api_public_base_url or _fetch_and_parse_env_var(ApifyEnvVars.API_PUBLIC_BASE_URL, 'https://api.apify.com')
2676
self.chrome_executable_path = _fetch_and_parse_env_var(ApifyEnvVars.CHROME_EXECUTABLE_PATH)
27-
self.container_port = _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_PORT, 4321)
28-
self.container_url = _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_URL, 'http://localhost:4321')
77+
self.container_port = container_port or _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_PORT, 4321)
78+
self.container_url = container_url or _fetch_and_parse_env_var(ApifyEnvVars.CONTAINER_URL, 'http://localhost:4321')
2979
self.dedicated_cpus = _fetch_and_parse_env_var(ApifyEnvVars.DEDICATED_CPUS)
3080
self.default_browser_path = _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_BROWSER_PATH)
31-
self.default_dataset_id = _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_DATASET_ID, 'default')
32-
self.default_key_value_store_id = _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_KEY_VALUE_STORE_ID, 'default')
33-
self.default_request_queue_id = _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_REQUEST_QUEUE_ID, 'default')
81+
self.default_dataset_id = default_dataset_id or _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_DATASET_ID, 'default')
82+
self.default_key_value_store_id = default_key_value_store_id or _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_KEY_VALUE_STORE_ID, 'default')
83+
self.default_request_queue_id = default_request_queue_id or _fetch_and_parse_env_var(ApifyEnvVars.DEFAULT_REQUEST_QUEUE_ID, 'default')
3484
self.disable_browser_sandbox = _fetch_and_parse_env_var(ApifyEnvVars.DISABLE_BROWSER_SANDBOX, False)
3585
self.headless = _fetch_and_parse_env_var(ApifyEnvVars.HEADLESS, True)
36-
self.input_key = _fetch_and_parse_env_var(ApifyEnvVars.INPUT_KEY, 'INPUT')
86+
self.input_key = input_key or _fetch_and_parse_env_var(ApifyEnvVars.INPUT_KEY, 'INPUT')
3787
self.input_secrets_private_key_file = _fetch_and_parse_env_var(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_FILE)
3888
self.input_secrets_private_key_passphrase = _fetch_and_parse_env_var(ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE)
3989
self.is_at_home = _fetch_and_parse_env_var(ApifyEnvVars.IS_AT_HOME, False)
90+
self.max_used_cpu_ratio = max_used_cpu_ratio or _fetch_and_parse_env_var(ApifyEnvVars.MAX_USED_CPU_RATIO, 0.95)
4091
self.memory_mbytes = _fetch_and_parse_env_var(ApifyEnvVars.MEMORY_MBYTES)
4192
self.meta_origin = _fetch_and_parse_env_var(ApifyEnvVars.META_ORIGIN)
42-
self.metamorph_after_sleep_millis = _fetch_and_parse_env_var(ApifyEnvVars.METAMORPH_AFTER_SLEEP_MILLIS, 300000)
43-
self.persist_state_interval_millis = _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, 60000)
44-
self.persist_storage = _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STORAGE)
45-
self.proxy_hostname = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_HOSTNAME, 'proxy.apify.com')
46-
self.proxy_password = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PASSWORD)
47-
self.proxy_port = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PORT, 8000)
48-
self.proxy_status_url = _fetch_and_parse_env_var(ApifyEnvVars.PROXY_STATUS_URL, 'http://proxy.apify.com')
49-
self.purge_on_start = _fetch_and_parse_env_var(ApifyEnvVars.PURGE_ON_START, True)
93+
self.metamorph_after_sleep_millis = metamorph_after_sleep_millis or _fetch_and_parse_env_var(ApifyEnvVars.METAMORPH_AFTER_SLEEP_MILLIS, 300000) # noqa: E501
94+
self.persist_state_interval_millis = persist_state_interval_millis or _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, 60000) # noqa: E501
95+
self.persist_storage = persist_storage or _fetch_and_parse_env_var(ApifyEnvVars.PERSIST_STORAGE)
96+
self.proxy_hostname = proxy_hostname or _fetch_and_parse_env_var(ApifyEnvVars.PROXY_HOSTNAME, 'proxy.apify.com')
97+
self.proxy_password = proxy_password or _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PASSWORD)
98+
self.proxy_port = proxy_port or _fetch_and_parse_env_var(ApifyEnvVars.PROXY_PORT, 8000)
99+
self.proxy_status_url = proxy_status_url or _fetch_and_parse_env_var(ApifyEnvVars.PROXY_STATUS_URL, 'http://proxy.apify.com')
100+
self.purge_on_start = purge_on_start or _fetch_and_parse_env_var(ApifyEnvVars.PURGE_ON_START, True)
50101
self.started_at = _fetch_and_parse_env_var(ApifyEnvVars.STARTED_AT)
51102
self.timeout_at = _fetch_and_parse_env_var(ApifyEnvVars.TIMEOUT_AT)
52-
self.token = _fetch_and_parse_env_var(ApifyEnvVars.TOKEN)
103+
self.token = token or _fetch_and_parse_env_var(ApifyEnvVars.TOKEN)
53104
self.user_id = _fetch_and_parse_env_var(ApifyEnvVars.USER_ID)
54105
self.xvfb = _fetch_and_parse_env_var(ApifyEnvVars.XVFB, False)
55-
self.system_info_interval_millis = _fetch_and_parse_env_var(ApifyEnvVars.SYSTEM_INFO_INTERVAL_MILLIS, 60000)
56-
57-
self.max_used_cpu_ratio = 0.95
106+
self.system_info_interval_millis = system_info_interval_millis or _fetch_and_parse_env_var(ApifyEnvVars.SYSTEM_INFO_INTERVAL_MILLIS, 60000)
58107

59108
@classmethod
60109
def _get_default_instance(cls) -> 'Configuration':

src/apify/consts.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class ApifyEnvVars(str, Enum):
4848
LOCAL_STORAGE_DIR = 'APIFY_LOCAL_STORAGE_DIR'
4949
LOG_FORMAT = 'APIFY_LOG_FORMAT'
5050
LOG_LEVEL = 'APIFY_LOG_LEVEL'
51+
MAX_USED_CPU_RATIO = 'APIFY_MAX_USED_CPU_RATIO'
5152
MEMORY_MBYTES = 'APIFY_MEMORY_MBYTES'
5253
META_ORIGIN = 'APIFY_META_ORIGIN'
5354
PERSIST_STORAGE = 'APIFY_PERSIST_STORAGE'
@@ -83,6 +84,12 @@ class ApifyEnvVars(str, Enum):
8384

8485
INTEGER_ENV_VARS: List[_INTEGER_ENV_VARS_TYPE] = list(get_args(_INTEGER_ENV_VARS_TYPE))
8586

87+
_FLOAT_ENV_VARS_TYPE = Literal[
88+
ApifyEnvVars.MAX_USED_CPU_RATIO,
89+
]
90+
91+
FLOAT_ENV_VARS: List[_FLOAT_ENV_VARS_TYPE] = list(get_args(_FLOAT_ENV_VARS_TYPE))
92+
8693
_BOOL_ENV_VARS_TYPE = Literal[
8794
ApifyEnvVars.DISABLE_BROWSER_SANDBOX,
8895
ApifyEnvVars.DISABLE_OUTDATED_WARNING,

tests/unit/actor/test_actor_env_helpers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
from apify import Actor
9-
from apify.consts import BOOL_ENV_VARS, DATETIME_ENV_VARS, INTEGER_ENV_VARS, STRING_ENV_VARS, ApifyEnvVars
9+
from apify.consts import BOOL_ENV_VARS, DATETIME_ENV_VARS, FLOAT_ENV_VARS, INTEGER_ENV_VARS, STRING_ENV_VARS, ApifyEnvVars
1010

1111

1212
class TestIsAtHome:
@@ -31,6 +31,11 @@ async def test_get_env_use_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> No
3131
expected_get_env[int_get_env_var] = random.randint(1, 99999)
3232
monkeypatch.setenv(int_env_var, f'{expected_get_env[int_get_env_var]}')
3333

34+
for float_env_var in FLOAT_ENV_VARS:
35+
float_get_env_var = float_env_var.name.lower()
36+
expected_get_env[float_get_env_var] = random.random()
37+
monkeypatch.setenv(float_env_var, f'{expected_get_env[float_get_env_var]}')
38+
3439
for bool_env_var in BOOL_ENV_VARS:
3540
bool_get_env_var = bool_env_var.name.lower()
3641
expected_get_env[bool_get_env_var] = random.choice([True, False])

0 commit comments

Comments
 (0)