Skip to content

Commit 046b370

Browse files
GitHK and Andrei Neagu authored
🚑️ tuning down scheduler for dynamic sidecars (ITISFoundation#3025)
* adding log level overwrite
* swapped out default log levels to warning for dysdcr
* strip duplicate network entries
* stripping extra errors
* fix log level error reporting
* making events trigger less frequently
* fixed failing test
* lowering log level
* literal
* replacing literal with validator

Co-authored-by: Andrei Neagu <[email protected]>
1 parent 66ea86c commit 046b370

File tree

9 files changed

+28
-11
lines changed

9 files changed

+28
-11
lines changed

.env-devel

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,14 @@ CATALOG_DEV_FEATURES_ENABLED=0
1919
DASK_SCHEDULER_HOST=dask-scheduler
2020
DASK_SCHEDULER_PORT=8786
2121

22-
23-
DYNAMIC_SIDECAR_IMAGE=${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest}
24-
2522
DIRECTOR_REGISTRY_CACHING_TTL=900
2623
DIRECTOR_REGISTRY_CACHING=True
2724

2825
COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_URL=tcp://dask-scheduler:8786
2926
DIRECTOR_V2_DEV_FEATURES_ENABLED=0
3027

3128
DYNAMIC_SIDECAR_IMAGE=${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest}
29+
DYNAMIC_SIDECAR_LOG_LEVEL=DEBUG
3230

3331
FUNCTION_SERVICES_AUTHORS='{"UN": {"name": "Unknown", "email": "[email protected]", "affiliation": "unknown"}}'
3432

services/director-v2/src/simcore_service_director_v2/core/settings.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ class DynamicSidecarProxySettings(BaseCustomSettings):
148148

149149

150150
class DynamicSidecarSettings(BaseCustomSettings):
151+
DYNAMIC_SIDECAR_LOG_LEVEL: str = Field(
152+
"WARNING", description="log level of the dynamic sidecar"
153+
)
151154
SC_BOOT_MODE: BootModeEnum = Field(
152155
BootModeEnum.PRODUCTION,
153156
description="Used to compute where or not should start sidecar in development mode",
@@ -274,6 +277,14 @@ class DynamicSidecarSettings(BaseCustomSettings):
274277
def strip_leading_slashes(cls, v) -> str:
275278
return v.lstrip("/")
276279

280+
@validator("DYNAMIC_SIDECAR_LOG_LEVEL")
281+
@classmethod
282+
def validate_log_level(cls, v) -> str:
283+
valid_log_levels = {"DEBUG", "INFO", "WARNING", "ERROR"}
284+
if v not in valid_log_levels:
285+
raise ValueError(f"Log level must be one of {valid_log_levels} not {v}")
286+
return v
287+
277288

278289
class DynamicServicesSchedulerSettings(BaseCustomSettings):
279290
DIRECTOR_V2_DYNAMIC_SCHEDULER_ENABLED: bool = True

services/director-v2/src/simcore_service_director_v2/models/schemas/dynamic_services/scheduler.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ def update_ok_status(self, info: str) -> None:
6161
self._update(DynamicSidecarStatus.OK, info)
6262

6363
def update_failing_status(self, info: str) -> None:
64-
logger.error(info)
6564
self._update(DynamicSidecarStatus.FAILING, info)
6665

6766
def __eq__(self, other: "Status") -> bool:

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_compose_specs.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ def _inject_proxy_network_configuration(
3636
target_container_spec = service_spec["services"][target_container]
3737
container_networks = target_container_spec.get("networks", [])
3838
container_networks.append(dynamic_sidecar_network_name)
39-
target_container_spec["networks"] = container_networks
39+
# avoid duplicate entries, this is important when the dynamic-sidecar
40+
# fails to run docker-compose up, otherwise it will
41+
# continue adding lots of entries to this list
42+
target_container_spec["networks"] = list(set(container_networks))
4043

4144

4245
class _environment_section:

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def _get_environment_variables(
3434
state_exclude = scheduler_data.paths_mapping.state_exclude
3535

3636
return {
37+
"LOG_LEVEL": app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_LOG_LEVEL,
3738
"SIMCORE_HOST_NAME": scheduler_data.service_name,
3839
"DYNAMIC_SIDECAR_COMPOSE_NAMESPACE": compose_namespace,
3940
"DY_SIDECAR_PATH_INPUTS": f"{scheduler_data.paths_mapping.inputs_path}",

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/task.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ async def _apply_observation_cycle(
7070
dynamic_sidecar_settings=dynamic_services_settings.DYNAMIC_SIDECAR,
7171
)
7272
):
73+
# NOTE: once marked for removal the observation cycle needs
74+
# to continue in order for the service to be removed
7375
logger.warning(
7476
"Removing service %s from observation", scheduler_data.service_name
7577
)
@@ -168,7 +170,6 @@ async def mark_service_for_removal(
168170
)
169171
await update_scheduler_data_label(current.scheduler_data)
170172

171-
self._enqueue_observation_from_service_name(service_name)
172173
logger.debug("Service '%s' marked for removal from scheduler", service_name)
173174

174175
async def finish_service_removal(self, node_uuid: NodeID) -> None:
@@ -399,7 +400,7 @@ async def observing_single_service(service_name: str) -> None:
399400
# fire and forget about the task
400401
asyncio.create_task(
401402
observing_single_service(service_name),
402-
name=f"observe {service_name}",
403+
name=f"observe_{service_name}",
403404
)
404405

405406
logger.info("Scheduler 'trigger observation queue task' was shut down")
@@ -417,13 +418,15 @@ async def _run_scheduler_task(self) -> None:
417418
async with self._lock:
418419
for service_name in self._to_observe:
419420
self._enqueue_observation_from_service_name(service_name)
420-
421-
await sleep(settings.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS)
422421
except asyncio.CancelledError: # pragma: no cover
423422
logger.info("Stopped dynamic scheduler")
424423
raise
425424
except Exception: # pylint: disable=broad-except
426-
logger.error("Unexpected error in dynamic scheduler", exc_info=True)
425+
logger.exception(
426+
"Unexpected error while scheduling sidecars observation"
427+
)
428+
429+
await sleep(settings.DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS)
427430

428431
async def _discover_running_services(self) -> None:
429432
"""discover all services which were started before and add them to the scheduler"""

services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ def mock_env(
319319

320320
monkeypatch.setenv("SC_BOOT_MODE", "production")
321321
monkeypatch.setenv("DYNAMIC_SIDECAR_EXPOSE_PORT", "true")
322+
monkeypatch.setenv("DYNAMIC_SIDECAR_LOG_LEVEL", "DEBUG")
322323
monkeypatch.setenv("PROXY_EXPOSE_PORT", "true")
323324
monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", network_name)
324325
monkeypatch.delenv("DYNAMIC_SIDECAR_MOUNT_PATH_DEV", raising=False)

services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
}
3232

3333
EXPECTED_DYNAMIC_SIDECAR_ENV_VAR_NAMES = {
34+
"LOG_LEVEL",
3435
"DY_SIDECAR_NODE_ID",
3536
"DY_SIDECAR_PATH_INPUTS",
3637
"DY_SIDECAR_PATH_OUTPUTS",

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class DynamicSidecarSettings(BaseCustomSettings):
2222
)
2323

2424
# LOGGING
25-
LOG_LEVEL: str = Field("DEBUG")
25+
LOG_LEVEL: str = Field("WARNING")
2626

2727
@validator("LOG_LEVEL")
2828
@classmethod

0 commit comments

Comments
 (0)