Skip to content

Commit 4319179

Browse files
GitHKAndrei Neagu
andauthored
🐛 persist dynamic-sidecar state to docker service labels (ITISFoundation#2961)
* serializing path * initial commit * task updates * fixed tests * fixed boot * pytest * remove reraise * added new settings * no failiure if service was not found * bumped timeout * added test to ensure compare works * update tests and properties * updated docstring * update comment * fix notes * moved test to correct place * simplified call * refactor test * more test refactoring * using parse_raw * add extra enfrocement * fix pylint * removed json encoders * fixed comments and styles * renamed label * removed notes * fix comment Co-authored-by: Andrei Neagu <[email protected]>
1 parent 1a32f2d commit 4319179

File tree

18 files changed

+451
-175
lines changed

18 files changed

+451
-175
lines changed

packages/models-library/tests/test_service_settings_labels.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# pylint:disable=unused-argument
33
# pylint:disable=redefined-outer-name
44

5+
import json
56
from collections import namedtuple
67
from copy import deepcopy
78
from pprint import pformat
@@ -123,6 +124,13 @@ def test_path_mappings_none_state_paths() -> None:
123124
PathMappingsLabel(**sample_data)
124125

125126

127+
def test_path_mappings_json_encoding() -> None:
128+
example = PathMappingsLabel.Config.schema_extra["example"]
129+
path_mappings = PathMappingsLabel.parse_obj(example)
130+
print(path_mappings)
131+
assert json.loads(path_mappings.json()) == example
132+
133+
126134
def test_simcore_services_labels_compose_spec_null_container_http_entry_provided() -> None:
127135
sample_data = deepcopy(SimcoreServiceLabels.Config.schema_extra["examples"][2])
128136
assert sample_data["simcore.service.container-http-entrypoint"]

services/director-v2/src/simcore_service_director_v2/core/settings.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,12 @@ class DynamicSidecarSettings(BaseCustomSettings):
221221
"timeout for attaching/detaching project networks to/from a container"
222222
),
223223
)
224+
DYNAMIC_SIDECAR_VOLUMES_REMOVAL_TIMEOUT_S: PositiveFloat = Field(
225+
1.0 * MINS,
226+
description=(
227+
"time to wait before giving up on removing dynamic-sidecar's volumes"
228+
),
229+
)
224230

225231
TRAEFIK_SIMCORE_ZONE: str = Field(
226232
...,

services/director-v2/src/simcore_service_director_v2/models/schemas/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
DYNAMIC_SIDECAR_SERVICE_PREFIX = "dy-sidecar"
44
DYNAMIC_PROXY_SERVICE_PREFIX = "dy-proxy"
55

6+
# label storing scheduler_data to allow service
7+
# monitoring recovery after director-v2 reboots
8+
DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL = "io.simcore.scheduler-data"
9+
610
# This matches registries by:
711
# - local
812
# - itisfoundation

services/director-v2/src/simcore_service_director_v2/models/schemas/dynamic_services/scheduler.py

Lines changed: 9 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,21 @@
1-
import datetime
21
import json
32
import logging
43
from asyncio import Lock
54
from enum import Enum
6-
from typing import Any, Dict, List, Optional
5+
from typing import Any, Dict, List, Mapping, Optional
76
from uuid import UUID
87

9-
from models_library.projects import ProjectID
108
from models_library.projects_nodes_io import NodeID
119
from models_library.service_settings_labels import (
1210
DynamicSidecarServiceLabels,
1311
PathMappingsLabel,
14-
RestartPolicy,
1512
SimcoreServiceLabels,
1613
)
1714
from pydantic import BaseModel, Extra, Field, PositiveInt, PrivateAttr, constr
1815

1916
from ..constants import (
2017
DYNAMIC_PROXY_SERVICE_PREFIX,
18+
DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL,
2119
DYNAMIC_SIDECAR_SERVICE_PREFIX,
2220
REGEX_DY_SERVICE_PROXY,
2321
REGEX_DY_SERVICE_SIDECAR,
@@ -28,9 +26,9 @@
2826

2927
MAX_ALLOWED_SERVICE_NAME_LENGTH: int = 63
3028

31-
SHA256 = constr(max_length=64, regex=r"\b[A-Fa-f0-9]{64}\b")
32-
ServiceId = SHA256
33-
NetworkId = SHA256
29+
DockerId = constr(max_length=25, regex=r"[A-Za-z0-9]{25}")
30+
ServiceId = DockerId
31+
NetworkId = DockerId
3432

3533
logger = logging.getLogger()
3634

@@ -94,11 +92,6 @@ class DockerContainerInspect(BaseModel):
9492
name: str = Field(..., description="docker name of the container")
9593
id: str = Field(..., description="docker id of the container")
9694

97-
last_updated: datetime.datetime = Field(
98-
default_factory=datetime.datetime.utcnow,
99-
description="time of the update in UTC",
100-
)
101-
10295
@classmethod
10396
def from_container(cls, container: Dict[str, Any]) -> "DockerContainerInspect":
10497
return cls(
@@ -246,62 +239,6 @@ def are_containers_ready(self) -> bool:
246239
)
247240

248241

249-
class ServiceLabelsStoredData(CommonServiceDetails, DynamicSidecarServiceLabels):
250-
service_name: str
251-
paths_mapping: PathMappingsLabel # overwrites in DynamicSidecarServiceLabels
252-
dynamic_sidecar_network_name: str
253-
simcore_traefik_zone: str
254-
service_port: int
255-
256-
@classmethod
257-
def from_service(cls, service: Dict[str, Any]) -> "ServiceLabelsStoredData":
258-
labels = service["Spec"]["Labels"]
259-
params = dict(
260-
service_name=service["Spec"]["Name"],
261-
node_uuid=NodeID(labels["uuid"]),
262-
key=labels["service_key"],
263-
version=labels["service_tag"],
264-
paths_mapping=PathMappingsLabel.parse_raw(labels["paths_mapping"]),
265-
dynamic_sidecar_network_name=labels["traefik.docker.network"],
266-
simcore_traefik_zone=labels["io.simcore.zone"],
267-
service_port=labels["service_port"],
268-
project_id=ProjectID(labels["study_id"]),
269-
user_id=int(labels["user_id"]),
270-
)
271-
if "compose_spec" in labels:
272-
params["compose_spec"] = labels["compose_spec"]
273-
if "container_http_entry" in labels:
274-
params["container_http_entry"] = labels["container_http_entry"]
275-
if "restart_policy" in labels:
276-
params["restart_policy"] = labels["restart_policy"]
277-
return cls(**params)
278-
279-
class Config:
280-
extra = Extra.allow
281-
allow_population_by_field_name = True
282-
schema_extra = {
283-
"example": {
284-
"service_name": "some service",
285-
"node_uuid": UUID("75c7f3f4-18f9-4678-8610-54a2ade78eaa"),
286-
"key": "simcore/services/dynamic/3dviewer",
287-
"version": "2.4.5",
288-
"paths_mapping": PathMappingsLabel.parse_obj(
289-
PathMappingsLabel.Config.schema_extra["example"]
290-
),
291-
"compose_spec": SimcoreServiceLabels.Config.schema_extra["examples"][2][
292-
"simcore.service.compose-spec"
293-
],
294-
"container_http_entry": "some-entrypoint",
295-
"dynamic_sidecar_network_name": "some_network_name",
296-
"simcore_traefik_zone": "main",
297-
"service_port": 300,
298-
"restart_policy": RestartPolicy.NO_RESTART.value,
299-
"project_id": UUID("dd1d04d9-d704-4f7e-8f0f-1ca60cc771fe"),
300-
"user_id": 234,
301-
}
302-
}
303-
304-
305242
class DynamicSidecarNames(BaseModel):
306243
"""
307244
Service naming schema:
@@ -432,38 +369,11 @@ def from_http_request(
432369
return cls.parse_obj(obj_dict)
433370

434371
@classmethod
435-
def from_service_labels_stored_data(
436-
cls,
437-
service_labels_stored_data: ServiceLabelsStoredData,
438-
port: Optional[int],
439-
request_dns: str = None,
440-
request_scheme: str = None,
441-
proxy_service_name: str = None,
372+
def from_service_inspect(
373+
cls, service_inspect: Mapping[str, Any]
442374
) -> "SchedulerData":
443-
return cls.parse_obj(
444-
dict(
445-
service_name=service_labels_stored_data.service_name,
446-
node_uuid=service_labels_stored_data.node_uuid,
447-
project_id=service_labels_stored_data.project_id,
448-
user_id=service_labels_stored_data.user_id,
449-
key=service_labels_stored_data.key,
450-
version=service_labels_stored_data.version,
451-
paths_mapping=service_labels_stored_data.paths_mapping,
452-
compose_spec=json.dumps(service_labels_stored_data.compose_spec),
453-
container_http_entry=service_labels_stored_data.container_http_entry,
454-
restart_policy=service_labels_stored_data.restart_policy,
455-
dynamic_sidecar_network_name=service_labels_stored_data.dynamic_sidecar_network_name,
456-
simcore_traefik_zone=service_labels_stored_data.simcore_traefik_zone,
457-
service_port=service_labels_stored_data.service_port,
458-
request_dns=request_dns,
459-
request_scheme=request_scheme,
460-
proxy_service_name=proxy_service_name,
461-
dynamic_sidecar=dict(
462-
hostname=service_labels_stored_data.service_name,
463-
port=port,
464-
),
465-
)
466-
)
375+
labels = service_inspect["Spec"]["Labels"]
376+
return cls.parse_raw(labels[DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL])
467377

468378
class Config:
469379
extra = Extra.allow

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,28 @@
22

33

44
import asyncio
5+
import json
56
import logging
67
import time
78
import traceback
89
from contextlib import asynccontextmanager
9-
from typing import Any, AsyncIterator, Deque, Dict, List, Optional, Set, Tuple
10+
from copy import deepcopy
11+
from typing import Any, AsyncIterator, Dict, List, Mapping, Optional, Set, Tuple
1012

1113
import aiodocker
12-
from aiodocker.utils import clean_filters
14+
from aiodocker.utils import clean_filters, clean_map
1315
from models_library.projects import ProjectID
1416
from models_library.projects_nodes_io import NodeID
1517
from models_library.users import UserID
1618
from packaging import version
1719
from servicelib.utils import logged_gather
1820

1921
from ...core.settings import DynamicSidecarSettings
20-
from ...models.schemas.constants import DYNAMIC_SIDECAR_SERVICE_PREFIX
21-
from ...models.schemas.dynamic_services import (
22-
ServiceLabelsStoredData,
23-
ServiceState,
24-
ServiceType,
22+
from ...models.schemas.constants import (
23+
DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL,
24+
DYNAMIC_SIDECAR_SERVICE_PREFIX,
2525
)
26+
from ...models.schemas.dynamic_services import SchedulerData, ServiceState, ServiceType
2627
from .docker_states import TASK_STATES_RUNNING, extract_task_state
2728
from .errors import DynamicSidecarError, GenericDockerError
2829

@@ -161,24 +162,22 @@ async def inspect_service(service_id: str) -> Dict[str, Any]:
161162

162163
async def get_dynamic_sidecars_to_observe(
163164
dynamic_sidecar_settings: DynamicSidecarSettings,
164-
) -> Deque[ServiceLabelsStoredData]:
165+
) -> List[SchedulerData]:
165166
"""called when scheduler is started to discover new services to observe"""
166167
async with docker_client() as client:
167-
running_dynamic_sidecar_services = await client.services.list(
168+
running_dynamic_sidecar_services: List[
169+
Mapping[str, Any]
170+
] = await client.services.list(
168171
filters={
169172
"label": [
170173
f"swarm_stack_name={dynamic_sidecar_settings.SWARM_STACK_NAME}"
171174
],
172175
"name": [f"{DYNAMIC_SIDECAR_SERVICE_PREFIX}"],
173176
}
174177
)
175-
176-
dynamic_sidecar_services: Deque[ServiceLabelsStoredData] = Deque()
177-
178-
for service in running_dynamic_sidecar_services:
179-
dynamic_sidecar_services.append(ServiceLabelsStoredData.from_service(service))
180-
181-
return dynamic_sidecar_services
178+
return [
179+
SchedulerData.from_service_inspect(x) for x in running_dynamic_sidecar_services
180+
]
182181

183182

184183
async def _extract_task_data_from_service_for_state(
@@ -374,10 +373,17 @@ async def remove_dynamic_sidecar_network(network_name: str) -> bool:
374373
return False
375374

376375

377-
async def remove_dynamic_sidecar_volumes(node_uuid: NodeID) -> Set[str]:
376+
async def remove_dynamic_sidecar_volumes(
377+
node_uuid: NodeID, dynamic_sidecar_settings: DynamicSidecarSettings
378+
) -> Set[str]:
378379
async with docker_client() as client:
379380
volumes_response = await client.volumes.list(
380-
filters={"label": f"uuid={node_uuid}"}
381+
filters={
382+
"label": [
383+
f"swarm_stack_name={dynamic_sidecar_settings.SWARM_STACK_NAME}",
384+
f"uuid={node_uuid}",
385+
]
386+
}
381387
)
382388
volumes = volumes_response["Volumes"]
383389
log.debug("Removing volumes: %s", [v["Name"] for v in volumes])
@@ -506,3 +512,43 @@ async def try_to_remove_network(network_name: str) -> None:
506512
return await network.delete()
507513
except aiodocker.exceptions.DockerError:
508514
log.warning("Could not remove network %s", network_name)
515+
516+
517+
async def update_scheduler_data_label(scheduler_data: SchedulerData) -> None:
518+
async with docker_client() as client:
519+
# NOTE: builtin `DockerServices.update` function is very limited.
520+
# Using the same pattern but updating labels
521+
522+
try:
523+
# fetch information from service name
524+
service_inspect = await client.services.inspect(scheduler_data.service_name)
525+
service_version = service_inspect["Version"]["Index"]
526+
service_id = service_inspect["ID"]
527+
spec = service_inspect["Spec"]
528+
529+
# compose_spec needs to be json encoded
530+
# before encoding it to json and storing it
531+
# in the label
532+
scheduler_data_copy = deepcopy(scheduler_data)
533+
scheduler_data_copy.compose_spec = json.dumps(
534+
scheduler_data_copy.compose_spec
535+
)
536+
label_data = scheduler_data_copy.json()
537+
spec["Labels"][DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL] = label_data
538+
539+
await client._query_json( # pylint: disable=protected-access
540+
f"services/{service_id}/update",
541+
method="POST",
542+
data=json.dumps(clean_map(spec)),
543+
params={"version": service_version},
544+
)
545+
except aiodocker.exceptions.DockerError as e:
546+
if not (
547+
e.status == 404
548+
and e.message == f"service {scheduler_data.service_name} not found"
549+
):
550+
raise e
551+
log.debug(
552+
"Skip update for service '%s' which could not be found",
553+
scheduler_data.service_name,
554+
)

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from servicelib.json_serialization import json_dumps
66

77
from ....core.settings import AppSettings, DynamicSidecarSettings
8+
from ....models.schemas.constants import DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL
89
from ....models.schemas.dynamic_services import SchedulerData, ServiceType
910
from .._namepsace import get_compose_namespace
1011
from ..volumes_resolver import DynamicSidecarVolumesPathsResolver
@@ -102,6 +103,7 @@ def get_dynamic_sidecar_spec(
102103
]:
103104
mounts.append(
104105
DynamicSidecarVolumesPathsResolver.mount_entry(
106+
swarm_stack_name=dynamic_sidecar_settings.SWARM_STACK_NAME,
105107
compose_namespace=compose_namespace,
106108
path=path_to_mount,
107109
node_uuid=scheduler_data.node_uuid,
@@ -113,6 +115,7 @@ def get_dynamic_sidecar_spec(
113115
if app_settings.DIRECTOR_V2_DEV_FEATURES_ENABLED:
114116
mounts.append(
115117
DynamicSidecarVolumesPathsResolver.mount_r_clone(
118+
swarm_stack_name=dynamic_sidecar_settings.SWARM_STACK_NAME,
116119
compose_namespace=compose_namespace,
117120
path=path_to_mount,
118121
project_id=scheduler_data.project_id,
@@ -123,6 +126,7 @@ def get_dynamic_sidecar_spec(
123126
else:
124127
mounts.append(
125128
DynamicSidecarVolumesPathsResolver.mount_entry(
129+
swarm_stack_name=dynamic_sidecar_settings.SWARM_STACK_NAME,
126130
compose_namespace=compose_namespace,
127131
path=path_to_mount,
128132
node_uuid=scheduler_data.node_uuid,
@@ -187,13 +191,8 @@ def get_dynamic_sidecar_spec(
187191
"user_id": f"{scheduler_data.user_id}",
188192
# the following are used for scheduling
189193
"uuid": f"{scheduler_data.node_uuid}", # also needed for removal when project is closed
190-
"swarm_stack_name": dynamic_sidecar_settings.SWARM_STACK_NAME,
191-
"service_key": scheduler_data.key,
192-
"service_tag": scheduler_data.version,
193-
"paths_mapping": scheduler_data.paths_mapping.json(),
194-
"compose_spec": json_dumps(scheduler_data.compose_spec),
195-
"container_http_entry": scheduler_data.container_http_entry,
196-
"restart_policy": scheduler_data.restart_policy,
194+
"swarm_stack_name": dynamic_sidecar_settings.SWARM_STACK_NAME, # required for listing services with uuid
195+
DYNAMIC_SIDECAR_SCHEDULER_DATA_LABEL: scheduler_data.json(),
197196
},
198197
"name": scheduler_data.service_name,
199198
"networks": [swarm_network_id, dynamic_sidecar_network_id],

0 commit comments

Comments
 (0)