Skip to content

Commit ce53390

Browse files
Merge branch 'master' into 6931-update-webserver-swagger
2 parents cd5a4e8 + 75aed81 commit ce53390

File tree

25 files changed

+411
-44
lines changed

25 files changed

+411
-44
lines changed

packages/models-library/src/models_library/api_schemas_directorv2/services.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,7 @@ class ServiceExtras(BaseModel):
103103

104104

105105
CHARS_IN_VOLUME_NAME_BEFORE_DIR_NAME: Final[NonNegativeInt] = 89
106+
107+
108+
DYNAMIC_SIDECAR_SERVICE_PREFIX: Final[str] = "dy-sidecar"
109+
DYNAMIC_PROXY_SERVICE_PREFIX: Final[str] = "dy-proxy"

packages/models-library/src/models_library/docker.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,15 @@ def from_key(cls, key: str) -> "DockerLabelKey":
3737
str, StringConstraints(pattern=DOCKER_GENERIC_TAG_KEY_RE)
3838
]
3939

40-
DockerPlacementConstraint: TypeAlias = Annotated[str, StringConstraints(strip_whitespace = True, pattern = re.compile(r"^(?!-)(?![.])(?!.*--)(?!.*[.][.])[a-zA-Z0-9.-]*(?<!-)(?<![.])(!=|==)[a-zA-Z0-9_. -]*$"))]
40+
DockerPlacementConstraint: TypeAlias = Annotated[
41+
str,
42+
StringConstraints(
43+
strip_whitespace=True,
44+
pattern=re.compile(
45+
r"^(?!-)(?![.])(?!.*--)(?!.*[.][.])[a-zA-Z0-9.-]*(?<!-)(?<![.])(!=|==)[a-zA-Z0-9_. -]*$"
46+
),
47+
),
48+
]
4149

4250
_SIMCORE_RUNTIME_DOCKER_LABEL_PREFIX: Final[str] = "io.simcore.runtime."
4351
_BACKWARDS_COMPATIBILITY_SIMCORE_RUNTIME_DOCKER_LABELS_MAP: Final[dict[str, str]] = {
@@ -218,3 +226,8 @@ def from_docker_task(cls, docker_task: Task) -> "StandardSimcoreDockerLabels":
218226
]
219227
},
220228
)
229+
230+
231+
DockerNodeID: TypeAlias = Annotated[
232+
str, StringConstraints(strip_whitespace=True, pattern=re.compile(r"[a-zA-Z0-9]"))
233+
]
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import logging
2+
from datetime import timedelta
3+
from typing import Final
4+
5+
from models_library.docker import DockerNodeID
6+
from models_library.projects_nodes_io import NodeID
7+
from models_library.rabbitmq_basic_types import RPCMethodName, RPCNamespace
8+
from pydantic import NonNegativeInt, TypeAdapter
9+
from servicelib.logging_utils import log_decorator
10+
from servicelib.rabbitmq import RabbitMQRPCClient
11+
12+
_logger = logging.getLogger(__name__)
13+
14+
_REQUEST_TIMEOUT: Final[NonNegativeInt] = int(timedelta(minutes=60).total_seconds())
15+
16+
17+
@log_decorator(_logger, level=logging.DEBUG)
18+
async def force_container_cleanup(
19+
rabbitmq_rpc_client: RabbitMQRPCClient,
20+
*,
21+
docker_node_id: DockerNodeID,
22+
swarm_stack_name: str,
23+
node_id: NodeID,
24+
) -> None:
25+
result = await rabbitmq_rpc_client.request(
26+
RPCNamespace.from_entries(
27+
{
28+
"service": "agent",
29+
"docker_node_id": docker_node_id,
30+
"swarm_stack_name": swarm_stack_name,
31+
}
32+
),
33+
TypeAdapter(RPCMethodName).validate_python("force_container_cleanup"),
34+
node_id=node_id,
35+
timeout_s=_REQUEST_TIMEOUT,
36+
)
37+
assert result is None # nosec

packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/agent/volumes.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import timedelta
33
from typing import Final
44

5+
from models_library.docker import DockerNodeID
56
from models_library.projects_nodes_io import NodeID
67
from models_library.rabbitmq_basic_types import RPCMethodName, RPCNamespace
78
from pydantic import NonNegativeInt, TypeAdapter
@@ -17,7 +18,7 @@
1718
async def remove_volumes_without_backup_for_service(
1819
rabbitmq_rpc_client: RabbitMQRPCClient,
1920
*,
20-
docker_node_id: str,
21+
docker_node_id: DockerNodeID,
2122
swarm_stack_name: str,
2223
node_id: NodeID,
2324
) -> None:
@@ -42,7 +43,7 @@ async def remove_volumes_without_backup_for_service(
4243
async def backup_and_remove_volumes_for_all_services(
4344
rabbitmq_rpc_client: RabbitMQRPCClient,
4445
*,
45-
docker_node_id: str,
46+
docker_node_id: DockerNodeID,
4647
swarm_stack_name: str,
4748
) -> None:
4849
result = await rabbitmq_rpc_client.request(
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import logging
2+
3+
from fastapi import FastAPI
4+
from models_library.projects_nodes_io import NodeID
5+
from servicelib.logging_utils import log_context
6+
from servicelib.rabbitmq import RPCRouter
7+
8+
from ...services.containers_manager import ContainersManager
9+
10+
_logger = logging.getLogger(__name__)
11+
12+
router = RPCRouter()
13+
14+
15+
@router.expose()
16+
async def force_container_cleanup(app: FastAPI, *, node_id: NodeID) -> None:
17+
with log_context(
18+
_logger, logging.INFO, f"removing all orphan container for {node_id=}"
19+
):
20+
await ContainersManager.get_from_app_state(app).force_container_cleanup(node_id)

services/agent/src/simcore_service_agent/api/rpc/_volumes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from servicelib.rabbitmq.rpc_interfaces.agent.errors import (
88
NoServiceVolumesFoundRPCError,
99
)
10-
from simcore_service_agent.services.volumes_manager import VolumesManager
10+
11+
from ...services.volumes_manager import VolumesManager
1112

1213
_logger = logging.getLogger(__name__)
1314

services/agent/src/simcore_service_agent/api/rpc/routes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
from simcore_service_agent.core.settings import ApplicationSettings
55

66
from ...services.rabbitmq import get_rabbitmq_rpc_server
7-
from . import _volumes
7+
from . import _containers, _volumes
88

99
ROUTERS: list[RPCRouter] = [
10+
_containers.router,
1011
_volumes.router,
1112
]
1213

services/agent/src/simcore_service_agent/core/application.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
)
1919
from ..api.rest.routes import setup_rest_api
2020
from ..api.rpc.routes import setup_rpc_api_routes
21+
from ..services.containers_manager import setup_containers_manager
2122
from ..services.instrumentation import setup_instrumentation
2223
from ..services.rabbitmq import setup_rabbitmq
2324
from ..services.volumes_manager import setup_volume_manager
@@ -28,8 +29,8 @@
2829

2930
def _setup_logger(settings: ApplicationSettings):
3031
# SEE https://github.com/ITISFoundation/osparc-simcore/issues/3148
31-
logging.basicConfig(level=settings.LOGLEVEL.value) # NOSONAR
32-
logging.root.setLevel(settings.LOGLEVEL.value)
32+
logging.basicConfig(level=settings.LOG_LEVEL.value) # NOSONAR
33+
logging.root.setLevel(settings.LOG_LEVEL.value)
3334
config_all_loggers(
3435
log_format_local_dev_enabled=settings.AGENT_VOLUMES_LOG_FORMAT_LOCAL_DEV_ENABLED,
3536
logger_filter_mapping=settings.AGENT_VOLUMES_LOG_FILTER_MAPPING,
@@ -58,6 +59,7 @@ def create_app() -> FastAPI:
5859

5960
setup_rabbitmq(app)
6061
setup_volume_manager(app)
62+
setup_containers_manager(app)
6163
setup_rest_api(app)
6264
setup_rpc_api_routes(app)
6365

services/agent/src/simcore_service_agent/core/settings.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import timedelta
22

33
from models_library.basic_types import BootModeEnum, LogLevel
4+
from models_library.docker import DockerNodeID
45
from pydantic import AliasChoices, AnyHttpUrl, Field, field_validator
56
from servicelib.logging_utils_filtering import LoggerName, MessageSubstring
67
from settings_library.base import BaseCustomSettings
@@ -11,7 +12,7 @@
1112

1213

1314
class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
14-
LOGLEVEL: LogLevel = Field(
15+
LOG_LEVEL: LogLevel = Field(
1516
LogLevel.WARNING,
1617
validation_alias=AliasChoices(
1718
"AGENT_LOGLEVEL",
@@ -79,7 +80,9 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
7980

8081
AGENT_PROMETHEUS_INSTRUMENTATION_ENABLED: bool = True
8182

82-
AGENT_DOCKER_NODE_ID: str = Field(..., description="used by the rabbitmq module")
83+
AGENT_DOCKER_NODE_ID: DockerNodeID = Field(
84+
..., description="used by the rabbitmq module"
85+
)
8386

8487
AGENT_RABBITMQ: RabbitSettings = Field(
8588
description="settings for service/rabbitmq",
@@ -91,7 +94,7 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
9194
json_schema_extra={"auto_default_from_env": True},
9295
)
9396

94-
@field_validator("LOGLEVEL")
97+
@field_validator("LOG_LEVEL")
9598
@classmethod
9699
def valid_log_level(cls, value) -> LogLevel:
97100
return LogLevel(cls.validate_log_level(value))
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import logging
2+
from dataclasses import dataclass, field
3+
4+
from aiodocker import Docker
5+
from fastapi import FastAPI
6+
from models_library.api_schemas_directorv2.services import (
7+
DYNAMIC_PROXY_SERVICE_PREFIX,
8+
DYNAMIC_SIDECAR_SERVICE_PREFIX,
9+
)
10+
from models_library.projects_nodes_io import NodeID
11+
from servicelib.fastapi.app_state import SingletonInAppStateMixin
12+
13+
from .docker_utils import get_containers_with_prefixes, remove_container_forcefully
14+
15+
_logger = logging.getLogger(__name__)
16+
17+
18+
@dataclass
19+
class ContainersManager(SingletonInAppStateMixin):
20+
app_state_name: str = "containers_manager"
21+
22+
docker: Docker = field(default_factory=Docker)
23+
24+
async def force_container_cleanup(self, node_id: NodeID) -> None:
25+
# compose all possible used container prefixes
26+
proxy_prefix = f"{DYNAMIC_PROXY_SERVICE_PREFIX}_{node_id}"
27+
dy_sidecar_prefix = f"{DYNAMIC_SIDECAR_SERVICE_PREFIX}_{node_id}"
28+
user_service_prefix = f"{DYNAMIC_SIDECAR_SERVICE_PREFIX}-{node_id}"
29+
30+
orphan_containers = await get_containers_with_prefixes(
31+
self.docker, {proxy_prefix, dy_sidecar_prefix, user_service_prefix}
32+
)
33+
_logger.debug(
34+
"Detected orphan containers for node_id='%s': %s",
35+
node_id,
36+
orphan_containers,
37+
)
38+
39+
unexpected_orphans = {
40+
orphan
41+
for orphan in orphan_containers
42+
if orphan.startswith(user_service_prefix)
43+
}
44+
if unexpected_orphans:
45+
_logger.warning(
46+
"Unexpected orphans detected for node_id='%s': %s",
47+
node_id,
48+
unexpected_orphans,
49+
)
50+
51+
# avoids parallel requests to docker engine
52+
for container in orphan_containers:
53+
await remove_container_forcefully(self.docker, container)
54+
55+
async def shutdown(self) -> None:
56+
await self.docker.close()
57+
58+
59+
def get_containers_manager(app: FastAPI) -> ContainersManager:
60+
return ContainersManager.get_from_app_state(app)
61+
62+
63+
def setup_containers_manager(app: FastAPI) -> None:
64+
async def _on_startup() -> None:
65+
ContainersManager().set_to_app_state(app)
66+
67+
async def _on_shutdown() -> None:
68+
await ContainersManager.get_from_app_state(app).shutdown()
69+
70+
app.add_event_handler("startup", _on_startup)
71+
app.add_event_handler("shutdown", _on_shutdown)

0 commit comments

Comments
 (0)