Skip to content

Commit 766f81f

Browse files
GitHK and Andrei Neagu authored
🐛added retry when creating services (ITISFoundation#3297)
Co-authored-by: Andrei Neagu <[email protected]>
1 parent 636bbe9 commit 766f81f

File tree

4 files changed

+89
-11
lines changed

4 files changed

+89
-11
lines changed

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/docker_compose_utils.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,7 @@ async def docker_compose_create(
103103
compose_spec_yaml: str, settings: ApplicationSettings
104104
) -> CommandResult:
105105
"""
106-
(Re)creates, starts, and attaches to containers for a service
107-
108-
- does NOT build images
109-
- runs in DETACHED mode, i.e. runs containers in the background, prints new container names
106+
Creates containers required by the service.
110107
111108
[SEE docker-compose](https://docs.docker.com/engine/reference/commandline/compose_up/)
112109
"""

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/docker_utils.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import aiodocker
77
from aiodocker.utils import clean_filters
8+
from pydantic import PositiveInt
89

910
from .errors import UnexpectedDockerError, VolumeNotFoundError
1011

@@ -40,3 +41,15 @@ async def get_volume_by_label(label: str, run_id: UUID) -> dict[str, Any]:
4041
raise VolumeNotFoundError(label, run_id, volumes)
4142
volume_details = volumes[0]
4243
return volume_details # type: ignore
44+
45+
46+
async def get_running_containers_count_from_names(
    container_names: list[str],
) -> int:
    """Count the containers Docker lists for the given names.

    The listing is requested with ``all=True``, so containers are counted
    regardless of their state, not only the running ones.

    Args:
        container_names: names handed to Docker's ``name`` filter.

    Returns:
        The number of containers in the listing, or ``0`` when
        ``container_names`` is empty (Docker is not queried in that case).
    """
    # The result is annotated as a plain int because 0 is a valid answer
    # and is not a positive integer.
    if not container_names:
        return 0

    async with docker_client() as docker:
        # NOTE(review): Docker's `name` filter matches partial names, so "foo"
        # may also count "foo_2" -- confirm callers' names cannot overlap.
        filters = clean_filters({"name": container_names})
        containers = await docker.containers.list(all=True, filters=filters)
        return len(containers)

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@
2121
docker_compose_start,
2222
)
2323
from ..core.docker_logs import start_log_fetching, stop_log_fetching
24+
from ..core.docker_utils import get_running_containers_count_from_names
2425
from ..core.rabbitmq import RabbitMQ, send_message
2526
from ..core.settings import ApplicationSettings
2627
from ..core.utils import CommandResult, assemble_container_names
27-
from ..core.validation import validate_compose_spec
28+
from ..core.validation import parse_compose_spec, validate_compose_spec
2829
from ..models.schemas.application_health import ApplicationHealth
2930
from ..models.schemas.containers import ContainersCreate
3031
from ..models.shared_store import SharedStore
@@ -43,7 +44,7 @@
4344

4445

4546
@retry(
46-
wait=wait_random_exponential(),
47+
wait=wait_random_exponential(max=30),
4748
stop=stop_after_delay(5 * _MINUTE),
4849
retry=retry_if_result(lambda result: result.success is False),
4950
reraise=False,
@@ -57,6 +58,28 @@ async def _retry_docker_compose_start(
5758
return await docker_compose_start(compose_spec, settings)
5859

5960

61+
@retry(
    retry=retry_if_result(lambda result: result is False),
    wait=wait_random_exponential(max=30),
    stop=stop_after_delay(5 * _MINUTE),
    reraise=True,
)
async def _retry_docker_compose_create(
    compose_spec: str, settings: ApplicationSettings
) -> bool:
    """Run ``docker_compose_create`` and verify one container exists per service.

    The command's own result is not inspected; success is judged only by
    comparing the number of services declared in the compose spec with the
    number of containers Docker lists under those service names.

    The decorator calls this again whenever it returns ``False``, waiting a
    random exponential delay (``max=30``) between attempts, until
    ``5 * _MINUTE`` has elapsed.
    NOTE(review): presumably the retry library raises its own error once the
    delay is exhausted -- confirm what callers should expect.

    Args:
        compose_spec: compose specification, parsed here to read its services.
        settings: forwarded unchanged to ``docker_compose_create``.

    Returns:
        ``True`` when the container count equals the service count.
    """
    await docker_compose_create(compose_spec, settings)

    services = parse_compose_spec(compose_spec)["services"]
    # NOTE(review): assumes each container is named after its service key.
    container_names = list(services)

    found = await get_running_containers_count_from_names(container_names)
    return len(container_names) == found
81+
82+
6083
async def task_create_service_containers(
6184
progress: TaskProgress,
6285
settings: ApplicationSettings,
@@ -90,7 +113,7 @@ async def task_create_service_containers(
90113
await docker_compose_pull(shared_store.compose_spec, settings)
91114

92115
progress.update(message="creating and starting containers", percent=0.90)
93-
await docker_compose_create(shared_store.compose_spec, settings)
116+
await _retry_docker_compose_create(shared_store.compose_spec, settings)
94117

95118
progress.update(message="ensure containers are started", percent=0.95)
96119
r = await _retry_docker_compose_start(shared_store.compose_spec, settings)

services/dynamic-sidecar/tests/unit/test_core_docker_utils.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
11
# pylint: disable=redefined-outer-name
22
# pylint: disable=unused-argument
33
# pylint: disable=unused-variable
4-
from typing import AsyncIterable
4+
from typing import AsyncIterable, AsyncIterator
55
from uuid import UUID
66

77
import aiodocker
88
import pytest
9+
from _pytest.fixtures import FixtureRequest
910
from faker import Faker
10-
from simcore_service_dynamic_sidecar.core.docker_utils import get_volume_by_label
11+
from pydantic import PositiveInt
12+
from simcore_service_dynamic_sidecar.core.docker_utils import (
13+
get_running_containers_count_from_names,
14+
get_volume_by_label,
15+
)
1116
from simcore_service_dynamic_sidecar.core.errors import VolumeNotFoundError
1217

18+
# FIXTURES
19+
1320

1421
@pytest.fixture(scope="session")
1522
def volume_name() -> str:
@@ -39,16 +46,54 @@ async def volume_with_label(volume_name: str, run_id: str) -> AsyncIterable[None
3946
await volume.delete()
4047

4148

49+
@pytest.fixture(params=[0, 1, 2, 3])
def container_count(request: FixtureRequest) -> int:
    """Number of test containers to create; parametrized over 0 to 3."""
    # Annotated as a plain int: 0 is one of the params and is not positive.
    return request.param
52+
53+
54+
@pytest.fixture
def container_names(container_count: int) -> list[str]:
    """Build ``container_test_<i>`` names, one per requested container.

    Returns an empty list when ``container_count`` is 0, which is why the
    parameter is annotated as a plain int rather than a positive one.
    """
    return [f"container_test_{i}" for i in range(container_count)]
57+
58+
59+
@pytest.fixture
async def started_services(container_names: list[str]) -> AsyncIterator[None]:
    """Create one ``busybox:latest`` container per name and remove them afterwards.

    The containers are only created here; nothing in this fixture starts them.
    """
    async with aiodocker.Docker() as docker_client:
        created_containers = []
        try:
            for container_name in container_names:
                container = await docker_client.containers.create(
                    config={"Image": "busybox:latest"},
                    name=container_name,
                )
                created_containers.append(container)

            yield
        finally:
            # Remove whatever was created even if a later create() call
            # failed, so leftovers cannot clash with the same names next run.
            for container in created_containers:
                await container.stop()
                await container.delete()
75+
76+
77+
# TESTS
78+
79+
4280
async def test_volume_with_label(
    volume_with_label: None, volume_name: str, run_id: UUID
):
    """The lookup returns a truthy result while ``volume_with_label`` is active."""
    volume_details = await get_volume_by_label(volume_name, run_id)
    assert volume_details
4684

4785

48-
async def test_volume_label_missing(run_id: UUID):
    """An unknown label raises ``VolumeNotFoundError`` naming the label and run id."""
    missing_label = "not_exist"

    with pytest.raises(VolumeNotFoundError) as exc_info:
        await get_volume_by_label(missing_label, run_id)

    error_msg = str(exc_info.value)
    assert str(run_id) in error_msg
    assert missing_label in error_msg
93+
94+
95+
async def test_get_running_containers_count_from_names(
    started_services: None, container_names: list[str], container_count: PositiveInt
):
    """The reported count equals the number of containers the fixture created."""
    assert (
        await get_running_containers_count_from_names(container_names)
        == container_count
    )

0 commit comments

Comments
 (0)