Skip to content

Commit 3655208

Browse files
authored
🐛🎨♻️Director-v0: improve registry caching (#6799)
1 parent ba881e9 commit 3655208

File tree

17 files changed

+410
-340
lines changed

17 files changed

+410
-340
lines changed

packages/settings-library/src/settings_library/docker_registry.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
from functools import cached_property
2-
from typing import Any
2+
from typing import Any, Self
33

4-
from pydantic import Field, SecretStr, field_validator
4+
from pydantic import (
5+
AnyHttpUrl,
6+
Field,
7+
SecretStr,
8+
TypeAdapter,
9+
field_validator,
10+
model_validator,
11+
)
512
from pydantic_settings import SettingsConfigDict
613

714
from .base import BaseCustomSettings
@@ -12,31 +19,49 @@ class RegistrySettings(BaseCustomSettings):
1219
REGISTRY_PATH: str | None = Field(
1320
default=None,
1421
# This is useful in case of a local registry, where the registry url (path) is relative to the host docker engine"
15-
description="development mode only, in case a local registry is used",
22+
description="development mode only, in case a local registry is used - "
23+
"this is the hostname to the docker registry as seen from the host running the containers (e.g. 127.0.0.1:5000)",
1624
)
1725
# NOTE: name is missleading, http or https protocol are not included
18-
REGISTRY_URL: str = Field(default="", description="address to the docker registry")
26+
REGISTRY_URL: str = Field(
27+
...,
28+
description="hostname of docker registry (without protocol but with port if available)",
29+
min_length=1,
30+
)
1931

2032
REGISTRY_USER: str = Field(
2133
..., description="username to access the docker registry"
2234
)
2335
REGISTRY_PW: SecretStr = Field(
2436
..., description="password to access the docker registry"
2537
)
26-
REGISTRY_SSL: bool = Field(..., description="access to registry through ssl")
38+
REGISTRY_SSL: bool = Field(
39+
..., description="True if docker registry is using HTTPS protocol"
40+
)
2741

2842
@field_validator("REGISTRY_PATH", mode="before")
2943
@classmethod
3044
def _escape_none_string(cls, v) -> Any | None:
3145
return None if v == "None" else v
3246

47+
@model_validator(mode="after")
48+
def check_registry_authentication(self: Self) -> Self:
49+
if self.REGISTRY_AUTH and any(
50+
not v for v in (self.REGISTRY_USER, self.REGISTRY_PW)
51+
):
52+
msg = "If REGISTRY_AUTH is True, both REGISTRY_USER and REGISTRY_PW must be provided"
53+
raise ValueError(msg)
54+
return self
55+
3356
@cached_property
3457
def resolved_registry_url(self) -> str:
3558
return self.REGISTRY_PATH or self.REGISTRY_URL
3659

3760
@cached_property
38-
def api_url(self) -> str:
39-
return f"{self.REGISTRY_URL}/v2"
61+
def api_url(self) -> AnyHttpUrl:
62+
return TypeAdapter(AnyHttpUrl).validate_python(
63+
f"http{'s' if self.REGISTRY_SSL else ''}://{self.REGISTRY_URL}/v2"
64+
)
4065

4166
model_config = SettingsConfigDict(
4267
json_schema_extra={

packages/settings-library/tests/test_docker_registry.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"REGISTRY_USER": "usr",
1212
"REGISTRY_PW": "pwd",
1313
"REGISTRY_SSL": "False",
14+
"REGISTRY_URL": "pytest.registry.com",
1415
}
1516

1617

services/director-v2/tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ def mock_env(
186186
"REGISTRY_PW": "test",
187187
"REGISTRY_SSL": "false",
188188
"REGISTRY_USER": "test",
189+
"REGISTRY_URL": faker.url(),
189190
"SC_BOOT_MODE": "production",
190191
"SIMCORE_SERVICES_NETWORK_NAME": "test_network_name",
191192
"SWARM_STACK_NAME": "pytest-simcore",

services/director/requirements/_base.in

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
aiocache
1616
aiodocker
1717
fastapi[all]
18-
httpx
18+
httpx[http2]
1919
prometheus-client
2020
pydantic
21+
tenacity

services/director/requirements/_base.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ h11==0.14.0
114114
# via
115115
# httpcore
116116
# uvicorn
117+
h2==4.1.0
118+
# via httpx
119+
hpack==4.0.0
120+
# via h2
117121
httpcore==1.0.6
118122
# via httpx
119123
httptools==0.6.4
@@ -135,6 +139,8 @@ httpx==0.27.2
135139
# -r requirements/../../../packages/service-library/requirements/_fastapi.in
136140
# -r requirements/_base.in
137141
# fastapi
142+
hyperframe==6.0.1
143+
# via h2
138144
idna==3.10
139145
# via
140146
# anyio
@@ -422,7 +428,9 @@ starlette==0.41.3
422428
# -c requirements/../../../requirements/constraints.txt
423429
# fastapi
424430
tenacity==9.0.0
425-
# via -r requirements/../../../packages/service-library/requirements/_base.in
431+
# via
432+
# -r requirements/../../../packages/service-library/requirements/_base.in
433+
# -r requirements/_base.in
426434
toolz==1.0.0
427435
# via -r requirements/../../../packages/service-library/requirements/_base.in
428436
tqdm==4.67.0

services/director/requirements/_test.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ faker
1616
jsonref
1717
pytest
1818
pytest-asyncio
19+
pytest-benchmark
1920
pytest-cov
2021
pytest-docker
2122
pytest-instafail

services/director/requirements/_test.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,13 @@ propcache==0.2.0
8787
# -c requirements/_base.txt
8888
# aiohttp
8989
# yarl
90+
py-cpuinfo==9.0.0
91+
# via pytest-benchmark
9092
pytest==8.3.3
9193
# via
9294
# -r requirements/_test.in
9395
# pytest-asyncio
96+
# pytest-benchmark
9497
# pytest-cov
9598
# pytest-docker
9699
# pytest-instafail
@@ -100,6 +103,8 @@ pytest-asyncio==0.23.8
100103
# via
101104
# -c requirements/../../../requirements/constraints.txt
102105
# -r requirements/_test.in
106+
pytest-benchmark==5.1.0
107+
# via -r requirements/_test.in
103108
pytest-cov==6.0.0
104109
# via -r requirements/_test.in
105110
pytest-docker==3.1.1
Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,22 @@
1-
from aiohttp import ClientSession, ClientTimeout
2-
from common_library.json_serialization import json_dumps
1+
import httpx
32
from fastapi import FastAPI
4-
from servicelib.utils import (
5-
get_http_client_request_aiohttp_connect_timeout,
6-
get_http_client_request_aiohttp_sock_connect_timeout,
7-
get_http_client_request_total_timeout,
8-
)
93

104

115
def setup_client_session(app: FastAPI) -> None:
126
async def on_startup() -> None:
13-
# SEE https://github.com/ITISFoundation/osparc-simcore/issues/4628
14-
15-
# ANE: it is important to have fast connection handshakes
16-
# also requests should be as fast as possible
17-
# some services are not that fast to reply
18-
timeout_settings = ClientTimeout(
19-
total=get_http_client_request_total_timeout(),
20-
connect=get_http_client_request_aiohttp_connect_timeout(),
21-
sock_connect=get_http_client_request_aiohttp_sock_connect_timeout(),
22-
)
23-
session = ClientSession(
24-
timeout=timeout_settings,
25-
json_serialize=json_dumps,
26-
)
7+
session = httpx.AsyncClient(transport=httpx.AsyncHTTPTransport(http2=True))
278
app.state.aiohttp_client_session = session
289

2910
async def on_shutdown() -> None:
3011
session = app.state.aiohttp_client_session
31-
assert isinstance(session, ClientSession) # nosec
32-
await session.close()
12+
assert isinstance(session, httpx.AsyncClient) # nosec
13+
await session.aclose()
3314

3415
app.add_event_handler("startup", on_startup)
3516
app.add_event_handler("shutdown", on_shutdown)
3617

3718

38-
def get_client_session(app: FastAPI) -> ClientSession:
19+
def get_client_session(app: FastAPI) -> httpx.AsyncClient:
3920
session = app.state.aiohttp_client_session
40-
assert isinstance(session, ClientSession) # nosec
21+
assert isinstance(session, httpx.AsyncClient) # nosec
4122
return session

services/director/src/simcore_service_director/core/errors.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
1-
from typing import Any
2-
31
from common_library.errors_classes import OsparcErrorMixin
42

53

64
class DirectorRuntimeError(OsparcErrorMixin, RuntimeError):
7-
def __init__(self, **ctx: Any) -> None:
8-
super().__init__(**ctx)
9-
105
msg_template: str = "Director-v0 unexpected error: {msg}"
116

127

services/director/src/simcore_service_director/producer.py

Lines changed: 45 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,22 @@
55
import re
66
from datetime import timedelta
77
from enum import Enum
8-
from http import HTTPStatus
98
from pprint import pformat
109
from typing import Any, Final, cast
1110

1211
import aiodocker
1312
import aiodocker.networks
14-
import aiohttp
1513
import arrow
14+
import httpx
1615
import tenacity
17-
from aiohttp import (
18-
ClientConnectionError,
19-
ClientError,
20-
ClientResponse,
21-
ClientResponseError,
22-
ClientSession,
23-
ClientTimeout,
24-
)
25-
from fastapi import FastAPI
16+
from fastapi import FastAPI, status
2617
from packaging.version import Version
2718
from servicelib.async_utils import run_sequentially_in_context
2819
from servicelib.docker_utils import to_datetime
2920
from settings_library.docker_registry import RegistrySettings
30-
from tenacity import retry
21+
from tenacity import retry, wait_random_exponential
3122
from tenacity.retry import retry_if_exception_type
3223
from tenacity.stop import stop_after_attempt
33-
from tenacity.wait import wait_fixed
3424

3525
from . import docker_utils, registry_proxy
3626
from .client_session import get_client_session
@@ -547,7 +537,7 @@ async def _pass_port_to_service(
547537
service_name: str,
548538
port: str,
549539
service_boot_parameters_labels: list[Any],
550-
session: ClientSession,
540+
session: httpx.AsyncClient,
551541
app_settings: ApplicationSettings,
552542
) -> None:
553543
for param in service_boot_parameters_labels:
@@ -566,8 +556,8 @@ async def _pass_port_to_service(
566556
"port": str(port),
567557
}
568558
_logger.debug("creating request %s and query %s", service_url, query_string)
569-
async with session.post(service_url, data=query_string) as response:
570-
_logger.debug("query response: %s", await response.text())
559+
response = await session.post(service_url, data=query_string)
560+
_logger.debug("query response: %s", response.text)
571561
return
572562
_logger.debug("service %s does not need to know its external port", service_name)
573563

@@ -1149,50 +1139,48 @@ async def get_service_details(app: FastAPI, node_uuid: str) -> dict:
11491139

11501140

11511141
@retry(
1152-
wait=wait_fixed(2),
1142+
wait=wait_random_exponential(min=1, max=5),
11531143
stop=stop_after_attempt(3),
11541144
reraise=True,
1155-
retry=retry_if_exception_type(ClientConnectionError),
1145+
retry=retry_if_exception_type(httpx.RequestError),
11561146
)
11571147
async def _save_service_state(
1158-
service_host_name: str, session: aiohttp.ClientSession
1148+
service_host_name: str, session: httpx.AsyncClient
11591149
) -> None:
1160-
response: ClientResponse
1161-
async with session.post(
1162-
url=f"http://{service_host_name}/state", # NOSONAR
1163-
timeout=ClientTimeout(
1164-
ServicesCommonSettings().director_dynamic_service_save_timeout
1165-
),
1166-
) as response:
1167-
try:
1168-
response.raise_for_status()
1150+
try:
1151+
response = await session.post(
1152+
url=f"http://{service_host_name}/state", # NOSONAR
1153+
timeout=ServicesCommonSettings().director_dynamic_service_save_timeout,
1154+
)
1155+
response.raise_for_status()
11691156

1170-
except ClientResponseError as err:
1171-
if err.status in (
1172-
HTTPStatus.METHOD_NOT_ALLOWED,
1173-
HTTPStatus.NOT_FOUND,
1174-
HTTPStatus.NOT_IMPLEMENTED,
1175-
):
1176-
# NOTE: Legacy Override. Some old services do not have a state entrypoint defined
1177-
# therefore we assume there is nothing to be saved and do not raise exception
1178-
# Responses found so far:
1179-
# METHOD NOT ALLOWED https://httpstatuses.com/405
1180-
# NOT FOUND https://httpstatuses.com/404
1181-
#
1182-
_logger.warning(
1183-
"Service '%s' does not seem to implement save state functionality: %s. Skipping save",
1184-
service_host_name,
1185-
err,
1186-
)
1187-
else:
1188-
# upss ... could service had troubles saving, reraise
1189-
raise
1190-
else:
1191-
_logger.info(
1192-
"Service '%s' successfully saved its state: %s",
1157+
except httpx.HTTPStatusError as err:
1158+
1159+
if err.response.status_code in (
1160+
status.HTTP_405_METHOD_NOT_ALLOWED,
1161+
status.HTTP_404_NOT_FOUND,
1162+
status.HTTP_501_NOT_IMPLEMENTED,
1163+
):
1164+
# NOTE: Legacy Override. Some old services do not have a state entrypoint defined
1165+
# therefore we assume there is nothing to be saved and do not raise exception
1166+
# Responses found so far:
1167+
# METHOD NOT ALLOWED https://httpstatuses.com/405
1168+
# NOT FOUND https://httpstatuses.com/404
1169+
#
1170+
_logger.warning(
1171+
"Service '%s' does not seem to implement save state functionality: %s. Skipping save",
11931172
service_host_name,
1194-
f"{response}",
1173+
err,
11951174
)
1175+
else:
1176+
# upss ... could service had troubles saving, reraise
1177+
raise
1178+
else:
1179+
_logger.info(
1180+
"Service '%s' successfully saved its state: %s",
1181+
service_host_name,
1182+
f"{response}",
1183+
)
11961184

11971185

11981186
@run_sequentially_in_context(target_args=["node_uuid"])
@@ -1246,20 +1234,21 @@ async def stop_service(app: FastAPI, *, node_uuid: str, save_state: bool) -> Non
12461234
await _save_service_state(
12471235
service_host_name, session=get_client_session(app)
12481236
)
1249-
except ClientResponseError as err:
1237+
except httpx.HTTPStatusError as err:
1238+
12501239
raise ServiceStateSaveError(
12511240
service_uuid=node_uuid,
12521241
reason=f"service {service_host_name} rejected to save state, "
1253-
f"responded {err.message} (status {err.status})."
1242+
f"responded {err.response.text} (status {err.response.status_code})."
12541243
"Aborting stop service to prevent data loss.",
12551244
) from err
12561245

1257-
except ClientError as err:
1246+
except httpx.RequestError as err:
12581247
_logger.warning(
12591248
"Could not save state because %s is unreachable [%s]."
12601249
"Resuming stop_service.",
12611250
service_host_name,
1262-
err,
1251+
err.request,
12631252
)
12641253

12651254
# remove the services

0 commit comments

Comments
 (0)