Skip to content

Commit cd864d3

Browse files
GitHKAndrei Neagu
andauthored
🐛 Fixes issues with dy-sidecar removal by director-v2 (ITISFoundation#3029)
* extended API to raise errors * fix service removal * pylint * fixed method spec * downgrading log message * refactor error handling and propagation * pylint * refactor to simplify error handling * cleanup * remove unsued * cleanup * fix codestyle Co-authored-by: Andrei Neagu <[email protected]>
1 parent 375cbd3 commit cd864d3

File tree

12 files changed

+109
-9
lines changed

12 files changed

+109
-9
lines changed

packages/simcore-sdk/src/simcore_sdk/node_ports_common/exceptions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ class NodeNotFound(NodeportsException):
122122
"""The given node_uuid was not found"""
123123

124124
def __init__(self, node_uuid):
125+
self.node_uuid = node_uuid
125126
super().__init__(f"the node id {node_uuid} was not found")
126127

127128

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from .errors import (
1818
DynamicSidecarUnexpectedResponseStatus,
1919
EntrypointContainerNotFoundError,
20+
NodeportsDidNotFindNodeError,
2021
)
2122

2223
# PC -> SAN improvements to discuss
@@ -246,6 +247,11 @@ async def service_push_output_ports(
246247
response = await self._client.post(
247248
url, json=port_keys, timeout=self._save_restore_timeout
248249
)
250+
if response.status_code == status.HTTP_404_NOT_FOUND:
251+
json_error = response.json()
252+
if json_error.get("code") == "dynamic_sidecar.nodeports.node_not_found":
253+
raise NodeportsDidNotFindNodeError(node_uuid=json_error["node_uuid"])
254+
249255
if response.status_code != status.HTTP_204_NO_CONTENT:
250256
raise DynamicSidecarUnexpectedResponseStatus(response, "output ports push")
251257

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ async def docker_client() -> AsyncIterator[aiodocker.docker.Docker]:
9696
except aiodocker.exceptions.DockerError as e:
9797
message = "Unexpected error from docker client"
9898
log_message = f"{message} {e.message}\n{traceback.format_exc()}"
99-
log.warning(log_message)
99+
log.info(log_message)
100100
raise GenericDockerError(message, e) from e
101101
finally:
102102
if client is not None:

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/errors.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from aiodocker.exceptions import DockerError
44
from httpx import Response
55
from models_library.projects_nodes import NodeID
6+
from pydantic.errors import PydanticErrorMixin
67

78
from ...core.errors import DirectorException
89

@@ -52,3 +53,11 @@ def __init__(self, response: Response, msg: Optional[str] = None):
5253
)
5354
super().__init__(message)
5455
self.response = response
56+
57+
58+
class NodeportsDidNotFindNodeError(PydanticErrorMixin, DirectorException):
59+
code = "dynamic_scheduler.output_ports_pulling.node_not_found"
60+
msg_template = (
61+
"Could not find node '{node_uuid}' in the database. Did not upload data to S3, "
62+
"most likely due to service being removed from the study."
63+
)

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from .._namepsace import get_compose_namespace
2828
from ..client_api import DynamicSidecarClient, get_dynamic_sidecar_client
2929
from ..docker_api import (
30+
are_all_services_present,
3031
create_network,
3132
create_service_and_get_id,
3233
get_node_id_from_task_for_service,
@@ -49,6 +50,7 @@
4950
DynamicSidecarUnexpectedResponseStatus,
5051
EntrypointContainerNotFoundError,
5152
GenericDockerError,
53+
NodeportsDidNotFindNodeError,
5254
)
5355
from ..volumes_resolver import DynamicSidecarVolumesPathsResolver
5456
from .abc import DynamicSchedulerEvent
@@ -472,15 +474,23 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
472474
logger.warning(
473475
"Could not contact dynamic-sidecar to begin destruction of %s\n%s",
474476
scheduler_data.service_name,
475-
str(e),
477+
f"{e}",
476478
)
477-
478479
app_settings: AppSettings = app.state.settings
479480
dynamic_sidecar_settings: DynamicSidecarSettings = (
480481
app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
481482
)
482483

483-
if scheduler_data.dynamic_sidecar.service_removal_state.can_save:
484+
# only try to save the status if :
485+
# - the dynamic-sidecar has finished booting correctly
486+
# - it is requested to save the state
487+
if (
488+
scheduler_data.dynamic_sidecar.service_removal_state.can_save
489+
and await are_all_services_present(
490+
node_uuid=scheduler_data.node_uuid,
491+
dynamic_sidecar_settings=app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR,
492+
)
493+
):
484494
dynamic_sidecar_client = get_dynamic_sidecar_client(app)
485495
dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint
486496

@@ -501,7 +511,12 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
501511
dynamic_sidecar_endpoint,
502512
)
503513
)
504-
await logged_gather(*tasks)
514+
515+
try:
516+
await logged_gather(*tasks)
517+
except NodeportsDidNotFindNodeError as err:
518+
logger.warning("%s", f"{err}")
519+
505520
logger.info("Ports data pushed by dynamic-sidecar")
506521
# NOTE: ANE: need to use more specific exception here
507522
except Exception as e: # pylint: disable=broad-except
@@ -511,7 +526,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
511526
"state and upload outputs %s\n%s"
512527
),
513528
scheduler_data.service_name,
514-
str(e),
529+
f"{e}",
515530
)
516531
# ensure dynamic-sidecar does not get removed
517532
# user data can be manually saved and manual

services/dynamic-sidecar/.env-devel

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,9 @@ REGISTRY_AUTH=false
2222
REGISTRY_USER=test
2323
REGISTRY_PW=test
2424
REGISTRY_SSL=false
25+
26+
S3_ENDPOINT=MINIO
27+
S3_ACCESS_KEY=mocked
28+
S3_SECRET_KEY=mocked
29+
S3_BUCKET_NAME=mocked
30+
R_CLONE_PROVIDER=MINIO

services/dynamic-sidecar/openapi.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,9 @@
510510
"204": {
511511
"description": "Successful Response"
512512
},
513+
"404": {
514+
"description": "Could not find node_uuid in the database"
515+
},
513516
"422": {
514517
"description": "Validation Error",
515518
"content": {

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_extension.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,11 @@ async def pull_output_ports(
195195
summary="Push output ports data",
196196
response_class=Response,
197197
status_code=status.HTTP_204_NO_CONTENT,
198+
responses={
199+
status.HTTP_404_NOT_FOUND: {
200+
"description": "Could not find node_uuid in the database"
201+
}
202+
},
198203
)
199204
async def push_output_ports(
200205
port_keys: Optional[List[str]] = None,

services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33

44
from fastapi import FastAPI
55
from servicelib.fastapi.openapi import override_fastapi_openapi_method
6+
from simcore_sdk.node_ports_common.exceptions import NodeNotFound
67

78
from .._meta import API_VTAG, __version__
89
from ..api import main_router
910
from ..models.domains.shared_store import SharedStore
1011
from ..models.schemas.application_health import ApplicationHealth
1112
from ..modules.directory_watcher import setup_directory_watcher
1213
from .docker_logs import setup_background_log_fetcher
13-
from .error_handlers import http_error_handler
14+
from .error_handlers import http_error_handler, node_not_found_error_handler
1415
from .errors import BaseDynamicSidecarError
1516
from .rabbitmq import setup_rabbitmq
1617
from .remote_debug import setup as remote_debug_setup
@@ -78,6 +79,7 @@ def assemble_application() -> FastAPI:
7879
application.include_router(main_router)
7980

8081
# error handlers
82+
application.add_exception_handler(NodeNotFound, node_not_found_error_handler)
8183
application.add_exception_handler(BaseDynamicSidecarError, http_error_handler)
8284

8385
# also sets up mounted_volumes
Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,29 @@
1+
from fastapi import status
12
from fastapi.encoders import jsonable_encoder
3+
from simcore_sdk.node_ports_common.exceptions import NodeNotFound
24
from starlette.requests import Request
35
from starlette.responses import JSONResponse
46

57
from .errors import BaseDynamicSidecarError
68

79

8-
async def http_error_handler(_: Request, exc: BaseDynamicSidecarError) -> JSONResponse:
10+
async def http_error_handler(
11+
_: Request, exception: BaseDynamicSidecarError
12+
) -> JSONResponse:
913
return JSONResponse(
10-
content=jsonable_encoder({"errors": [exc.message]}), status_code=exc.status
14+
content=jsonable_encoder({"errors": [exception.message]}),
15+
status_code=exception.status,
16+
)
17+
18+
19+
async def node_not_found_error_handler(
20+
_: Request, exception: NodeNotFound
21+
) -> JSONResponse:
22+
error_fields = dict(
23+
code="dynamic_sidecar.nodeports.node_not_found",
24+
message=f"{exception}",
25+
node_uuid=f"{exception.node_uuid}",
26+
)
27+
return JSONResponse(
28+
content=jsonable_encoder(error_fields), status_code=status.HTTP_404_NOT_FOUND
1129
)

0 commit comments

Comments
 (0)