diff --git a/packages/models-library/src/models_library/service_settings_labels.py b/packages/models-library/src/models_library/service_settings_labels.py index 2dbeaa2fba94..b3e1956caba4 100644 --- a/packages/models-library/src/models_library/service_settings_labels.py +++ b/packages/models-library/src/models_library/service_settings_labels.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Annotated, Any, Literal, TypeAlias +from common_library.basic_types import DEFAULT_FACTORY from common_library.json_serialization import json_dumps from pydantic import ( BaseModel, @@ -38,23 +39,30 @@ class ContainerSpec(BaseModel): request body: TaskTemplate -> ContainerSpec """ - command: list[str] = Field( - alias="Command", - description="Used to override the container's command", - # NOTE: currently constraint to our use cases. Might mitigate some security issues. - min_length=1, - max_length=2, - ) + command: Annotated[ + list[str], + Field( + alias="Command", + description="Used to override the container's command", + # NOTE: currently constraint to our use cases. Might mitigate some security issues. 
+ min_length=1, + max_length=2, + ), + ] - model_config = _BaseConfig | ConfigDict( - json_schema_extra={ - "examples": [ - {"Command": ["executable"]}, - {"Command": ["executable", "subcommand"]}, - {"Command": ["ofs", "linear-regression"]}, - ] - }, - ) + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + {"Command": ["executable"]}, + {"Command": ["executable", "subcommand"]}, + {"Command": ["ofs", "linear-regression"]}, + ] + } + ) + + model_config = _BaseConfig | ConfigDict(json_schema_extra=_update_json_schema_extra) class SimcoreServiceSettingLabelEntry(BaseModel): @@ -65,24 +73,30 @@ class SimcoreServiceSettingLabelEntry(BaseModel): """ _destination_containers: list[str] = PrivateAttr() - name: str = Field(..., description="The name of the service setting") - setting_type: Literal[ - "string", - "int", - "integer", - "number", - "object", - "ContainerSpec", - "Resources", - ] = Field( - ..., - description="The type of the service setting (follows Docker REST API naming scheme)", - alias="type", - ) - value: Any = Field( - ..., - description="The value of the service setting (shall follow Docker REST API scheme for services", - ) + name: Annotated[str, Field(description="The name of the service setting")] + + setting_type: Annotated[ + Literal[ + "string", + "int", + "integer", + "number", + "object", + "ContainerSpec", + "Resources", + ], + Field( + description="The type of the service setting (follows Docker REST API naming scheme)", + alias="type", + ), + ] + + value: Annotated[ + Any, + Field( + description="The value of the service setting (shall follow Docker REST API scheme for services", + ), + ] def set_destination_containers(self, value: list[str]) -> None: # NOTE: private attributes cannot be transformed into properties @@ -98,106 +112,163 @@ def get_destination_containers(self) -> list[str]: @field_validator("setting_type", mode="before") @classmethod - def 
ensure_backwards_compatible_setting_type(cls, v): + def _ensure_backwards_compatible_setting_type(cls, v): if v == "resources": # renamed in the latest version as return "Resources" return v - model_config = _BaseConfig | ConfigDict( - populate_by_name=True, - json_schema_extra={ - "examples": [ - # constraints - { - "name": "constraints", - "type": "string", - "value": ["node.platform.os == linux"], - }, - # SEE service_settings_labels.py::ContainerSpec - { - "name": "ContainerSpec", - "type": "ContainerSpec", - "value": {"Command": ["run"]}, - }, - # SEE services_resources.py::ResourceValue - { - "name": "Resources", - "type": "Resources", - "value": { - "Limits": {"NanoCPUs": 4000000000, "MemoryBytes": 17179869184}, - "Reservations": { - "NanoCPUs": 100000000, - "MemoryBytes": 536870912, - "GenericResources": [ - {"DiscreteResourceSpec": {"Kind": "VRAM", "Value": 1}} - ], + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + # constraints + { + "name": "constraints", + "type": "string", + "value": ["node.platform.os == linux"], + }, + # SEE service_settings_labels.py::ContainerSpec + { + "name": "ContainerSpec", + "type": "ContainerSpec", + "value": {"Command": ["run"]}, + }, + # SEE services_resources.py::ResourceValue + { + "name": "Resources", + "type": "Resources", + "value": { + "Limits": { + "NanoCPUs": 4000000000, + "MemoryBytes": 17179869184, + }, + "Reservations": { + "NanoCPUs": 100000000, + "MemoryBytes": 536870912, + "GenericResources": [ + { + "DiscreteResourceSpec": { + "Kind": "VRAM", + "Value": 1, + } + } + ], + }, }, }, - }, - # mounts - { - "name": "mount", - "type": "object", - "value": [ - { - "ReadOnly": True, - "Source": "/tmp/.X11-unix", # nosec # noqa: S108 - "Target": "/tmp/.X11-unix", # nosec # noqa: S108 - "Type": "bind", - } - ], - }, - # environments - {"name": "env", "type": "string", "value": ["DISPLAY=:0"]}, - # SEE 'simcore.service.settings' label annotations for 
simcore/services/dynamic/jupyter-octave-python-math:1.6.5 - {"name": "ports", "type": "int", "value": 8888}, - { - "name": "resources", - "type": "resources", - "value": { - "Limits": {"NanoCPUs": 4000000000, "MemoryBytes": 8589934592} + # mounts + { + "name": "mount", + "type": "object", + "value": [ + { + "ReadOnly": True, + "Source": "/tmp/.X11-unix", # nosec # noqa: S108 + "Target": "/tmp/.X11-unix", # nosec # noqa: S108 + "Type": "bind", + } + ], + }, + # environments + {"name": "env", "type": "string", "value": ["DISPLAY=:0"]}, + # SEE 'simcore.service.settings' label annotations for simcore/services/dynamic/jupyter-octave-python-math:1.6.5 + {"name": "ports", "type": "int", "value": 8888}, + { + "name": "resources", + "type": "resources", + "value": { + "Limits": { + "NanoCPUs": 4000000000, + "MemoryBytes": 8589934592, + } + }, }, - }, - ] - }, + ] + } + ) + + model_config = _BaseConfig | ConfigDict( + populate_by_name=True, json_schema_extra=_update_json_schema_extra ) SimcoreServiceSettingsLabel = ListModel[SimcoreServiceSettingLabelEntry] +class LegacyState(BaseModel): + old_state_path: Path + new_state_path: Path + + class PathMappingsLabel(BaseModel): """Content of "simcore.service.paths-mapping" label""" - inputs_path: Path = Field( - ..., description="folder path where the service expects all the inputs" - ) - outputs_path: Path = Field( - ..., - description="folder path where the service is expected to provide all its outputs", - ) - state_paths: list[Path] = Field( - default_factory=list, - description="optional list of paths which contents need to be persisted", - ) + inputs_path: Annotated[ + Path, Field(description="folder path where the service expects all the inputs") + ] - state_exclude: set[str] | None = Field( - None, - description="optional list unix shell rules used to exclude files from the state", - ) + outputs_path: Annotated[ + Path, + Field( + description="folder path where the service is expected to provide all its outputs", + ), + ] 
- volume_size_limits: dict[str, str] | None = Field( - None, - description=( - "Apply volume size limits to entries in: `inputs_path`, `outputs_path` " - "and `state_paths`. Limits must be parsable by Pydantic's ByteSize." + state_paths: Annotated[ + list[Path], + Field( + description="optional list of paths which contents need to be persisted", + default_factory=list, ), - ) + ] = DEFAULT_FACTORY + + state_exclude: Annotated[ + set[str] | None, + Field( + description="optional list unix shell rules used to exclude files from the state", + ), + ] = None + + volume_size_limits: Annotated[ + dict[str, str] | None, + Field( + description=( + "Apply volume size limits to entries in: `inputs_path`, `outputs_path` " + "and `state_paths`. Limits must be parsable by Pydantic's ByteSize." + ), + ), + ] = None + + legacy_state: Annotated[ + LegacyState | None, + Field( + description=( + "if present, the service needs to first try to download the legacy state " + "coming from a different path." + ), + ), + ] = None + + @field_validator("legacy_state") + @classmethod + def _validate_legacy_state( + cls, v: LegacyState | None, info: ValidationInfo + ) -> LegacyState | None: + if v is None: + return v + + state_paths: list[Path] = info.data.get("state_paths", []) + if v.new_state_path not in state_paths: + msg = f"legacy_state={v} not found in state_paths={state_paths}" + raise ValueError(msg) + + return v @field_validator("volume_size_limits") @classmethod - def validate_volume_limits(cls, v, info: ValidationInfo) -> str | None: + def _validate_volume_limits(cls, v, info: ValidationInfo) -> str | None: if v is None: return v @@ -222,39 +293,59 @@ def validate_volume_limits(cls, v, info: ValidationInfo) -> str | None: output: str | None = v return output - model_config = _BaseConfig | ConfigDict( - json_schema_extra={ - "examples": [ - { - "outputs_path": "/tmp/outputs", # noqa: S108 nosec - "inputs_path": "/tmp/inputs", # noqa: S108 nosec - "state_paths": ["/tmp/save_1", 
"/tmp_save_2"], # noqa: S108 nosec - "state_exclude": ["/tmp/strip_me/*"], # noqa: S108 nosec - }, - { - "outputs_path": "/t_out", - "inputs_path": "/t_inp", - "state_paths": [ - "/s", - "/s0", - "/s1", - "/s2", - "/s3", - "/i_have_no_limit", - ], - "volume_size_limits": { - "/s": "1", - "/s0": "1m", - "/s1": "1kib", - "/s2": "1TIB", - "/s3": "1G", - "/t_out": "12", - "/t_inp": "1EIB", + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + { + "outputs_path": "/tmp/outputs", # noqa: S108 nosec + "inputs_path": "/tmp/inputs", # noqa: S108 nosec + "state_paths": [ + "/tmp/save_1", # noqa: S108 nosec + "/tmp_save_2", # noqa: S108 nosec + ], + "state_exclude": ["/tmp/strip_me/*"], # noqa: S108 nosec }, - }, - ] - }, - ) + { + "outputs_path": "/t_out", + "inputs_path": "/t_inp", + "state_paths": [ + "/s", + "/s0", + "/s1", + "/s2", + "/s3", + "/i_have_no_limit", + ], + "volume_size_limits": { + "/s": "1", + "/s0": "1m", + "/s1": "1kib", + "/s2": "1TIB", + "/s3": "1G", + "/t_out": "12", + "/t_inp": "1EIB", + }, + }, + { + "outputs_path": "/tmp/outputs", # noqa: S108 nosec + "inputs_path": "/tmp/inputs", # noqa: S108 nosec + "state_paths": [ + "/tmp/save_1", # noqa: S108 nosec + "/tmp_save_2", # noqa: S108 nosec + ], + "state_exclude": ["/tmp/strip_me/*"], # noqa: S108 nosec + "legacy_state": { + "old_state_path": "/tmp/save_1_legacy", # noqa: S108 nosec + "new_state_path": "/tmp/save_1", # noqa: S108 nosec + }, + }, + ] + } + ) + + model_config = _BaseConfig | ConfigDict(json_schema_extra=_update_json_schema_extra) ComposeSpecLabelDict: TypeAlias = dict[str, Any] @@ -270,79 +361,91 @@ class RestartPolicy(str, Enum): class DynamicSidecarServiceLabels(BaseModel): """All "simcore.service.*" labels including keys""" - paths_mapping: Json[PathMappingsLabel] | None = Field( - None, - alias="simcore.service.paths-mapping", - description=( - "json encoded, determines how the folders are mapped in " - "the service. 
Required by dynamic-sidecar." + paths_mapping: Annotated[ + Json[PathMappingsLabel] | None, + Field( + alias="simcore.service.paths-mapping", + description=( + "json encoded, determines how the folders are mapped in " + "the service. Required by dynamic-sidecar." + ), ), - ) + ] = None - compose_spec: Json[ComposeSpecLabelDict | None] | None = Field( - None, - alias="simcore.service.compose-spec", - description=( - "json encoded docker-compose specifications. see " - "https://docs.docker.com/compose/compose-file/, " - "only used by dynamic-sidecar." + compose_spec: Annotated[ + Json[ComposeSpecLabelDict | None] | None, + Field( + alias="simcore.service.compose-spec", + description=( + "json encoded docker-compose specifications. see " + "https://docs.docker.com/compose/compose-file/, " + "only used by dynamic-sidecar." + ), ), - ) - container_http_entry: str | None = Field( - None, - alias="simcore.service.container-http-entrypoint", - description=( - "When a docker-compose specifications is provided, " - "the container where the traffic must flow has to be " - "specified. Required by dynamic-sidecar when " - "compose_spec is set." + ] = None + + container_http_entry: Annotated[ + str | None, + Field( + alias="simcore.service.container-http-entrypoint", + description=( + "When a docker-compose specifications is provided, " + "the container where the traffic must flow has to be " + "specified. Required by dynamic-sidecar when " + "compose_spec is set." 
+ ), + validate_default=True, ), - validate_default=True, - ) + ] = None - user_preferences_path: Path | None = Field( - None, - alias="simcore.service.user-preferences-path", - description=( - "path where the user user preferences folder " - "will be mounted in the user services" + user_preferences_path: Annotated[ + Path | None, + Field( + alias="simcore.service.user-preferences-path", + description=( + "path where the user user preferences folder " + "will be mounted in the user services" + ), ), - ) + ] = None - restart_policy: RestartPolicy = Field( - RestartPolicy.NO_RESTART, - alias="simcore.service.restart-policy", - description=( - "the dynamic-sidecar can restart all running containers " - "on certain events. Supported events:\n" - "- `no-restart` default\n" - "- `on-inputs-downloaded` after inputs are downloaded\n" + restart_policy: Annotated[ + RestartPolicy, + Field( + alias="simcore.service.restart-policy", + description=( + "the dynamic-sidecar can restart all running containers " + "on certain events. 
Supported events:\n" + "- `no-restart` default\n" + "- `on-inputs-downloaded` after inputs are downloaded\n" + ), ), - ) + ] = RestartPolicy.NO_RESTART containers_allowed_outgoing_permit_list: Annotated[ None | (Json[dict[str, list[NATRule]]]), Field( - None, alias="simcore.service.containers-allowed-outgoing-permit-list", description="allow internet access to certain domain names and ports per container", ), - ] + ] = None - containers_allowed_outgoing_internet: Json[set[str]] | None = Field( - None, - alias="simcore.service.containers-allowed-outgoing-internet", - description="allow complete internet access to containers in here", - ) + containers_allowed_outgoing_internet: Annotated[ + Json[set[str]] | None, + Field( + alias="simcore.service.containers-allowed-outgoing-internet", + description="allow complete internet access to containers in here", + ), + ] = None callbacks_mapping: Annotated[ Json[CallbacksMapping] | None, Field( - default_factory=CallbacksMapping, # NOTE: PC->ANE I still think this could be an issue alias="simcore.service.callbacks-mapping", description="exposes callbacks from user services to the sidecar", + default_factory=CallbacksMapping, ), - ] + ] = DEFAULT_FACTORY @cached_property def needs_dynamic_sidecar(self) -> bool: @@ -351,7 +454,7 @@ def needs_dynamic_sidecar(self) -> bool: @field_validator("container_http_entry") @classmethod - def compose_spec_requires_container_http_entry( + def _compose_spec_requires_container_http_entry( cls, v, info: ValidationInfo ) -> str | None: v = None if v == "" else v @@ -493,85 +596,6 @@ def _not_allowed_in_both_specs(self): model_config = _BaseConfig -def _update_json_schema_extra(schema: JsonDict) -> None: - # - # NOTE: this will be automatically called with SimcoreServiceLabels.model_json_schema - # - - schema.update( - { - "examples": [ - # WARNING: do not change order. Used in tests! 
- # legacy service - { - "simcore.service.settings": json_dumps( - SimcoreServiceSettingLabelEntry.model_json_schema()["examples"] - ) - }, - # dynamic-service - { - "simcore.service.settings": json_dumps( - SimcoreServiceSettingLabelEntry.model_json_schema()["examples"] - ), - "simcore.service.paths-mapping": json_dumps( - PathMappingsLabel.model_json_schema()["examples"][0] - ), - "simcore.service.restart-policy": RestartPolicy.NO_RESTART.value, - "simcore.service.callbacks-mapping": json_dumps( - { - "metrics": { - "service": DEFAULT_SINGLE_SERVICE_NAME, - "command": "ls", - "timeout": 1, - } - } - ), - "simcore.service.user-preferences-path": json_dumps( - "/tmp/path_to_preferences" # noqa: S108 - ), - }, - # dynamic-service with compose spec - { - "simcore.service.settings": json_dumps( - SimcoreServiceSettingLabelEntry.model_json_schema()["examples"] - ), - "simcore.service.paths-mapping": json_dumps( - PathMappingsLabel.model_json_schema()["examples"][0], - ), - "simcore.service.compose-spec": json_dumps( - { - "version": "2.3", - "services": { - "rt-web": { - "image": "${SIMCORE_REGISTRY}/simcore/services/dynamic/sim4life:${SERVICE_VERSION}", - "init": True, - "depends_on": ["s4l-core"], - "storage_opt": {"size": "10M"}, - }, - "s4l-core": { - "image": "${SIMCORE_REGISTRY}/simcore/services/dynamic/s4l-core:${SERVICE_VERSION}", - "runtime": "nvidia", - "storage_opt": {"size": "5G"}, - "init": True, - "environment": ["DISPLAY=${DISPLAY}"], - "volumes": [ - "/tmp/.X11-unix:/tmp/.X11-unix" # nosec # noqa: S108 - ], - }, - }, - } - ), - "simcore.service.container-http-entrypoint": "rt-web", - "simcore.service.restart-policy": RestartPolicy.ON_INPUTS_DOWNLOADED.value, - "simcore.service.callbacks-mapping": json_dumps( - CallbacksMapping.model_json_schema()["examples"][3] - ), - }, - ] - }, - ) - - class SimcoreServiceLabels(DynamicSidecarServiceLabels): """ Validate all the simcores.services.* labels on a service. 
@@ -591,15 +615,96 @@ class SimcoreServiceLabels(DynamicSidecarServiceLabels): settings: Annotated[ Json[SimcoreServiceSettingsLabel], Field( - default_factory=lambda: SimcoreServiceSettingsLabel.model_validate([]), alias="simcore.service.settings", description=( "Json encoded. Contains setting like environment variables and " "resource constraints which are required by the service. " "Should be compatible with Docker REST API." ), + default_factory=lambda: SimcoreServiceSettingsLabel.model_validate([]), ), - ] + ] = DEFAULT_FACTORY + + @staticmethod + def _update_json_schema_extra(schema: JsonDict) -> None: + schema.update( + { + "examples": [ + # WARNING: do not change order. Used in tests! + # legacy service + { + "simcore.service.settings": json_dumps( + SimcoreServiceSettingLabelEntry.model_json_schema()[ + "examples" + ] + ) + }, + # dynamic-service + { + "simcore.service.settings": json_dumps( + SimcoreServiceSettingLabelEntry.model_json_schema()[ + "examples" + ] + ), + "simcore.service.paths-mapping": json_dumps( + PathMappingsLabel.model_json_schema()["examples"][0] + ), + "simcore.service.restart-policy": RestartPolicy.NO_RESTART.value, + "simcore.service.callbacks-mapping": json_dumps( + { + "metrics": { + "service": DEFAULT_SINGLE_SERVICE_NAME, + "command": "ls", + "timeout": 1, + } + } + ), + "simcore.service.user-preferences-path": json_dumps( + "/tmp/path_to_preferences" # noqa: S108 + ), + }, + # dynamic-service with compose spec + { + "simcore.service.settings": json_dumps( + SimcoreServiceSettingLabelEntry.model_json_schema()[ + "examples" + ] + ), + "simcore.service.paths-mapping": json_dumps( + PathMappingsLabel.model_json_schema()["examples"][0], + ), + "simcore.service.compose-spec": json_dumps( + { + "version": "2.3", + "services": { + "rt-web": { + "image": "${SIMCORE_REGISTRY}/simcore/services/dynamic/sim4life:${SERVICE_VERSION}", + "init": True, + "depends_on": ["s4l-core"], + "storage_opt": {"size": "10M"}, + }, + "s4l-core": { + 
"image": "${SIMCORE_REGISTRY}/simcore/services/dynamic/s4l-core:${SERVICE_VERSION}", + "runtime": "nvidia", + "storage_opt": {"size": "5G"}, + "init": True, + "environment": ["DISPLAY=${DISPLAY}"], + "volumes": [ + "/tmp/.X11-unix:/tmp/.X11-unix" # nosec # noqa: S108 + ], + }, + }, + } + ), + "simcore.service.container-http-entrypoint": "rt-web", + "simcore.service.restart-policy": RestartPolicy.ON_INPUTS_DOWNLOADED.value, + "simcore.service.callbacks-mapping": json_dumps( + CallbacksMapping.model_json_schema()["examples"][3] + ), + }, + ] + }, + ) model_config = _BaseConfig | ConfigDict( extra="allow", diff --git a/packages/models-library/tests/test_service_settings_labels.py b/packages/models-library/tests/test_service_settings_labels.py index e66f0cd911e2..c056902d8e92 100644 --- a/packages/models-library/tests/test_service_settings_labels.py +++ b/packages/models-library/tests/test_service_settings_labels.py @@ -78,11 +78,11 @@ def test_simcore_service_labels(example: dict, items: int, uses_dynamic_sidecar: def test_service_settings(): simcore_settings_settings_label = SimcoreServiceSettingsLabel.model_validate( - SimcoreServiceSettingLabelEntry.model_config["json_schema_extra"]["examples"] + SimcoreServiceSettingLabelEntry.model_json_schema()["examples"] ) assert simcore_settings_settings_label assert len(simcore_settings_settings_label) == len( - SimcoreServiceSettingLabelEntry.model_config["json_schema_extra"]["examples"] + SimcoreServiceSettingLabelEntry.model_json_schema()["examples"] ) assert simcore_settings_settings_label[0] @@ -122,16 +122,14 @@ def test_raises_error_if_http_entrypoint_is_missing(): def test_path_mappings_none_state_paths(): - sample_data = deepcopy( - PathMappingsLabel.model_config["json_schema_extra"]["examples"][0] - ) + sample_data = deepcopy(PathMappingsLabel.model_json_schema()["examples"][0]) sample_data["state_paths"] = None with pytest.raises(ValidationError): PathMappingsLabel(**sample_data) def 
test_path_mappings_json_encoding(): - for example in PathMappingsLabel.model_config["json_schema_extra"]["examples"]: + for example in PathMappingsLabel.model_json_schema()["examples"]: path_mappings = PathMappingsLabel.model_validate(example) print(path_mappings) assert ( @@ -607,7 +605,7 @@ def test_resolving_some_service_labels_at_load_time( def test_user_preferences_path_is_part_of_exiting_volume(): labels_data = { "simcore.service.paths-mapping": json.dumps( - PathMappingsLabel.model_config["json_schema_extra"]["examples"][0] + PathMappingsLabel.model_json_schema()["examples"][0] ), "simcore.service.user-preferences-path": json.dumps( "/tmp/outputs" # noqa: S108 diff --git a/packages/service-integration/tests/test_osparc_config.py b/packages/service-integration/tests/test_osparc_config.py index 9a5a8bd7a818..d22871b5c6a9 100644 --- a/packages/service-integration/tests/test_osparc_config.py +++ b/packages/service-integration/tests/test_osparc_config.py @@ -62,7 +62,9 @@ def test_load_from_labels( ) with open(config_path, "w") as fh: data = json.loads( - model.model_dump_json(exclude_unset=True, by_alias=True, exclude_none=True) + model.model_dump_json( + exclude_unset=True, by_alias=True, exclude_none=True + ) ) yaml.safe_dump(data, fh, sort_keys=False) @@ -73,10 +75,10 @@ def test_load_from_labels( @pytest.mark.parametrize( "example_data", - SimcoreServiceSettingLabelEntry.model_config["json_schema_extra"]["examples"], + SimcoreServiceSettingLabelEntry.model_json_schema()["examples"], ) def test_settings_item_in_sync_with_service_settings_label( - example_data: dict[str, Any] + example_data: dict[str, Any], ): print(pformat(example_data)) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py index 26873e9ec441..db552f193b72 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py @@ -4,6 
+4,7 @@ from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID, StorageFileID +from models_library.service_settings_labels import LegacyState from models_library.users import UserID from pydantic import TypeAdapter from servicelib.archiving_utils import unarchive_dir @@ -101,14 +102,16 @@ async def _pull_legacy_archive( *, io_log_redirect_cb: LogRedirectCB, progress_bar: ProgressBarData, + legacy_destination_path: Path | None = None, ) -> None: # NOTE: the legacy way of storing states was as zip archives + archive_path = legacy_destination_path or destination_path async with progress_bar.sub_progress( - steps=2, description=f"pulling {destination_path.name}" + steps=2, description=f"pulling {archive_path.name}" ) as sub_prog: with TemporaryDirectory() as tmp_dir_name: archive_file = Path(tmp_dir_name) / __get_s3_name( - destination_path, is_archive=True + archive_path, is_archive=True ) s3_object = __create_s3_object_key(project_id, node_uuid, archive_file) @@ -124,7 +127,7 @@ async def _pull_legacy_archive( progress_bar=sub_prog, aws_s3_cli_settings=None, ) - _logger.info("completed pull of %s.", destination_path) + _logger.info("completed pull of %s.", archive_path) if io_log_redirect_cb: await io_log_redirect_cb( @@ -194,6 +197,7 @@ async def push( exclude_patterns: set[str] | None = None, progress_bar: ProgressBarData, aws_s3_cli_settings: AwsS3CliSettings | None, + legacy_state: LegacyState | None, ) -> None: """pushes and removes the legacy archive if present""" @@ -208,6 +212,7 @@ async def push( progress_bar=progress_bar, aws_s3_cli_settings=aws_s3_cli_settings, ) + archive_exists = await _state_metadata_entry_exists( user_id=user_id, project_id=project_id, @@ -215,16 +220,31 @@ async def push( path=source_path, is_archive=True, ) + if archive_exists: + with log_context(_logger, logging.INFO, "removing legacy archive"): + await _delete_legacy_archive( + project_id=project_id, + node_uuid=node_uuid, + 
path=source_path, + ) - if not archive_exists: - return - - with log_context(_logger, logging.INFO, "removing legacy data archive"): - await _delete_legacy_archive( + if legacy_state: + legacy_archive_exists = await _state_metadata_entry_exists( + user_id=user_id, project_id=project_id, node_uuid=node_uuid, - path=source_path, + path=legacy_state.old_state_path, + is_archive=True, ) + if legacy_archive_exists: + with log_context( + _logger, logging.INFO, f"removing legacy archive in {legacy_state}" + ): + await _delete_legacy_archive( + project_id=project_id, + node_uuid=node_uuid, + path=legacy_state.old_state_path, + ) async def pull( @@ -237,9 +257,41 @@ async def pull( r_clone_settings: RCloneSettings, progress_bar: ProgressBarData, aws_s3_cli_settings: AwsS3CliSettings | None, + legacy_state: LegacyState | None, ) -> None: """restores the state folder""" + if legacy_state and legacy_state.new_state_path == destination_path: + _logger.info( + "trying to restore from legacy_state=%s, destination_path=%s", + legacy_state, + destination_path, + ) + legacy_state_exists = await _state_metadata_entry_exists( + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + path=legacy_state.old_state_path, + is_archive=True, + ) + _logger.info("legacy_state_exists=%s", legacy_state_exists) + if legacy_state_exists: + with log_context( + _logger, + logging.INFO, + f"restoring data from legacy archive in {legacy_state}", + ): + await _pull_legacy_archive( + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + destination_path=legacy_state.new_state_path, + io_log_redirect_cb=io_log_redirect_cb, + progress_bar=progress_bar, + legacy_destination_path=legacy_state.old_state_path, + ) + return + state_archive_exists = await _state_metadata_entry_exists( user_id=user_id, project_id=project_id, @@ -248,7 +300,7 @@ async def pull( is_archive=True, ) if state_archive_exists: - with log_context(_logger, logging.INFO, "restoring legacy data archive"): + with 
log_context(_logger, logging.INFO, "restoring data from legacy archive"): await _pull_legacy_archive( user_id=user_id, project_id=project_id, diff --git a/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py b/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py index a578d410605b..bc36b2c2a5ef 100644 --- a/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py +++ b/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py @@ -172,6 +172,7 @@ async def test_push_file( mock_filemanager.reset_mock() +@pytest.mark.parametrize("create_legacy_archive", [False, True]) async def test_pull_legacy_archive( user_id: int, project_id: ProjectID, @@ -181,6 +182,7 @@ async def test_pull_legacy_archive( create_files: Callable[..., list[Path]], mock_io_log_redirect_cb: LogRedirectCB, faker: Faker, + create_legacy_archive: bool, ): assert tmpdir.exists() # create a folder to compress from @@ -200,7 +202,13 @@ async def test_pull_legacy_archive( create_files(files_number, test_control_folder) compressed_file_name = test_compression_folder / test_folder.stem archive_file = make_archive( - f"{compressed_file_name}", "zip", root_dir=test_control_folder + ( + f"{compressed_file_name}_legacy" + if create_legacy_archive + else f"{compressed_file_name}" + ), + "zip", + root_dir=test_control_folder, ) assert Path(archive_file).exists() # create mock downloaded folder @@ -229,13 +237,16 @@ async def test_pull_legacy_archive( test_folder, io_log_redirect_cb=mock_io_log_redirect_cb, progress_bar=progress_bar, + legacy_destination_path=( + Path(f"{test_folder}_legacy") if create_legacy_archive else None + ), ) assert progress_bar._current_steps == pytest.approx(1) # noqa: SLF001 mock_temporary_directory.assert_called_once() mock_filemanager.download_path_from_s3.assert_called_once_with( user_id=user_id, local_path=test_compression_folder, - s3_object=f"{project_id}/{node_uuid}/{test_folder.stem}.zip", + 
s3_object=f"{project_id}/{node_uuid}/{f'{test_folder.stem}_legacy' if create_legacy_archive else test_folder.stem}.zip", store_id=SIMCORE_LOCATION, store_name=None, io_log_redirect_cb=mock_io_log_redirect_cb, diff --git a/services/director-v2/openapi.json b/services/director-v2/openapi.json index 144d9f92bedb..78cc4e1fb598 100644 --- a/services/director-v2/openapi.json +++ b/services/director-v2/openapi.json @@ -2291,6 +2291,26 @@ } } }, + "LegacyState": { + "properties": { + "old_state_path": { + "type": "string", + "format": "path", + "title": "Old State Path" + }, + "new_state_path": { + "type": "string", + "format": "path", + "title": "New State Path" + } + }, + "type": "object", + "required": [ + "old_state_path", + "new_state_path" + ], + "title": "LegacyState" + }, "NATRule": { "properties": { "hostname": { @@ -2449,6 +2469,17 @@ ], "title": "Volume Size Limits", "description": "Apply volume size limits to entries in: `inputs_path`, `outputs_path` and `state_paths`. Limits must be parsable by Pydantic's ByteSize." + }, + "legacy_state": { + "anyOf": [ + { + "$ref": "#/components/schemas/LegacyState" + }, + { + "type": "null" + } + ], + "description": "if present, the service needs to first try to download the legacy state coming from a different path." 
} }, "additionalProperties": false, diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py index 3e77ed444e13..49b9e0c56704 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/settings.py @@ -426,7 +426,7 @@ def _assemble_env_vars_for_boot_options( return SimcoreServiceSettingsLabel( root=[ SimcoreServiceSettingLabelEntry( - name="env", type="string", value=list(env_vars) + name="env", setting_type="string", value=list(env_vars) ) ] ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py index 35fa3ae9ae50..d7d013208cb2 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py @@ -134,6 +134,11 @@ def _get_environment_variables( "DY_SIDECAR_USER_SERVICES_HAVE_INTERNET_ACCESS": f"{allow_internet_access}", "DY_SIDECAR_SYSTEM_MONITOR_TELEMETRY_ENABLE": f"{telemetry_enabled}", "DY_SIDECAR_STATE_EXCLUDE": json_dumps(f"{x}" for x in state_exclude), + "DY_SIDECAR_LEGACY_STATE": ( + "null" + if scheduler_data.paths_mapping.legacy_state is None + else scheduler_data.paths_mapping.legacy_state.model_dump_json() + ), "DY_SIDECAR_CALLBACKS_MAPPING": callbacks_mapping.model_dump_json(), "DY_SIDECAR_STATE_PATHS": json_dumps( f"{x}" for x in scheduler_data.paths_mapping.state_paths @@ -451,18 +456,18 @@ async def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: 
scheduler_data.product_name is not None ), "ONLY for legacy. This function should not be called with product_name==None" # nosec - standard_simcore_docker_labels: dict[ - DockerLabelKey, str - ] = StandardSimcoreDockerLabels( - user_id=scheduler_data.user_id, - project_id=scheduler_data.project_id, - node_id=scheduler_data.node_uuid, - product_name=scheduler_data.product_name, - simcore_user_agent=scheduler_data.request_simcore_user_agent, - swarm_stack_name=dynamic_services_scheduler_settings.SWARM_STACK_NAME, - memory_limit=ByteSize(0), # this should get overwritten - cpu_limit=0, # this should get overwritten - ).to_simcore_runtime_docker_labels() + standard_simcore_docker_labels: dict[DockerLabelKey, str] = ( + StandardSimcoreDockerLabels( + user_id=scheduler_data.user_id, + project_id=scheduler_data.project_id, + node_id=scheduler_data.node_uuid, + product_name=scheduler_data.product_name, + simcore_user_agent=scheduler_data.request_simcore_user_agent, + swarm_stack_name=dynamic_services_scheduler_settings.SWARM_STACK_NAME, + memory_limit=ByteSize(0), # this should get overwritten + cpu_limit=0, # this should get overwritten + ).to_simcore_runtime_docker_labels() + ) service_labels: dict[str, str] = ( { @@ -494,9 +499,7 @@ async def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: ) ) - placement_substitutions: dict[ - str, DockerPlacementConstraint - ] = ( + placement_substitutions: dict[str, DockerPlacementConstraint] = ( placement_settings.DIRECTOR_V2_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS ) for image_resources in scheduler_data.service_resources.values(): diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py index f4870a140c46..27cdb8319144 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py +++ 
b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py @@ -20,6 +20,7 @@ "DY_DOCKER_HUB_REGISTRY_SETTINGS", "DY_SIDECAR_AWS_S3_CLI_SETTINGS", "DY_SIDECAR_CALLBACKS_MAPPING", + "DY_SIDECAR_LEGACY_STATE", "DY_SIDECAR_LOG_FORMAT_LOCAL_DEV_ENABLED", "DY_SIDECAR_NODE_ID", "DY_SIDECAR_PATH_INPUTS", diff --git a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py index 8f26a8bd297d..4618a9a9ba01 100644 --- a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py +++ b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py @@ -267,6 +267,7 @@ def expected_dynamic_sidecar_spec( "DY_SIDECAR_SERVICE_VERSION": "2.4.5", "DY_SIDECAR_PRODUCT_NAME": osparc_product_name, "DY_SIDECAR_USER_PREFERENCES_PATH": "None", + "DY_SIDECAR_LEGACY_STATE": "null", "DY_SIDECAR_LOG_FORMAT_LOCAL_DEV_ENABLED": "True", "POSTGRES_DB": "test", "POSTGRES_HOST": "localhost", diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py index 1f42b5b848e9..4187f08b02c9 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py @@ -10,6 +10,7 @@ from models_library.products import ProductName from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID +from models_library.service_settings_labels import LegacyState from models_library.services import DynamicServiceKey, ServiceRunID, ServiceVersion from models_library.users import UserID from pydantic import ( @@ -141,6 +142,10 @@ class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): DY_SIDECAR_STATE_EXCLUDE: set[str] = Field( 
..., description="list of patterns to exclude files when saving states" ) + DY_SIDECAR_LEGACY_STATE: LegacyState | None = Field( + default=None, description="used to recover state when upgrading service" + ) + DY_SIDECAR_LOG_FORMAT_LOCAL_DEV_ENABLED: bool = Field( default=False, validation_alias=AliasChoices( diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py index 83458fd9d1e0..42412376d08c 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/long_running_tasks.py @@ -12,6 +12,7 @@ ) from models_library.generated_models.docker_rest_api import ContainerState from models_library.rabbitmq_messages import ProgressType, SimcorePlatformStatus +from models_library.service_settings_labels import LegacyState from pydantic import PositiveInt from servicelib.file_utils import log_directory_changes from servicelib.logging_utils import log_context @@ -337,6 +338,19 @@ def _get_satate_folders_size(paths: list[Path]) -> int: return total_size +def _get_legacy_state_with_dy_volumes_path( + settings: ApplicationSettings, +) -> LegacyState | None: + legacy_state = settings.DY_SIDECAR_LEGACY_STATE + if legacy_state is None: + return None + dy_volumes = settings.DYNAMIC_SIDECAR_DY_VOLUMES_MOUNT_DIR + return LegacyState( + old_state_path=dy_volumes / legacy_state.old_state_path.relative_to("/"), + new_state_path=dy_volumes / legacy_state.new_state_path.relative_to("/"), + ) + + async def _restore_state_folder( app: FastAPI, *, @@ -348,13 +362,14 @@ async def _restore_state_folder( user_id=settings.DY_SIDECAR_USER_ID, project_id=settings.DY_SIDECAR_PROJECT_ID, node_uuid=settings.DY_SIDECAR_NODE_ID, - destination_path=state_path, + destination_path=Path(state_path), io_log_redirect_cb=functools.partial( 
post_sidecar_log_message, app, log_level=logging.INFO ), r_clone_settings=settings.DY_SIDECAR_R_CLONE_SETTINGS, progress_bar=progress_bar, aws_s3_cli_settings=settings.DY_SIDECAR_AWS_S3_CLI_SETTINGS, + legacy_state=_get_legacy_state_with_dy_volumes_path(settings), ) @@ -429,6 +444,7 @@ async def _save_state_folder( ), progress_bar=progress_bar, aws_s3_cli_settings=settings.DY_SIDECAR_AWS_S3_CLI_SETTINGS, + legacy_state=_get_legacy_state_with_dy_volumes_path(settings), )