Enhance artifact storage handling for in-memory materializers

safoinme · safoinme · commit aba28cf58c90 · 2025-09-24T13:27:43.000+01:00
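This commit introduces logic to handle in-memory materializers more effectively by avoiding unnecessary interactions with the artifact store. When an in-memory materializer is used and the URI does not already start with "memory://", the URI is replaced with a generated `memory://custom_artifacts/<name>/<uuid>` URI to clearly indicate its storage type, and the artifact version request is created with `artifact_store_id=None`. Additionally, the artifact store's `makedirs` method is only called when not using in-memory storage, which avoids needless artifact store calls and makes the intent clearer.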

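Furthermore, the parameter model construction in the deployment service has been simplified: the `strict` argument is no longer passed to `build_params_model_from_snapshot`, and a `RuntimeError` is raised directly when no parameters model can be constructed for the pipeline.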

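Apart from these improvements and the supporting adjustments they require (a `create_dirs` flag on `prepare_output_artifact_uris`, the step launcher passing that flag based on the in-memory runtime mode, `_execute_with_orchestrator` returning the placeholder run together with the captured outputs, and deployment parameters being merged into the snapshot's pipeline spec), no other functional changes were made to the application code.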
diff --git a/src/zenml/artifacts/utils.py b/src/zenml/artifacts/utils.py
@@ -152,7 +152,21 @@ def _store_artifact_data_and_prepare_request(
         Artifact version request for the artifact data that was stored.
     """
     artifact_store = Client().active_stack.artifact_store
-    artifact_store.makedirs(uri)
+
+    # Detect in-memory materializer to avoid touching the artifact store.
+    # Local import to minimize import-time dependencies.
+    from zenml.materializers.in_memory_materializer import (
+        InMemoryMaterializer,
+    )
+
+    is_in_memory = issubclass(materializer_class, InMemoryMaterializer)
+
+    if not is_in_memory:
+        artifact_store.makedirs(uri)
+    else:
+        # Ensure URI clearly indicates in-memory storage and not the artifact store
+        if not uri.startswith("memory://"):
+            uri = f"memory://custom_artifacts/{name}/{uuid4()}"
 
     materializer = materializer_class(uri=uri, artifact_store=artifact_store)
     materializer.uri = materializer.uri.replace("\\", "/")
@@ -190,7 +204,7 @@ def _store_artifact_data_and_prepare_request(
         data_type=source_utils.resolve(data_type),
         content_hash=content_hash,
         project=Client().active_project.id,
-        artifact_store_id=artifact_store.id,
+        artifact_store_id=None if is_in_memory else artifact_store.id,
         visualizations=visualizations,
         has_custom_name=has_custom_name,
         save_type=save_type,
diff --git a/src/zenml/deployers/server/parameters.py b/src/zenml/deployers/server/parameters.py
@@ -37,15 +37,14 @@ def build_params_model_from_snapshot(
 
     Args:
         snapshot: The snapshot to derive the model from.
-        strict: Whether to raise an error if the model cannot be constructed.
 
     Returns:
         A Pydantic `BaseModel` subclass that validates the pipeline parameters,
-        or None if the model could not be constructed.
+        or None if the snapshot lacks a valid `pipeline_spec.source`.
 
     Raises:
-        RuntimeError: If the model cannot be constructed and `strict` is True.
-        Exception: If loading the pipeline class fails when `strict` is True.
+        RuntimeError: If the pipeline class cannot be loaded or if no
+            parameters model can be constructed for the pipeline.
     """
     if not snapshot.pipeline_spec or not snapshot.pipeline_spec.source:
         msg = (
@@ -66,13 +65,7 @@ def build_params_model_from_snapshot(
 
     model = pipeline_class.get_parameters_model()
     if not model:
-        message = (
-            f"Failed to construct parameters model from pipeline "
-            f"`{snapshot.pipeline_configuration.name}`."
+        raise RuntimeError(
+            f"Failed to construct parameters model from pipeline `{snapshot.pipeline_configuration.name}`."
         )
-        logger.error(message)
-        raise RuntimeError(message)
-    else:
-            logger.debug(message)
-
     return model
diff --git a/src/zenml/deployers/server/runtime.py b/src/zenml/deployers/server/runtime.py
@@ -167,4 +167,4 @@ def get_in_memory_data(uri: str) -> Any:
     if is_active():
         state = _get_context()
         return state.in_memory_data.get(uri)
-    return None
+    return None
diff --git a/src/zenml/deployers/server/service.py b/src/zenml/deployers/server/service.py
@@ -17,7 +17,7 @@
 import time
 import traceback
 from datetime import datetime, timezone
-from typing import Any, Dict, Optional, Type, Union
+from typing import Any, Dict, Optional, Tuple, Type, Union
 from uuid import UUID, uuid4
 
 from pydantic import BaseModel
@@ -136,7 +136,9 @@ def initialize(self) -> None:
             integration_registry.activate_integrations()
 
             # Build parameter model
-            self._params_model = build_params_model_from_snapshot(self.snapshot, strict=True)
+            self._params_model = build_params_model_from_snapshot(
+                snapshot=self.snapshot,
+            )
 
             # Initialize orchestrator
             self._orchestrator = SharedLocalOrchestrator(
@@ -208,11 +210,12 @@ def execute_pipeline(
 
         placeholder_run: Optional[PipelineRunResponse] = None
         try:
-            placeholder_run = self._prepare_execute_with_orchestrator()
-
             # Execute pipeline and get runtime outputs captured internally
-            captured_outputs = self._execute_with_orchestrator(
-                placeholder_run, parameters, request.use_in_memory
+            placeholder_run, captured_outputs = (
+                self._execute_with_orchestrator(
+                    resolved_params=parameters,
+                    use_in_memory=request.use_in_memory,
+                )
             )
 
             # Map outputs using fast (in-memory) or slow (artifact) path
@@ -327,19 +330,17 @@ def _map_outputs(
 
     def _execute_with_orchestrator(
         self,
-        placeholder_run: PipelineRunResponse,
         resolved_params: Dict[str, Any],
         use_in_memory: bool,
-    ) -> Optional[Dict[str, Dict[str, Any]]]:
+    ) -> Tuple[PipelineRunResponse, Optional[Dict[str, Dict[str, Any]]]]:
         """Run the snapshot via the orchestrator and return the concrete run.
 
         Args:
-            placeholder_run: The placeholder run to execute the pipeline on.
             resolved_params: Normalized pipeline parameters.
             use_in_memory: Whether runtime should capture in-memory outputs.
 
         Returns:
-            The in-memory outputs of the pipeline execution.
+            A tuple of (placeholder_run, in-memory outputs of the execution).
 
         Raises:
             RuntimeError: If the orchestrator has not been initialized.
@@ -400,9 +401,7 @@ def _execute_with_orchestrator(
         finally:
             # Always stop deployment runtime context
             runtime.stop()
-
-        # Store captured outputs for the caller to use
-        return captured_outputs
+        return placeholder_run, captured_outputs
 
     def _execute_init_hook(self) -> None:
         """Execute init hook if present.
diff --git a/src/zenml/orchestrators/output_utils.py b/src/zenml/orchestrators/output_utils.py
@@ -58,14 +58,19 @@ def generate_artifact_uri(
 
 
 def prepare_output_artifact_uris(
-    step_run: "StepRunResponse", stack: "Stack", step: "Step"
+    step_run: "StepRunResponse",
+    stack: "Stack",
+    step: "Step",
+    *,
+    create_dirs: bool = True,
 ) -> Dict[str, str]:
     """Prepares the output artifact URIs to run the current step.
 
     Args:
         step_run: The step run for which to prepare the artifact URIs.
         stack: The stack on which the pipeline is running.
         step: The step configuration.
+        create_dirs: Whether to pre-create directories in the artifact store.
 
     Raises:
         RuntimeError: If an artifact URI already exists.
@@ -75,18 +80,43 @@ def prepare_output_artifact_uris(
     """
     artifact_store = stack.artifact_store
     output_artifact_uris: Dict[str, str] = {}
+
     for output_name in step.config.outputs.keys():
         substituted_output_name = string_utils.format_name_template(
             output_name, substitutions=step_run.config.substitutions
         )
-        artifact_uri = generate_artifact_uri(
-            artifact_store=stack.artifact_store,
-            step_run=step_run,
-            output_name=substituted_output_name,
-        )
-        if artifact_store.exists(artifact_uri):
-            raise RuntimeError("Artifact already exists")
-        artifact_store.makedirs(artifact_uri)
+        if create_dirs:
+            artifact_uri = generate_artifact_uri(
+                artifact_store=artifact_store,
+                step_run=step_run,
+                output_name=substituted_output_name,
+            )
+        else:
+            # Produce a clear in-memory URI that doesn't point to the store.
+            sanitized_output = substituted_output_name
+            for banned_character in [
+                "<",
+                ">",
+                ":",
+                '"',
+                "/",
+                "\\",
+                "|",
+                "?",
+                "*",
+            ]:
+                sanitized_output = sanitized_output.replace(
+                    banned_character, "_"
+                )
+            artifact_uri = (
+                f"memory://{step_run.name}/{sanitized_output}/"
+                f"{step_run.id}/{str(uuid4())[:8]}"
+            )
+
+        if create_dirs:
+            if artifact_store.exists(artifact_uri):
+                raise RuntimeError("Artifact already exists")
+            artifact_store.makedirs(artifact_uri)
         output_artifact_uris[output_name] = artifact_uri
     return output_artifact_uris
 
diff --git a/src/zenml/orchestrators/step_launcher.py b/src/zenml/orchestrators/step_launcher.py
@@ -26,6 +26,7 @@
     ENV_ZENML_STEP_OPERATOR,
     handle_bool_env_var,
 )
+from zenml.deployers.server import runtime
 from zenml.enums import ExecutionMode, ExecutionStatus
 from zenml.environment import get_run_environment_dict
 from zenml.exceptions import RunInterruptedException, RunStoppedException
@@ -438,7 +439,10 @@ def _run_step(
         )
 
         output_artifact_uris = output_utils.prepare_output_artifact_uris(
-            step_run=step_run, stack=self._stack, step=self._step
+            step_run=step_run,
+            stack=self._stack,
+            step=self._step,
+            create_dirs=not runtime.should_use_in_memory_mode(),
         )
 
         start_time = time.time()
diff --git a/src/zenml/orchestrators/utils.py b/src/zenml/orchestrators/utils.py
@@ -434,8 +434,27 @@ def deployment_snapshot_request_from_source_snapshot(
 
     if source_snapshot.stack is None:
         raise ValueError("Source snapshot stack is None")
-    if source_snapshot.pipeline is None:
-        raise ValueError("Source snapshot pipeline is None")
+
+    # Update the pipeline spec parameters by overriding only known keys
+    updated_pipeline_spec = source_snapshot.pipeline_spec
+    try:
+        if (
+            source_snapshot.pipeline_spec
+            and source_snapshot.pipeline_spec.parameters is not None
+        ):
+            original_params: Dict[str, Any] = dict(
+                source_snapshot.pipeline_spec.parameters
+            )
+            merged_params: Dict[str, Any] = original_params.copy()
+            for k, v in deployment_parameters.items():
+                if k in original_params:
+                    merged_params[k] = v
+            updated_pipeline_spec = pydantic_utils.update_model(
+                source_snapshot.pipeline_spec, {"parameters": merged_params}
+            )
+    except Exception:
+        # In case of any unforeseen errors, fall back to the original spec
+        updated_pipeline_spec = source_snapshot.pipeline_spec
 
     return PipelineSnapshotRequest(
         project=source_snapshot.project_id,
@@ -454,5 +473,5 @@ def deployment_snapshot_request_from_source_snapshot(
         template=template_id,
         source_snapshot=source_snapshot_id,
         pipeline_version_hash=source_snapshot.pipeline_version_hash,
-        pipeline_spec=source_snapshot.pipeline_spec,
+        pipeline_spec=updated_pipeline_spec,
     )

Original file line number	Diff line number	Diff line change
`@@ -26,6 +26,7 @@`
`26`	`26`	`ENV_ZENML_STEP_OPERATOR,`
`27`	`27`	`handle_bool_env_var,`
`28`	`28`	`)`
	`29`	`+from zenml.deployers.server import runtime`
`29`	`30`	`from zenml.enums import ExecutionMode, ExecutionStatus`
`30`	`31`	`from zenml.environment import get_run_environment_dict`
`31`	`32`	`from zenml.exceptions import RunInterruptedException, RunStoppedException`
`@@ -438,7 +439,10 @@ def _run_step(`
`438`	`439`	`)`
`439`	`440`
`440`	`441`	`output_artifact_uris = output_utils.prepare_output_artifact_uris(`
`441`		`- step_run=step_run, stack=self._stack, step=self._step`
	`442`	`+ step_run=step_run,`
	`443`	`+ stack=self._stack,`
	`444`	`+ step=self._step,`
	`445`	`+ create_dirs=not runtime.should_use_in_memory_mode(),`
`442`	`446`	`)`
`443`	`447`
`444`	`448`	`start_time = time.time()`