Feature:4015 Documentation update

Json-Andriopoulos · Json-Andriopoulos · commit f42ceba9eb31 · 2025-10-23T12:31:33.000+03:00
diff --git a/docs/book/how-to/metadata/metadata.md b/docs/book/how-to/metadata/metadata.md
@@ -195,6 +195,99 @@ log_metadata(
 )
 ```
 
+## Bulk Metadata Logging
+
+The `log_metadata` function does not support logging the same metadata to multiple entities of the same type in a single call. To achieve this, use the `bulk_log_metadata` function:
+
+~~~python
+from zenml.models import (
+    PipelineRunIdentifier, StepRunIdentifier, VersionedIdentifier,
+)
+from zenml import bulk_log_metadata
+
+bulk_log_metadata(
+    metadata={"python_version": "3.11", "environment": "macosx"},
+    pipeline_runs=[
+        PipelineRunIdentifier(id="<run_id>"),
+        PipelineRunIdentifier(name="run name")
+    ],
+    step_runs=[
+        StepRunIdentifier(id="<step_run_id>"),
+        StepRunIdentifier(name="<step_name>", pipeline=PipelineRunIdentifier(id="<run_id>"))
+    ],
+    artifact_versions=[
+        VersionedIdentifier(id="<artifact_version_id>"),
+        VersionedIdentifier(name="artifact_name", version="artifact_version")
+    ],
+    model_versions=[
+        VersionedIdentifier(id="<model_version_id>"),
+        VersionedIdentifier(name="model_name", version="model_version")
+    ]
+)
+
+~~~
+
+Note that the `bulk_log_metadata` function has a slightly different signature compared to `log_metadata`.
+You can use the identifier classes to specify any parameter combination that uniquely identifies an object:
+
+* `VersionedIdentifier`
+  * Specify either an `id` or a combination of `name` and `version`.
+  * Used for artifact versions and model versions.
+* `PipelineRunIdentifier`
+  * Specify an `id`, a `name`, or a prefix.
+* `StepRunIdentifier`
+  * Specify an `id` or a combination of `name` and a pipeline run identifier (`pipeline`).
+
+Similar to the `log_metadata` function, if you call `bulk_log_metadata` from within a step, you can use the `infer_models` and `infer_artifacts` options to automatically log metadata for the step's model version or output artifacts:
+
+```python
+from zenml import bulk_log_metadata, step
+
+@step()
+def get_train_test_datasets():
+    
+    train_dataset, test_dataset = get_datasets()
+
+    bulk_log_metadata(
+        metadata={"python_version": "3.11", "environment": "macosx"},
+        infer_models=True,
+        infer_artifacts=True
+    )
+    
+    return train_dataset, test_dataset
+```
+
+Keep in mind that when using the `infer_artifacts` option, the `bulk_log_metadata` function logs metadata to all output artifacts of the step.
+
+## Performance improvement hints
+
+Both `log_metadata` and `bulk_log_metadata` internally use parameters such as name and version to resolve the actual IDs of entities.
+For example, when you provide an artifact's name and version, the function performs an additional lookup to resolve the artifact version ID.
+
+To improve performance, prefer using the entity's ID directly instead of its name, version, or other identifiers whenever possible.
+
+## Using the client directly
+
+If the `log_metadata` or `bulk_log_metadata` functions are too restrictive for your use case, you can use the ZenML Client directly to create run metadata for resources:
+
+~~~python
+from zenml.client import Client
+from zenml.enums import MetadataResourceTypes
+from zenml.models import RunMetadataResource
+
+client = Client()
+
+client.create_run_metadata(
+    metadata={"python": "3.11"},
+    resources=[
+        RunMetadataResource(id="<step_run_id>", type=MetadataResourceTypes.STEP_RUN),
+        RunMetadataResource(id="<run_id>", type=MetadataResourceTypes.PIPELINE_RUN),
+        RunMetadataResource(id="<artifact_version_id>", type=MetadataResourceTypes.ARTIFACT_VERSION),
+        RunMetadataResource(id="<model_version_id>", type=MetadataResourceTypes.MODEL_VERSION)
+    ]
+)
+~~~
+
 ## Special Metadata Types
 
 ZenML includes several special metadata types that provide standardized ways to represent common metadata:
diff --git a/src/zenml/__init__.py b/src/zenml/__init__.py
@@ -59,7 +59,7 @@ def __getattr__(name: str) -> Any:
 from zenml.pipelines import get_pipeline_context, pipeline
 from zenml.steps import step, get_step_context
 from zenml.steps.utils import log_step_metadata
-from zenml.utils.metadata_utils import log_metadata
+from zenml.utils.metadata_utils import log_metadata, bulk_log_metadata
 from zenml.utils.tag_utils import Tag, add_tags, remove_tags
 
 
@@ -73,6 +73,7 @@ def __getattr__(name: str) -> Any:
     "get_step_context",
     "load_artifact",
     "log_metadata",
+    "bulk_log_metadata",
     "log_artifact_metadata",
     "log_model_metadata",
     "log_step_metadata",
diff --git a/src/zenml/models/v2/misc/run_metadata.py b/src/zenml/models/v2/misc/run_metadata.py
@@ -29,7 +29,7 @@ class RunMetadataResource(BaseModel):
     id: UUID = Field(title="The ID of the resource.")
     type: MetadataResourceTypes = Field(title="The type of the resource.")
 
-    def __eq__(self, other: Any):
+    def __eq__(self, other: Any) -> bool:
         """Overrides equality operator.
 
         Args:
diff --git a/src/zenml/utils/metadata_utils.py b/src/zenml/utils/metadata_utils.py
@@ -455,7 +455,7 @@ def bulk_log_metadata(
     # resolve step runs and add metadata resources
 
     for step in step_runs or []:
-        if not step.id:
+        if not step.id and (step.name and step.pipeline):
             step.id = (
                 client.get_pipeline_run(name_id_or_prefix=step.pipeline.value)
                 .steps[step.name]
@@ -471,7 +471,7 @@ def bulk_log_metadata(
     # resolve artifacts and add metadata resources
 
     for artifact_version in artifact_versions or []:
-        if not artifact_version.id:
+        if not artifact_version.id and (artifact_version.name and artifact_version.version):
             artifact_version.id = client.get_artifact_version(
                 name_id_or_prefix=artifact_version.name,
                 version=artifact_version.version,
@@ -499,11 +499,11 @@ def bulk_log_metadata(
 
     # infer models - resolve from step context
 
-    if infer_models and not step_context.model_version:
+    if infer_models and step_context and not step_context.model_version:
         raise ValueError(
             "The step context does not feature any model versions."
         )
-    elif infer_models:
+    elif infer_models and step_context and step_context.model_version:
         resources.add(
             RunMetadataResource(
                 id=step_context.model_version.id,
@@ -513,7 +513,7 @@ def bulk_log_metadata(
 
     # infer artifacts - resolve from step context
 
-    if infer_artifacts:
+    if infer_artifacts and step_context:
         step_output_names = list(step_context._outputs.keys())
 
         for artifact_name in step_output_names: