Feature:4015 Artifact/Model specific versioned identifiers

Json-Andriopoulos · Json-Andriopoulos · commit 9327944ace50 · 2025-10-23T12:31:33.000+03:00
diff --git a/docs/book/how-to/metadata/metadata.md b/docs/book/how-to/metadata/metadata.md
@@ -197,11 +197,14 @@ log_metadata(
 
 ## Bulk Metadata Logging
 
-The log_metadata function does not support logging the same metadata for multiple entities of the same type simultaneously. To achieve this, you can use the bulk_log_metadata function:
+The `log_metadata` function does not support logging the same metadata for multiple entities simultaneously. To achieve this, you can use the `bulk_log_metadata` function:
 
 ~~~python
 from zenml.models import (
-    PipelineRunIdentifier, StepRunIdentifier, VersionedIdentifier,
+    ArtifactVersionIdentifier,
+    ModelVersionIdentifier,
+    PipelineRunIdentifier,
+    StepRunIdentifier,
 )
 from zenml import bulk_log_metadata
 
@@ -213,32 +216,32 @@ bulk_log_metadata(
     ],
     step_runs=[
         StepRunIdentifier(id="<step_run_id>"),
-        StepRunIdentifier(name="<step_name>", pipeline=PipelineRunIdentifier(id="<run_id>"))
+        StepRunIdentifier(name="<step_name>", run=PipelineRunIdentifier(id="<run_id>"))
     ],
     artifact_versions=[
-        VersionedIdentifier(id="<artifact_version_id>"),
-        VersionedIdentifier(name="artifact_name", version="artifact_version")
+        ArtifactVersionIdentifier(id="<artifact_version_id>"),
+        ArtifactVersionIdentifier(name="artifact_name", version="artifact_version")
     ],
     model_versions=[
-        VersionedIdentifier(id="<model_version_id>"),
-        VersionedIdentifier(name="model_name", version="model_version")
+        ModelVersionIdentifier(id="<model_version_id>"),
+        ModelVersionIdentifier(name="model_name", version="model_version")
     ]
 )
 
 ~~~
 
-Note that the bulk_log_metadata function has a slightly different signature compared to log_metadata.
+Note that the `bulk_log_metadata` function has a slightly different signature compared to `log_metadata`.
 You can use the Identifier class objects to specify any parameter combination that uniquely identifies an object:
 
-* VersionedIdentifier
+* Versioned identifiers
+  * Use `ArtifactVersionIdentifier` for artifact versions and `ModelVersionIdentifier` for model versions.
   * Specify either an id or a combination of name and version.
-  * Used for artifact and model versions.
 * PipelineRunIdentifier
   * Specify an id, name, or prefix.
 * StepRunIdentifier
   * Specify an id or a combination of name and a pipeline run identifier.
 
-Similar to the log_metadata function, if you are calling bulk_log_metadata from within a step, you can use the infer options to automatically log metadata for the step’s model version or artifacts:
+Similar to the `log_metadata` function, if you are calling `bulk_log_metadata` from within a step, you can use the infer options to automatically log metadata for the step’s model version or artifacts:
 
 ```python
 from zenml import bulk_log_metadata, step
@@ -257,16 +260,16 @@ def get_train_test_datasets():
     return train_dataset, test_dataset
 ```
 
-Keep in mind that when using the infer_artifacts option, the bulk_log_metadata function logs metadata to all output artifacts of the step.
+Keep in mind that when using the `infer_artifacts` option, the `bulk_log_metadata` function logs metadata to all output artifacts of the step.
 
-## Performance improvements hints
+### Performance improvement hints
 
 Both `log_metadata` and `bulk_log_metadata` internally use parameters such as name and version to resolve the actual IDs of entities.
 For example, when you provide an artifact's name and version, the function performs an additional lookup to resolve the artifact version ID.
 
 To improve performance, prefer using the entity's ID directly instead of its name, version, or other identifiers whenever possible.
 
-## Using the client directly
+### Using the client directly
 
 If the `log_metadata` or `bulk_log_metadata` functions are too restrictive for your use case, you can use the ZenML Client directly to create run metadata for resources:
 
diff --git a/src/zenml/models/__init__.py b/src/zenml/models/__init__.py
@@ -323,7 +323,8 @@
 from zenml.models.v2.misc.param_groups import (
     PipelineRunIdentifier,
     StepRunIdentifier,
-    VersionedIdentifier,
+    ArtifactVersionIdentifier,
+    ModelVersionIdentifier,
 )
 from zenml.models.v2.misc.statistics import (
     ProjectStatistics,
@@ -879,7 +880,8 @@
     "ProjectStatistics",
     "PipelineRunDAG",
     "ExceptionInfo",
-    "VersionedIdentifier",
     "PipelineRunIdentifier",
     "StepRunIdentifier",
+    "ArtifactVersionIdentifier",
+    "ModelVersionIdentifier",
 ]
diff --git a/src/zenml/models/v2/misc/param_groups.py b/src/zenml/models/v2/misc/param_groups.py
@@ -1,4 +1,4 @@
-#  Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#  Copyright (c) ZenML GmbH 2025. All Rights Reserved.
 #
 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
@@ -45,6 +45,18 @@ def _validate_options(self) -> "VersionedIdentifier":
         return self
 
 
+class ArtifactVersionIdentifier(VersionedIdentifier):
+    """Class for artifact version identifier group."""
+
+    pass
+
+
+class ModelVersionIdentifier(VersionedIdentifier):
+    """Class for model version identifier group."""
+
+    pass
+
+
 class PipelineRunIdentifier(BaseModel):
     """Class grouping different pipeline run identifiers."""
 
@@ -89,7 +101,7 @@ class StepRunIdentifier(BaseModel):
 
     id: UUID | None = None
     name: str | None = None
-    pipeline: PipelineRunIdentifier | None = None
+    run: PipelineRunIdentifier | None = None
 
     @model_validator(mode="after")
     def _validate_options(self) -> "StepRunIdentifier":
@@ -105,9 +117,9 @@ def _validate_options(self) -> "StepRunIdentifier":
                 "Use either id or name."
             )
 
-        if bool(self.name) ^ bool(self.pipeline):
+        if bool(self.name) ^ bool(self.run):
             raise ValueError(
-                "To identify a run by name you need to specify a pipeline run identifier."
+                "To identify a step run by name you need to specify a pipeline run identifier."
             )
 
         return self
diff --git a/src/zenml/models/v2/misc/run_metadata.py b/src/zenml/models/v2/misc/run_metadata.py
@@ -36,13 +36,11 @@ def __eq__(self, other: Any) -> bool:
             other: The object to compare.
 
         Returns:
-            True if the object is equal to the given object.
+            True if the other object is a RunMetadataResource with the same hash, False otherwise (including when it is of a different type).
 
-        Raises:
-            TypeError: If the object is not an instance of RunMetadataResource.
         """
         if not isinstance(other, RunMetadataResource):
-            raise TypeError(f"Expected RunMetadataResource, got {type(other)}")
+            return False
 
         return hash(other) == hash(self)
 
diff --git a/src/zenml/utils/metadata_utils.py b/src/zenml/utils/metadata_utils.py
@@ -21,10 +21,11 @@
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType
 from zenml.models import (
+    ArtifactVersionIdentifier,
+    ModelVersionIdentifier,
     PipelineRunIdentifier,
     RunMetadataResource,
     StepRunIdentifier,
-    VersionedIdentifier,
 )
 from zenml.steps.step_context import get_step_context
 
@@ -377,8 +378,8 @@ def bulk_log_metadata(
     metadata: Dict[str, MetadataType],
     pipeline_runs: list[PipelineRunIdentifier] | None = None,
     step_runs: list[StepRunIdentifier] | None = None,
-    artifact_versions: list[VersionedIdentifier] | None = None,
-    model_versions: list[VersionedIdentifier] | None = None,
+    artifact_versions: list[ArtifactVersionIdentifier] | None = None,
+    model_versions: list[ModelVersionIdentifier] | None = None,
     infer_models: bool = False,
     infer_artifacts: bool = False,
 ) -> None:
@@ -416,7 +417,7 @@ def bulk_log_metadata(
         ]
     ):
         raise ValueError(
-            "You must select at least one pipeline/step/artifact/model to log metadata to."
+            "You must select at least one entity to log metadata to."
         )
 
     if infer_models and model_versions:
@@ -441,23 +442,21 @@ def bulk_log_metadata(
 
     # resolve pipeline runs and add metadata resources
 
-    for pipeline in pipeline_runs or []:
-        if not pipeline.id:
-            pipeline.id = client.get_pipeline_run(
-                name_id_or_prefix=pipeline.value
-            ).id
+    for run in pipeline_runs or []:
+        if not run.id:
+            run.id = client.get_pipeline_run(name_id_or_prefix=run.value).id
         resources.add(
             RunMetadataResource(
-                id=pipeline.id, type=MetadataResourceTypes.PIPELINE_RUN
+                id=run.id, type=MetadataResourceTypes.PIPELINE_RUN
             )
         )
 
     # resolve step runs and add metadata resources
 
     for step in step_runs or []:
-        if not step.id and (step.name and step.pipeline):
+        if not step.id and (step.name and step.run):
             step.id = (
-                client.get_pipeline_run(name_id_or_prefix=step.pipeline.value)
+                client.get_pipeline_run(name_id_or_prefix=step.run.value)
                 .steps[step.name]
                 .id
             )
@@ -471,7 +470,9 @@ def bulk_log_metadata(
     # resolve artifacts and add metadata resources
 
     for artifact_version in artifact_versions or []:
-        if not artifact_version.id and (artifact_version.name and artifact_version.version):
+        if not artifact_version.id and (
+            artifact_version.name and artifact_version.version
+        ):
             artifact_version.id = client.get_artifact_version(
                 name_id_or_prefix=artifact_version.name,
                 version=artifact_version.version,
diff --git a/tests/unit/models/test_param_groups.py b/tests/unit/models/test_param_groups.py
@@ -4,30 +4,34 @@
 from pydantic import ValidationError
 
 from zenml.models import (
+    ArtifactVersionIdentifier,
+    ModelVersionIdentifier,
     PipelineRunIdentifier,
     StepRunIdentifier,
-    VersionedIdentifier,
 )
 
 
 def test_versioned_identifier_validators():
-    v_id = VersionedIdentifier(id=uuid4())
-    assert v_id.id is not None and v_id.name is None and v_id.version is None
+    for cls in [ArtifactVersionIdentifier, ModelVersionIdentifier]:
+        v_id = cls(id=uuid4())
+        assert (
+            v_id.id is not None and v_id.name is None and v_id.version is None
+        )
 
-    v_nv = VersionedIdentifier(id=None, name="artifact", version="1.2.3")
-    assert v_nv.name == "artifact" and v_nv.version == "1.2.3"
+        v_nv = cls(id=None, name="artifact", version="1.2.3")
+        assert v_nv.name == "artifact" and v_nv.version == "1.2.3"
 
-    with pytest.raises(ValidationError):
-        VersionedIdentifier(id=uuid4(), name="artifact", version="1.0")
+        with pytest.raises(ValidationError):
+            cls(id=uuid4(), name="artifact", version="1.0")
 
-    with pytest.raises(ValidationError):
-        VersionedIdentifier(id=None, name=None, version=None)
+        with pytest.raises(ValidationError):
+            cls(id=None, name=None, version=None)
 
-    with pytest.raises(ValidationError):
-        VersionedIdentifier(name="artifact")
+        with pytest.raises(ValidationError):
+            cls(name="artifact")
 
-    with pytest.raises(ValidationError):
-        VersionedIdentifier(version="1.0.0")
+        with pytest.raises(ValidationError):
+            cls(version="1.0.0")
 
 
 def test_pipeline_run_identifier_validators():
@@ -63,30 +67,29 @@ def test_pipeline_run_identifier_validators():
 
 
 def test_step_run_identifier_validators():
-    s_id_only = StepRunIdentifier(id=uuid4(), name="", pipeline=None)
-    assert (
-        s_id_only.id is not None
-        and s_id_only.name == ""
-        and s_id_only.pipeline is None
-    )
+    id_ = uuid4()
+
+    s_id_only = StepRunIdentifier(id=id_)
+
+    assert s_id_only.id == id_
 
     run_ident = PipelineRunIdentifier(id=None, name="nightly", prefix=None)
 
     s_name_with_pipeline = StepRunIdentifier(
-        id=None, name="load_data", pipeline=run_ident
+        id=None, name="load_data", run=run_ident
     )
     assert s_name_with_pipeline.id is None
     assert s_name_with_pipeline.name == "load_data"
-    assert isinstance(s_name_with_pipeline.pipeline, PipelineRunIdentifier)
+    assert isinstance(s_name_with_pipeline.run, PipelineRunIdentifier)
 
     with pytest.raises(ValidationError):
-        StepRunIdentifier(id=uuid4(), name="transform", pipeline=run_ident)
+        StepRunIdentifier(id=uuid4(), name="transform", run=run_ident)
 
     with pytest.raises(ValidationError):
-        StepRunIdentifier(id=None, name="", pipeline=None)
+        StepRunIdentifier(id=None, name="")
 
     with pytest.raises(ValidationError):
-        StepRunIdentifier(id=None, name="train_model", pipeline=None)
+        StepRunIdentifier(id=None, name="train_model")
 
     with pytest.raises(ValidationError):
-        StepRunIdentifier(id=None, name="", pipeline=run_ident)
+        StepRunIdentifier(id=None, run=run_ident)
diff --git a/tests/unit/models/test_run_metadata.py b/tests/unit/models/test_run_metadata.py
@@ -1,7 +1,5 @@
 from uuid import uuid4
 
-import pytest
-
 from zenml.enums import MetadataResourceTypes
 from zenml.models import RunMetadataResource
 
@@ -31,8 +29,7 @@ def test_run_metadata_resource_equality_and_hash():
     )
     assert hash(r1) != hash(r4), "Hashes should differ for different types"
 
-    with pytest.raises(TypeError):
-        _ = r1 == "not_a_resource"
+    assert r1 != 1
 
     s = {r1, r2, r3}
     assert len(s) == 2, "Set should treat r1 and r2 as the same object"
diff --git a/tests/unit/utils/test_metadata_utils.py b/tests/unit/utils/test_metadata_utils.py