Skip to content

Commit 9327944

Browse files
Feature:4015 Artifact/Model specific versioned identifiers
1 parent f42ceba commit 9327944

File tree

8 files changed

+92
-75
lines changed

8 files changed

+92
-75
lines changed

docs/book/how-to/metadata/metadata.md

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,14 @@ log_metadata(
197197

198198
## Bulk Metadata Logging
199199

200-
The log_metadata function does not support logging the same metadata for multiple entities of the same type simultaneously. To achieve this, you can use the bulk_log_metadata function:
200+
The `log_metadata` function does not support logging the same metadata for multiple entities simultaneously. To achieve this, you can use the `bulk_log_metadata` function:
201201

202202
~~~python
203203
from zenml.models import (
204-
PipelineRunIdentifier, StepRunIdentifier, VersionedIdentifier,
204+
ArtifactVersionIdentifier,
205+
ModelVersionIdentifier,
206+
PipelineRunIdentifier,
207+
StepRunIdentifier,
205208
)
206209
from zenml import bulk_log_metadata
207210

@@ -213,32 +216,32 @@ bulk_log_metadata(
213216
],
214217
step_runs=[
215218
StepRunIdentifier(id="<step_run_id>"),
216-
StepRunIdentifier(name="<step_name>", pipeline=PipelineRunIdentifier(id="<run_id>"))
219+
StepRunIdentifier(name="<step_name>", run=PipelineRunIdentifier(id="<run_id>"))
217220
],
218221
artifact_versions=[
219-
VersionedIdentifier(id="<artifact_version_id>"),
220-
VersionedIdentifier(name="artifact_name", version="artifact_version")
222+
ArtifactVersionIdentifier(id="<artifact_version_id>"),
223+
ArtifactVersionIdentifier(name="artifact_name", version="artifact_version")
221224
],
222225
model_versions=[
223-
VersionedIdentifier(id="<model_version_id>"),
224-
VersionedIdentifier(name="model_name", version="model_version")
226+
ModelVersionIdentifier(id="<model_version_id>"),
227+
ModelVersionIdentifier(name="model_name", version="model_version")
225228
]
226229
)
227230

228231
~~~
229232

230-
Note that the bulk_log_metadata function has a slightly different signature compared to log_metadata.
233+
Note that the `bulk_log_metadata` function has a slightly different signature compared to `log_metadata`.
231234
You can use the identifier classes to specify any combination of parameters that uniquely identifies an object:
232235

233-
* VersionedIdentifier
236+
* VersionedIdentifiers
237+
* ArtifactVersionIdentifier & ModelVersionIdentifier
234238
* Specify either an id or a combination of name and version.
235-
* Used for artifact and model versions.
236239
* PipelineRunIdentifier
237240
* Specify an id, name, or prefix.
238241
* StepRunIdentifier
239242
* Specify an id, or a combination of the step name and a pipeline run identifier (passed as `run`).
240243

241-
Similar to the log_metadata function, if you are calling bulk_log_metadata from within a step, you can use the infer options to automatically log metadata for the step’s model version or artifacts:
244+
Similar to the `log_metadata` function, if you are calling `bulk_log_metadata` from within a step, you can use the infer options to automatically log metadata for the step’s model version or artifacts:
242245

243246
```python
244247
from zenml import bulk_log_metadata, step
@@ -257,16 +260,16 @@ def get_train_test_datasets():
257260
return train_dataset, test_dataset
258261
```
259262

260-
Keep in mind that when using the infer_artifacts option, the bulk_log_metadata function logs metadata to all output artifacts of the step.
263+
Keep in mind that when using the `infer_artifacts` option, the `bulk_log_metadata` function logs metadata to all output artifacts of the step.
261264

262-
## Performance improvements hints
265+
### Performance improvement hints
263266

264267
Both `log_metadata` and `bulk_log_metadata` internally use parameters such as name and version to resolve the actual IDs of entities.
265268
For example, when you provide an artifact's name and version, the function performs an additional lookup to resolve the artifact version ID.
266269

267270
To improve performance, prefer using the entity's ID directly instead of its name, version, or other identifiers whenever possible.
268271

269-
## Using the client directly
272+
### Using the client directly
270273

271274
If the `log_metadata` or `bulk_log_metadata` functions are too restrictive for your use case, you can use the ZenML Client directly to create run metadata for resources:
272275

src/zenml/models/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,8 @@
323323
from zenml.models.v2.misc.param_groups import (
324324
PipelineRunIdentifier,
325325
StepRunIdentifier,
326-
VersionedIdentifier,
326+
ArtifactVersionIdentifier,
327+
ModelVersionIdentifier,
327328
)
328329
from zenml.models.v2.misc.statistics import (
329330
ProjectStatistics,
@@ -879,7 +880,8 @@
879880
"ProjectStatistics",
880881
"PipelineRunDAG",
881882
"ExceptionInfo",
882-
"VersionedIdentifier",
883883
"PipelineRunIdentifier",
884884
"StepRunIdentifier",
885+
"ArtifactVersionIdentifier",
886+
"ModelVersionIdentifier",
885887
]

src/zenml/models/v2/misc/param_groups.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
1+
# Copyright (c) ZenML GmbH 2025. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -45,6 +45,18 @@ def _validate_options(self) -> "VersionedIdentifier":
4545
return self
4646

4747

48+
class ArtifactVersionIdentifier(VersionedIdentifier):
49+
"""Class for artifact version identifier group."""
50+
51+
pass
52+
53+
54+
class ModelVersionIdentifier(VersionedIdentifier):
55+
"""Class for model version identifier group."""
56+
57+
pass
58+
59+
4860
class PipelineRunIdentifier(BaseModel):
4961
"""Class grouping different pipeline run identifiers."""
5062

@@ -89,7 +101,7 @@ class StepRunIdentifier(BaseModel):
89101

90102
id: UUID | None = None
91103
name: str | None = None
92-
pipeline: PipelineRunIdentifier | None = None
104+
run: PipelineRunIdentifier | None = None
93105

94106
@model_validator(mode="after")
95107
def _validate_options(self) -> "StepRunIdentifier":
@@ -105,9 +117,9 @@ def _validate_options(self) -> "StepRunIdentifier":
105117
"Use either id or name."
106118
)
107119

108-
if bool(self.name) ^ bool(self.pipeline):
120+
if bool(self.name) ^ bool(self.run):
109121
raise ValueError(
110-
"To identify a run by name you need to specify a pipeline run identifier."
122+
"To identify a step run by name you need to specify a pipeline run identifier."
111123
)
112124

113125
return self

src/zenml/models/v2/misc/run_metadata.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,11 @@ def __eq__(self, other: Any) -> bool:
3636
other: The object to compare.
3737
3838
Returns:
39-
True if the object is equal to the given object.
39+
True if the object is equal to the given object. Will always return False if compared to a different type.
4040
41-
Raises:
42-
TypeError: If the object is not an instance of RunMetadataResource.
4341
"""
4442
if not isinstance(other, RunMetadataResource):
45-
raise TypeError(f"Expected RunMetadataResource, got {type(other)}")
43+
return False
4644

4745
return hash(other) == hash(self)
4846

src/zenml/utils/metadata_utils.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@
2121
from zenml.logger import get_logger
2222
from zenml.metadata.metadata_types import MetadataType
2323
from zenml.models import (
24+
ArtifactVersionIdentifier,
25+
ModelVersionIdentifier,
2426
PipelineRunIdentifier,
2527
RunMetadataResource,
2628
StepRunIdentifier,
27-
VersionedIdentifier,
2829
)
2930
from zenml.steps.step_context import get_step_context
3031

@@ -377,8 +378,8 @@ def bulk_log_metadata(
377378
metadata: Dict[str, MetadataType],
378379
pipeline_runs: list[PipelineRunIdentifier] | None = None,
379380
step_runs: list[StepRunIdentifier] | None = None,
380-
artifact_versions: list[VersionedIdentifier] | None = None,
381-
model_versions: list[VersionedIdentifier] | None = None,
381+
artifact_versions: list[ArtifactVersionIdentifier] | None = None,
382+
model_versions: list[ModelVersionIdentifier] | None = None,
382383
infer_models: bool = False,
383384
infer_artifacts: bool = False,
384385
) -> None:
@@ -416,7 +417,7 @@ def bulk_log_metadata(
416417
]
417418
):
418419
raise ValueError(
419-
"You must select at least one pipeline/step/artifact/model to log metadata to."
420+
"You must select at least one entity to log metadata to."
420421
)
421422

422423
if infer_models and model_versions:
@@ -441,23 +442,21 @@ def bulk_log_metadata(
441442

442443
# resolve pipeline runs and add metadata resources
443444

444-
for pipeline in pipeline_runs or []:
445-
if not pipeline.id:
446-
pipeline.id = client.get_pipeline_run(
447-
name_id_or_prefix=pipeline.value
448-
).id
445+
for run in pipeline_runs or []:
446+
if not run.id:
447+
run.id = client.get_pipeline_run(name_id_or_prefix=run.value).id
449448
resources.add(
450449
RunMetadataResource(
451-
id=pipeline.id, type=MetadataResourceTypes.PIPELINE_RUN
450+
id=run.id, type=MetadataResourceTypes.PIPELINE_RUN
452451
)
453452
)
454453

455454
# resolve step runs and add metadata resources
456455

457456
for step in step_runs or []:
458-
if not step.id and (step.name and step.pipeline):
457+
if not step.id and (step.name and step.run):
459458
step.id = (
460-
client.get_pipeline_run(name_id_or_prefix=step.pipeline.value)
459+
client.get_pipeline_run(name_id_or_prefix=step.run.value)
461460
.steps[step.name]
462461
.id
463462
)
@@ -471,7 +470,9 @@ def bulk_log_metadata(
471470
# resolve artifacts and add metadata resources
472471

473472
for artifact_version in artifact_versions or []:
474-
if not artifact_version.id and (artifact_version.name and artifact_version.version):
473+
if not artifact_version.id and (
474+
artifact_version.name and artifact_version.version
475+
):
475476
artifact_version.id = client.get_artifact_version(
476477
name_id_or_prefix=artifact_version.name,
477478
version=artifact_version.version,

tests/unit/models/test_param_groups.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,34 @@
44
from pydantic import ValidationError
55

66
from zenml.models import (
7+
ArtifactVersionIdentifier,
8+
ModelVersionIdentifier,
79
PipelineRunIdentifier,
810
StepRunIdentifier,
9-
VersionedIdentifier,
1011
)
1112

1213

1314
def test_versioned_identifier_validators():
14-
v_id = VersionedIdentifier(id=uuid4())
15-
assert v_id.id is not None and v_id.name is None and v_id.version is None
15+
for cls in [ArtifactVersionIdentifier, ModelVersionIdentifier]:
16+
v_id = cls(id=uuid4())
17+
assert (
18+
v_id.id is not None and v_id.name is None and v_id.version is None
19+
)
1620

17-
v_nv = VersionedIdentifier(id=None, name="artifact", version="1.2.3")
18-
assert v_nv.name == "artifact" and v_nv.version == "1.2.3"
21+
v_nv = cls(id=None, name="artifact", version="1.2.3")
22+
assert v_nv.name == "artifact" and v_nv.version == "1.2.3"
1923

20-
with pytest.raises(ValidationError):
21-
VersionedIdentifier(id=uuid4(), name="artifact", version="1.0")
24+
with pytest.raises(ValidationError):
25+
cls(id=uuid4(), name="artifact", version="1.0")
2226

23-
with pytest.raises(ValidationError):
24-
VersionedIdentifier(id=None, name=None, version=None)
27+
with pytest.raises(ValidationError):
28+
cls(id=None, name=None, version=None)
2529

26-
with pytest.raises(ValidationError):
27-
VersionedIdentifier(name="artifact")
30+
with pytest.raises(ValidationError):
31+
cls(name="artifact")
2832

29-
with pytest.raises(ValidationError):
30-
VersionedIdentifier(version="1.0.0")
33+
with pytest.raises(ValidationError):
34+
cls(version="1.0.0")
3135

3236

3337
def test_pipeline_run_identifier_validators():
@@ -63,30 +67,29 @@ def test_pipeline_run_identifier_validators():
6367

6468

6569
def test_step_run_identifier_validators():
66-
s_id_only = StepRunIdentifier(id=uuid4(), name="", pipeline=None)
67-
assert (
68-
s_id_only.id is not None
69-
and s_id_only.name == ""
70-
and s_id_only.pipeline is None
71-
)
70+
id_ = uuid4()
71+
72+
s_id_only = StepRunIdentifier(id=id_)
73+
74+
assert s_id_only.id == id_
7275

7376
run_ident = PipelineRunIdentifier(id=None, name="nightly", prefix=None)
7477

7578
s_name_with_pipeline = StepRunIdentifier(
76-
id=None, name="load_data", pipeline=run_ident
79+
id=None, name="load_data", run=run_ident
7780
)
7881
assert s_name_with_pipeline.id is None
7982
assert s_name_with_pipeline.name == "load_data"
80-
assert isinstance(s_name_with_pipeline.pipeline, PipelineRunIdentifier)
83+
assert isinstance(s_name_with_pipeline.run, PipelineRunIdentifier)
8184

8285
with pytest.raises(ValidationError):
83-
StepRunIdentifier(id=uuid4(), name="transform", pipeline=run_ident)
86+
StepRunIdentifier(id=uuid4(), name="transform", run=run_ident)
8487

8588
with pytest.raises(ValidationError):
86-
StepRunIdentifier(id=None, name="", pipeline=None)
89+
StepRunIdentifier(id=None, name="")
8790

8891
with pytest.raises(ValidationError):
89-
StepRunIdentifier(id=None, name="train_model", pipeline=None)
92+
StepRunIdentifier(id=None, name="train_model")
9093

9194
with pytest.raises(ValidationError):
92-
StepRunIdentifier(id=None, name="", pipeline=run_ident)
95+
StepRunIdentifier(id=None, run=run_ident)

tests/unit/models/test_run_metadata.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
from uuid import uuid4
22

3-
import pytest
4-
53
from zenml.enums import MetadataResourceTypes
64
from zenml.models import RunMetadataResource
75

@@ -31,8 +29,7 @@ def test_run_metadata_resource_equality_and_hash():
3129
)
3230
assert hash(r1) != hash(r4), "Hashes should differ for different types"
3331

34-
with pytest.raises(TypeError):
35-
_ = r1 == "not_a_resource"
32+
assert r1 != 1
3633

3734
s = {r1, r2, r3}
3835
assert len(s) == 2, "Set should treat r1 and r2 as the same object"

0 commit comments

Comments
 (0)