Skip to content

Commit f42ceba

Browse files
Feature:4015 Documentation update
1 parent 97a2db0 commit f42ceba

File tree

4 files changed

+101
-7
lines changed

4 files changed

+101
-7
lines changed

docs/book/how-to/metadata/metadata.md

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,99 @@ log_metadata(
195195
)
196196
```
197197

198+
## Bulk Metadata Logging
199+
200+
The `log_metadata` function cannot log the same metadata to multiple entities of the same type in a single call. To do this, use the `bulk_log_metadata` function:
201+
202+
~~~python
203+
from zenml.models import (
204+
PipelineRunIdentifier, StepRunIdentifier, VersionedIdentifier,
205+
)
206+
from zenml import bulk_log_metadata
207+
208+
bulk_log_metadata(
209+
metadata={"python_version": "3.11", "environment": "macosx"},
210+
pipeline_runs=[
211+
PipelineRunIdentifier(id="<run_id>"),
212+
PipelineRunIdentifier(name="run name")
213+
],
214+
step_runs=[
215+
StepRunIdentifier(id="<step_run_id>"),
216+
StepRunIdentifier(name="<step_name>", pipeline=PipelineRunIdentifier(id="<run_id>"))
217+
],
218+
artifact_versions=[
219+
VersionedIdentifier(id="<artifact_version_id>"),
220+
VersionedIdentifier(name="artifact_name", version="artifact_version")
221+
],
222+
model_versions=[
223+
VersionedIdentifier(id="<model_version_id>"),
224+
VersionedIdentifier(name="model_name", version="model_version")
225+
]
226+
)
227+
228+
~~~
229+
230+
Note that the `bulk_log_metadata` function has a slightly different signature compared to `log_metadata`.
231+
You can use the identifier classes to specify any parameter combination that uniquely identifies an object:
232+
233+
* VersionedIdentifier
234+
  * Specify either an id or a combination of name and version.
235+
  * Used for artifact and model versions.
236+
* PipelineRunIdentifier
237+
  * Specify an id, name, or prefix.
238+
* StepRunIdentifier
239+
  * Specify an id or a combination of name and a pipeline run identifier.
240+
241+
Similar to the `log_metadata` function, if you are calling `bulk_log_metadata` from within a step, you can use the `infer_models` and `infer_artifacts` options to automatically log metadata for the step's model version or artifacts:
242+
243+
```python
244+
from zenml import bulk_log_metadata, step
245+
246+
@step()
247+
def get_train_test_datasets():
248+
249+
train_dataset, test_dataset = get_datasets()
250+
251+
bulk_log_metadata(
252+
metadata={"python_version": "3.11", "environment": "macosx"},
253+
infer_models=True,
254+
infer_artifacts=True
255+
)
256+
257+
return train_dataset, test_dataset
258+
```
259+
260+
Keep in mind that when using the `infer_artifacts` option, the `bulk_log_metadata` function logs metadata to all output artifacts of the step.
261+
262+
## Performance improvement hints
263+
264+
Both `log_metadata` and `bulk_log_metadata` internally use parameters such as name and version to resolve the actual IDs of entities.
265+
For example, when you provide an artifact's name and version, the function performs an additional lookup to resolve the artifact version ID.
266+
267+
To improve performance, prefer using the entity's ID directly instead of its name, version, or other identifiers whenever possible.
268+
269+
## Using the client directly
270+
271+
If the `log_metadata` or `bulk_log_metadata` functions are too restrictive for your use case, you can use the ZenML Client directly to create run metadata for resources:
272+
273+
~~~python
274+
from zenml.client import Client
275+
from zenml.enums import MetadataResourceTypes
276+
from zenml.models import RunMetadataResource
277+
278+
client = Client()
279+
280+
client.create_run_metadata(
281+
metadata={"python": "3.11"},
282+
resources=[
283+
RunMetadataResource(id="<step_run_id>", type=MetadataResourceTypes.STEP_RUN),
284+
RunMetadataResource(id="<run_id>", type=MetadataResourceTypes.PIPELINE_RUN),
285+
RunMetadataResource(id="<artifact_version_id>", type=MetadataResourceTypes.ARTIFACT_VERSION),
286+
RunMetadataResource(id="<model_version_id>", type=MetadataResourceTypes.MODEL_VERSION)
287+
]
288+
)
289+
~~~
290+
198291
## Special Metadata Types
199292

200293
ZenML includes several special metadata types that provide standardized ways to represent common metadata:

src/zenml/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def __getattr__(name: str) -> Any:
5959
from zenml.pipelines import get_pipeline_context, pipeline
6060
from zenml.steps import step, get_step_context
6161
from zenml.steps.utils import log_step_metadata
62-
from zenml.utils.metadata_utils import log_metadata
62+
from zenml.utils.metadata_utils import log_metadata, bulk_log_metadata
6363
from zenml.utils.tag_utils import Tag, add_tags, remove_tags
6464

6565

@@ -73,6 +73,7 @@ def __getattr__(name: str) -> Any:
7373
"get_step_context",
7474
"load_artifact",
7575
"log_metadata",
76+
"bulk_log_metadata",
7677
"log_artifact_metadata",
7778
"log_model_metadata",
7879
"log_step_metadata",

src/zenml/models/v2/misc/run_metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class RunMetadataResource(BaseModel):
2929
id: UUID = Field(title="The ID of the resource.")
3030
type: MetadataResourceTypes = Field(title="The type of the resource.")
3131

32-
def __eq__(self, other: Any):
32+
def __eq__(self, other: Any) -> bool:
3333
"""Overrides equality operator.
3434
3535
Args:

src/zenml/utils/metadata_utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ def bulk_log_metadata(
455455
# resolve step runs and add metadata resources
456456

457457
for step in step_runs or []:
458-
if not step.id:
458+
if not step.id and (step.name and step.pipeline):
459459
step.id = (
460460
client.get_pipeline_run(name_id_or_prefix=step.pipeline.value)
461461
.steps[step.name]
@@ -471,7 +471,7 @@ def bulk_log_metadata(
471471
# resolve artifacts and add metadata resources
472472

473473
for artifact_version in artifact_versions or []:
474-
if not artifact_version.id:
474+
if not artifact_version.id and (artifact_version.name and artifact_version.version):
475475
artifact_version.id = client.get_artifact_version(
476476
name_id_or_prefix=artifact_version.name,
477477
version=artifact_version.version,
@@ -499,11 +499,11 @@ def bulk_log_metadata(
499499

500500
# infer models - resolve from step context
501501

502-
if infer_models and not step_context.model_version:
502+
if infer_models and step_context and not step_context.model_version:
503503
raise ValueError(
504504
"The step context does not feature any model versions."
505505
)
506-
elif infer_models:
506+
elif infer_models and step_context and step_context.model_version:
507507
resources.add(
508508
RunMetadataResource(
509509
id=step_context.model_version.id,
@@ -513,7 +513,7 @@ def bulk_log_metadata(
513513

514514
# infer artifacts - resolve from step context
515515

516-
if infer_artifacts:
516+
if infer_artifacts and step_context:
517517
step_output_names = list(step_context._outputs.keys())
518518

519519
for artifact_name in step_output_names:

0 commit comments

Comments
 (0)