Skip to content

Commit 3a46d39

Browse files
authored
Merge pull request #159 from DanielAvdar/docs
Refactor documentation and improve code clarity across modules.
2 parents 1bbd352 + 9c9352c commit 3a46d39

File tree

18 files changed

+436
-60
lines changed

18 files changed

+436
-60
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ test:
1212
coverage:
1313
poetry run pytest --cov=ml_orchestrator --cov-report=xml --junitxml=junit.xml -o junit_family=legacy
1414

15+
cov:
16+
poetry run pytest --cov=ml_orchestrator --cov-report=term-missing
17+
1518
check:
1619
poetry run pre-commit run --all-files
1720
mypy:

docs/source/usage.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ This guide provides a comprehensive walkthrough for utilizing the **ml-orchestra
77

88
Installation
99
------------
10-
To install the package, use `Poetry` with the following command:
10+
1111

1212
.. code-block:: bash
1313
14-
pip install ml-orchestrator
14+
pip install ml-orchestrator[editor]
1515
16-
The **ml-orchestrator** package has no external dependencies by default.
16+
.. note::
17+
The core **ml-orchestrator** package is intentionally designed to be dependency-free. This architectural decision ensures that components created with ml-orchestrator won't have unnecessary liabilities or dependencies, making them more portable and easier to maintain in production environments.
1718

1819
Quick Start
1920
-----------

ml_orchestrator/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""ML Orchestrator package for machine learning pipeline components management.
2+
3+
This package provides tools for creating, parsing, and managing machine learning
4+
pipeline components with a focus on Kubeflow Pipelines integration.
5+
"""
6+
17
from importlib.metadata import version
28

39
from .comp_parser import ComponentParser

ml_orchestrator/artifacts/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
"""Artifacts package for ML pipeline component inputs and outputs.
2+
3+
This package provides classes and types for representing and managing
4+
different kinds of machine learning artifacts such as models, datasets,
5+
and metrics, with support for JSON serialization.
6+
"""
7+
18
from typing import Annotated, TypeVar
29

310
from ml_orchestrator.artifacts.artifact import Artifact

ml_orchestrator/artifacts/artifact.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,32 @@
1+
"""Module for defining base artifact classes used in ML pipeline components.
2+
3+
This module provides base classes for representing and managing artifacts
4+
with support for JSON serialization of metadata.
5+
"""
6+
17
import json
28
from typing import Any, Dict, Mapping, Optional
39

410

511
class JSONSerializableDict(dict):
12+
"""Dictionary that ensures all values are JSON serializable.
13+
14+
A specialized dictionary that validates all values to ensure they can be
15+
serialized to JSON format. This is useful for storing metadata that
16+
needs to be serialized for storage or transmission.
17+
"""
18+
619
def __setitem__(self, key: Any, value: Any) -> None:
20+
"""Set an item in the dictionary after ensuring it's JSON serializable.
21+
22+
Args:
23+
key: The dictionary key
24+
value: The value to set, must be JSON serializable
25+
26+
Raises:
27+
ValueError: If the value cannot be serialized to JSON
28+
29+
"""
730
try:
831
json.dumps(value)
932
return super().__setitem__(key, value)
@@ -13,6 +36,16 @@ def __setitem__(self, key: Any, value: Any) -> None:
1336
raise ValueError(f"Value for key '{key}' is not JSON serializable: {value}")
1437

1538
def update(self, m: Mapping = None, **kwargs: Any) -> None: # type: ignore[override]
39+
"""Update the dictionary with another mapping after ensuring values are JSON serializable.
40+
41+
Args:
42+
m: A mapping to update from
43+
**kwargs: Additional key-value pairs to update with
44+
45+
Raises:
46+
ValueError: If any values cannot be serialized to JSON
47+
48+
"""
1649
try:
1750
json.dumps(m)
1851
return super().update(m, **kwargs)
@@ -22,30 +55,79 @@ def update(self, m: Mapping = None, **kwargs: Any) -> None: # type: ignore[over
2255
raise ValueError(f"Aritfact metadata must be JSON serializable, got: {m}")
2356

2457
def __or__(self, other: Mapping) -> "JSONSerializableDict":
58+
"""Implement the | operator to combine two mappings.
59+
60+
Args:
61+
other: Another mapping to combine with this one
62+
63+
Returns:
64+
A new JSONSerializableDict containing items from both mappings
65+
66+
"""
2567
new_dict = JSONSerializableDict(self)
2668
new_dict.update(other)
2769
return new_dict
2870

2971

3072
class Artifact:
73+
"""Base class for all artifacts used in ML pipeline components.
74+
75+
An artifact represents a file or directory that is an input to or output from
76+
a pipeline component, with associated metadata stored in a JSON-serializable format.
77+
78+
Attributes:
79+
schema_title: The schema title for this artifact type
80+
schema_version: The schema version for this artifact type
81+
82+
"""
83+
3184
schema_title = "system.Artifact"
3285
schema_version = "0.0.1"
3386

3487
def __init__(self, name: Optional[str] = None, uri: Optional[str] = None, metadata: Optional[Dict] = None) -> None:
88+
"""Initialize an artifact with a name, URI, and metadata.
89+
90+
Args:
91+
name: The name of the artifact
92+
uri: The URI where the artifact is stored
93+
metadata: A dictionary of metadata to associate with the artifact
94+
95+
"""
3596
self.uri = uri or "./"
3697
self.name = name or "artifact"
3798
self._metadata = JSONSerializableDict(metadata or dict())
3899

39100
@property
40101
def path(self) -> str:
102+
"""Get the path where the artifact is stored.
103+
104+
Returns:
105+
The URI of the artifact
106+
107+
"""
41108
return self.uri
42109

43110
@property
44111
def metadata(self) -> "JSONSerializableDict":
112+
"""Get the metadata associated with this artifact.
113+
114+
Returns:
115+
A JSON-serializable dictionary of metadata
116+
117+
"""
45118
return self._metadata
46119

47120
@metadata.setter
48121
def metadata(self, new_data: Mapping) -> None:
122+
"""Set new metadata for this artifact.
123+
124+
Args:
125+
new_data: A mapping containing the new metadata
126+
127+
Raises:
128+
ValueError: If the new metadata cannot be serialized to JSON
129+
130+
"""
49131
try:
50132
json.dumps(new_data)
51133
self._metadata = JSONSerializableDict(new_data)
Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""Module for specialized artifact types used in ML pipeline components.
2+
3+
This module defines various specialized artifact types that extend the base Artifact
4+
class, providing specific implementations for different kinds of ML outputs and inputs.
5+
"""
6+
17
from typing import Dict, Optional, Union
28

39
from ml_orchestrator.artifacts import Artifact
@@ -6,29 +12,94 @@
612

713

814
class Model(Artifact):
15+
"""Artifact type representing a machine learning model.
16+
17+
This artifact type is used for storing and managing machine learning models
18+
in pipeline components.
19+
20+
Attributes:
21+
schema_title: The schema title for this model artifact type
22+
23+
"""
24+
925
schema_title: str = "system.Model"
1026

1127

1228
class Dataset(Artifact):
13-
schema_title = "system.Dataset"
29+
"""Artifact type representing a dataset.
1430
15-
pass
31+
This artifact type is used for storing and managing datasets
32+
in pipeline components.
33+
34+
Attributes:
35+
schema_title: The schema title for this dataset artifact type
36+
37+
"""
38+
39+
schema_title = "system.Dataset"
1640

1741

1842
class HTML(Artifact):
43+
"""Artifact type representing HTML content.
44+
45+
This artifact type is used for storing and managing HTML content
46+
generated by pipeline components, such as visualizations or reports.
47+
48+
Attributes:
49+
schema_title: The schema title for this HTML artifact type
50+
51+
"""
52+
1953
schema_title = "system.HTML"
2054

2155
def __init__(self, name: Optional[str] = None, uri: Optional[str] = None, metadata: Optional[Dict] = None) -> None:
56+
"""Initialize an HTML artifact with a name, URI, and metadata.
57+
58+
Ensures the URI ends with .html extension.
59+
60+
Args:
61+
name: The name of the artifact
62+
uri: The URI where the artifact is stored
63+
metadata: A dictionary of metadata to associate with the artifact
64+
65+
"""
2266
super().__init__(name=name, uri=uri, metadata=metadata)
2367
self.uri = self.uri + ".html"
2468

2569

2670
class Markdown(Artifact):
71+
"""Artifact type representing Markdown content.
72+
73+
This artifact type is used for storing and managing Markdown content
74+
generated by pipeline components, such as documentation or reports.
75+
76+
Attributes:
77+
schema_title: The schema title for this Markdown artifact type
78+
79+
"""
80+
2781
schema_title = "system.Markdown"
2882

2983

3084
class Metrics(Artifact):
85+
"""Artifact type representing evaluation metrics.
86+
87+
This artifact type is used for storing and managing evaluation metrics
88+
from pipeline components, such as accuracy, precision, or custom metrics.
89+
90+
Attributes:
91+
schema_title: The schema title for this metrics artifact type
92+
93+
"""
94+
3195
schema_title = "system.Metrics"
3296

3397
def log_metric(self, metric: str, value: MetricTypes) -> None:
98+
"""Log a metric value with the given name.
99+
100+
Args:
101+
metric: The name of the metric to log
102+
value: The value of the metric, which must be one of the supported metric types
103+
104+
"""
34105
self.metadata[metric] = value
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""Module for artifact type notation definitions.
2+
3+
This module provides type annotations and notations for artifacts
4+
used in ML pipeline components.
5+
"""

ml_orchestrator/comp_parser.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
"""Module for parsing ML components into Kubeflow Pipeline components.
2+
3+
This module provides a ComponentParser class that extends FunctionParser to create
4+
and manage components for Kubeflow Pipelines (KFP), handling decorators and
5+
serialized representations.
6+
"""
7+
18
import dataclasses
29
from typing import List
310

@@ -8,28 +15,30 @@
815

916
@dataclasses.dataclass
1017
class ComponentParser(FunctionParser):
11-
"""
12-
A parser class that extends FunctionParser to create and manage components
13-
for Kubeflow pipelines (KFP), including decorators and serialized representations.
18+
"""A parser class that extends FunctionParser to create and manage components.
19+
20+
This class handles components for Kubeflow pipelines (KFP), including decorators
21+
and serialized representations.
1422
1523
Attributes:
1624
add_imports (List[str]): Additional import statements to prepend to output files.
1725
only_function (bool): Whether to generate only the function body.
26+
1827
"""
1928

2029
add_imports: List[str] = dataclasses.field(default_factory=lambda: [])
2130
only_function: bool = False
2231

2332
@classmethod
2433
def _create_decorator(cls, env: EnvironmentParams) -> str:
25-
"""
26-
Creates a component decorator string based on environment parameters.
34+
"""Create a component decorator string based on environment parameters.
2735
2836
Args:
2937
env (EnvironmentParams): The environment parameters object containing variable definitions.
3038
3139
Returns:
3240
str: A formatted string representing the component decorator.
41+
3342
"""
3443
dec_vars = env.comp_vars()
3544
prams = cls.get_func_params(dec_vars, with_typing=False)
@@ -41,28 +50,28 @@ def _create_decorator(cls, env: EnvironmentParams) -> str:
4150

4251
@classmethod
4352
def create_decorator(cls, component: MetaComponent) -> str:
44-
"""
45-
Creates a decorator string for a given MetaComponent.
53+
"""Create a decorator string for a given MetaComponent.
4654
4755
Args:
4856
component (MetaComponent): The component object used to generate a decorator.
4957
5058
Returns:
5159
str: A decorator string for use in Kubeflow components.
60+
5261
"""
5362
if isinstance(component, MetaComponent):
5463
return cls._create_decorator(component.env)
5564
return cls._create_decorator(component.env())
5665

5766
def _create_kfp_str(self, component: _MetaComponent) -> str: # type: ignore
58-
"""
59-
Generates a serialized Kubeflow Pipeline (KFP) component string.
67+
"""Generate a serialized Kubeflow Pipeline (KFP) component string.
6068
6169
Args:
6270
component (_MetaComponent): The component object to serialize.
6371
6472
Returns:
6573
str: A string representation of the KFP component, including decorators.
74+
6675
"""
6776
function_str = super()._create_kfp_str(component) # type: ignore
6877
if self.only_function:
@@ -75,15 +84,15 @@ def _create_kfp_str(self, component: _MetaComponent) -> str: # type: ignore
7584
return kfp_component_str + "\n"
7685

7786
def _write_to_file(self, filename: str, file_content: str) -> None:
78-
"""
79-
Writes component string content to a file, including additional imports.
87+
"""Write component string content to a file, including additional imports.
8088
8189
Args:
8290
filename (str): The name of the file to write to.
8391
file_content (str): The content to write to the file.
8492
8593
Returns:
8694
None
95+
8796
"""
8897
for imp in self.add_imports:
8998
file_content = f"{imp}\n{file_content}"

0 commit comments

Comments (0)