Commit 93993ea

Merge branch 'main' into refactor/fix-broken-projects
2 parents 8d5e298 + 8b8bb8d commit 93993ea

126 files changed (+1286, -1591 lines)

Large commits have some content hidden by default; only a subset of the 126 changed files is shown below.

airflow-cloud-composer-etl-feature-train/steps/training/model_trainer.py

Lines changed: 2 additions & 1 deletion
@@ -21,6 +21,7 @@
 from materializers import BigQueryDataset, CSVDataset
 from typing_extensions import Annotated
 from zenml import ArtifactConfig, step
+from zenml.enums import ArtifactType
 from zenml.logger import get_logger
 
 logger = get_logger(__name__)
@@ -31,7 +32,7 @@ def train_xgboost_model(
     dataset: Union[BigQueryDataset, CSVDataset],
 ) -> Tuple[
     Annotated[
-        xgb.Booster, ArtifactConfig(name="xgb_model", is_model_artifact=True)
+        xgb.Booster, ArtifactConfig(name="xgb_model", artifact_type=ArtifactType.MODEL)
     ],
     Annotated[Dict[str, float], "metrics"],
 ]:
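
For orientation, here is a minimal sketch of the annotation style these files migrate to: `artifact_type=ArtifactType.MODEL` replaces the removed `is_model_artifact=True` flag. The training body below is illustrative, not the repository's actual step.

```python
# Minimal sketch of the new ArtifactConfig style adopted in this commit;
# the training logic is illustrative only.
from typing import Dict, Tuple

import pandas as pd
import xgboost as xgb
from typing_extensions import Annotated
from zenml import ArtifactConfig, step
from zenml.enums import ArtifactType


@step
def train_xgboost_model(train_df: pd.DataFrame, target: str = "target") -> Tuple[
    Annotated[
        xgb.Booster,
        ArtifactConfig(name="xgb_model", artifact_type=ArtifactType.MODEL),
    ],
    Annotated[Dict[str, float], "metrics"],
]:
    # Train a small booster and report a single metric as a second output.
    dtrain = xgb.DMatrix(train_df.drop(columns=[target]), label=train_df[target])
    booster = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=10)
    preds = (booster.predict(dtrain) > 0.5).astype(int)
    accuracy = float((preds == train_df[target]).mean())
    return booster, {"train_accuracy": accuracy}
```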

classifier-e2e/README.md

Lines changed: 59 additions & 41 deletions
@@ -11,58 +11,76 @@ pinned: false
 license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# ZenML MLOps Breast Cancer Classification Demo
 
-# 📜 ZenML Stack Show Case
+## 🌍 Project Overview
 
-This project aims to demonstrate the power of stacks. The code in this
-project assumes that you have quite a few stacks registered already.
+This is a minimalistic MLOps project demonstrating how to put machine learning
+workflows into production using ZenML. The project focuses on building a breast
+cancer classification model with end-to-end ML pipeline management.
 
-## default
-* `default` Orchestrator
-* `default` Artifact Store
+### Key Features
 
-```commandline
-zenml stack set default
-python run.py --training-pipeline
+- 🔬 Feature engineering pipeline
+- 🤖 Model training pipeline
+- 🧪 Batch inference pipeline
+- 📊 Artifact and model lineage tracking
+- 🔗 Integration with Weights & Biases for experiment tracking
+
+## 🚀 Installation
+
+1. Clone the repository
+2. Install requirements:
+```bash
+pip install -r requirements.txt
+```
+3. Install ZenML integrations:
+```bash
+zenml integration install sklearn xgboost wandb -y
+zenml login
+zenml init
+```
+4. You need to register a stack with a [Weights & Biases Experiment Tracker](https://docs.zenml.io/stack-components/experiment-trackers/wandb).
+
+## 🧠 Project Structure
+
+- `steps/`: Contains individual pipeline steps
+- `pipelines/`: Pipeline definitions
+- `run.py`: Main script to execute pipelines
+
+## 🔍 Workflow and Execution
+
+First, you need to set your stack:
+
+```bash
+zenml stack set stack-with-wandb
 ```
 
-## local-sagemaker-step-operator-stack
-* `default` Orchestrator
-* `s3` Artifact Store
-* `local` Image Builder
-* `aws` Container Registry
-* `Sagemaker` Step Operator
+### 1. Data Loading and Feature Engineering
 
-```commandline
-zenml stack set local-sagemaker-step-operator-stack
-zenml integration install aws -y
-python run.py --training-pipeline
+- Uses the Breast Cancer dataset from scikit-learn
+- Splits data into training and inference sets
+- Preprocesses data for model training
+
+```bash
+python run.py --feature-pipeline
 ```
 
-## sagemaker-airflow-stack
-* `Airflow` Orchestrator
-* `s3` Artifact Store
-* `local` Image Builder
-* `aws` Container Registry
-* `Sagemaker` Step Operator
-
-```commandline
-zenml stack set sagemaker-airflow-stack
-zenml integration install airflow -y
-pip install apache-airflow-providers-docker apache-airflow~=2.5.0
-zenml stack up
+### 2. Model Training
+
+- Supports multiple model types (SGD, XGBoost)
+- Evaluates and compares model performance
+- Tracks model metrics with Weights & Biases
+
+```bash
 python run.py --training-pipeline
 ```
 
-## sagemaker-stack
-* `Sagemaker` Orchestrator
-* `s3` Artifact Store
-* `local` Image Builder
-* `aws` Container Registry
-* `Sagemaker` Step Operator
+### 3. Batch Inference
 
-```commandline
-zenml stack set sagemaker-stack
-python run.py --training-pipeline
+- Loads production model
+- Generates predictions on new data
+
+```bash
+python run.py --inference-pipeline
 ```
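
The rewritten README funnels everything through `run.py` flags. A hypothetical sketch of such a dispatcher follows; the pipeline names and module layout are assumptions, not taken from the repository.

```python
# Hypothetical run.py-style dispatcher for the three pipelines the README
# describes; the pipeline imports and names below are assumptions.
import argparse

from pipelines import feature_engineering, inference, training  # assumed layout


def main() -> None:
    parser = argparse.ArgumentParser(description="Run one of the demo pipelines.")
    parser.add_argument("--feature-pipeline", action="store_true")
    parser.add_argument("--training-pipeline", action="store_true")
    parser.add_argument("--inference-pipeline", action="store_true")
    args = parser.parse_args()

    if args.feature_pipeline:
        feature_engineering()  # load, split, and preprocess the breast cancer data
    if args.training_pipeline:
        training()  # train SGD/XGBoost models and log metrics to W&B
    if args.inference_pipeline:
        inference()  # batch predictions with the current production model


if __name__ == "__main__":
    main()
```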

classifier-e2e/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-zenml[server]>=0.55.2
+zenml[server]>=0.70.0
 notebook
 scikit-learn<1.3
 s3fs>2022.3.0,<=2023.4.0

classifier-e2e/run_full.ipynb

Lines changed: 17 additions & 10 deletions
@@ -38,7 +38,7 @@
 "source": [
 "! pip3 install -r requirements.txt\n",
 "! zenml integration install sklearn xgboost -y\n",
-"! zenml connect --url https://1cf18d95-zenml.cloudinfra.zenml.io \n",
+"! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io \n",
 "\n",
 "import IPython\n",
 "\n",
@@ -957,10 +957,17 @@
 "        .ravel()\n",
 "        .tolist(),\n",
 "    }\n",
-"    log_model_metadata(metadata={\"wandb_url\": wandb.run.url})\n",
-"    log_artifact_metadata(\n",
+"\n",
+"    try:\n",
+"        if get_step_context().model:\n",
+"            log_metadata(metadata=metadata, infer_model=True)\n",
+"    except StepContextError:\n",
+"        # If a model is not configured, it is not able to log metadata\n",
+"        pass\n",
+"\n",
+"    log_metadata(\n",
 "        metadata=metadata,\n",
-"        artifact_name=\"breast_cancer_classifier\",\n",
+"        artifact_version_id=get_step_context().inputs[\"model\"].id,\n",
 "    )\n",
 "\n",
 "    wandb.log({\"train_accuracy\": metadata[\"train_accuracy\"]})\n",
@@ -1103,7 +1110,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "7fb27b941602401d91542211134fc71a",
+"id": "1e2130b9",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1114,7 +1121,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "acae54e37e7d407bbb7b55eff062a284",
+"id": "476cbf5c",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1123,7 +1130,7 @@
 },
 {
 "cell_type": "markdown",
-"id": "9a63283cbaf04dbcab1f6479b197f3a8",
+"id": "75df10e7",
 "metadata": {},
 "source": [
 "Now full run executed on local stack and experiment is tracked using Model Control Plane and Weights&Biases.\n",
@@ -1136,7 +1143,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "8dd0d8092fe74a7c96281538738b07e2",
+"id": "bfd6345f",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1147,7 +1154,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "72eea5119410473aa328ad9291626812",
+"id": "24358031",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1171,7 +1178,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.18"
+"version": "3.11.3"
 }
 },
 "nbformat": 4,

classifier-e2e/run_skip_basics.ipynb

Lines changed: 12 additions & 5 deletions
@@ -38,7 +38,7 @@
 "source": [
 "! pip3 install -r requirements.txt\n",
 "! zenml integration install sklearn xgboost -y\n",
-"! zenml connect --url https://1cf18d95-zenml.cloudinfra.zenml.io \n",
+"! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io \n",
 "\n",
 "import IPython\n",
 "\n",
@@ -839,10 +839,17 @@
 "        .ravel()\n",
 "        .tolist(),\n",
 "    }\n",
-"    log_model_metadata(metadata={\"wandb_url\": wandb.run.url})\n",
-"    log_artifact_metadata(\n",
+"\n",
+"    try:\n",
+"        if get_step_context().model:\n",
+"            log_metadata(metadata=metadata, infer_model=True)\n",
+"    except StepContextError:\n",
+"        # If a model is not configured, it is not able to log metadata\n",
+"        pass\n",
+"\n",
+"    log_metadata(\n",
 "        metadata=metadata,\n",
-"        artifact_name=\"breast_cancer_classifier\",\n",
+"        artifact_version_id=get_step_context().inputs[\"model\"].id,\n",
 "    )\n",
 "\n",
 "    wandb.log({\"train_accuracy\": metadata[\"train_accuracy\"]})\n",
@@ -1242,7 +1249,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.18"
+"version": "3.11.3"
 }
 },
 "nbformat": 4,

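Both notebooks also pick up the CLI migration visible in their setup cells: the removed `zenml connect --url ...` invocation is replaced by `zenml login`, which takes the server URL directly. The consolidated updated cell (a Jupyter cell using `!` shell escapes, URL as committed) looks like this:

```python
# Updated setup cell in both notebooks; shell commands run via Jupyter "!" escapes.
! pip3 install -r requirements.txt
! zenml integration install sklearn xgboost -y
! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io
```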
classifier-e2e/steps/deploy_endpoint.py

Lines changed: 5 additions & 1 deletion
@@ -7,6 +7,7 @@
 from utils.aws import get_aws_config
 from utils.sagemaker_materializer import SagemakerPredictorMaterializer
 from zenml import ArtifactConfig, get_step_context, log_artifact_metadata, step
+from zenml.enums import ArtifactType
 
 
 @step(
@@ -16,7 +17,10 @@
 def deploy_endpoint() -> (
     Annotated[
         Predictor,
-        ArtifactConfig(name="sagemaker_endpoint", is_deployment_artifact=True),
+        ArtifactConfig(
+            name="sagemaker_endpoint",
+            artifact_type=ArtifactType.SERVICE
+        ),
     ]
 ):
     role, session, region = get_aws_config()
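
The same migration applies to the deployment artifact, which now declares `ArtifactType.SERVICE` explicitly instead of `is_deployment_artifact=True`. A condensed sketch of just the return annotation follows; the `Predictor` import path and the omitted body are assumptions.

```python
# Condensed sketch of the new-style return annotation for the endpoint artifact;
# the Predictor import path and the omitted step body are assumptions.
from sagemaker.predictor import Predictor
from typing_extensions import Annotated
from zenml import ArtifactConfig, step
from zenml.enums import ArtifactType


@step
def deploy_endpoint() -> Annotated[
    Predictor,
    ArtifactConfig(name="sagemaker_endpoint", artifact_type=ArtifactType.SERVICE),
]:
    ...  # create the SageMaker endpoint and return a Predictor pointing at it
```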

classifier-e2e/steps/model_evaluator.py

Lines changed: 16 additions & 19 deletions
@@ -21,12 +21,7 @@
 import wandb
 from sklearn.base import ClassifierMixin
 from sklearn.metrics import confusion_matrix
-from zenml import (
-    get_step_context,
-    log_artifact_metadata,
-    log_model_metadata,
-    step,
-)
+from zenml import step, log_metadata, get_step_context
 from zenml.client import Client
 from zenml.exceptions import StepContextError
 from zenml.logger import get_logger
@@ -60,12 +55,12 @@ def model_evaluator(
     step to force the pipeline run to fail early and all subsequent steps to
     be skipped.
 
-    This step is parameterized to configure the step independently of the step code,
-    before running it in a pipeline. In this example, the step can be configured
-    to use different values for the acceptable model performance thresholds and
-    to control whether the pipeline run should fail if the model performance
-    does not meet the minimum criteria. See the documentation for more
-    information:
+    This step is parameterized to configure the step independently of the step
+    code, before running it in a pipeline. In this example, the step can be
+    configured to use different values for the acceptable model performance
+    thresholds and to control whether the pipeline run should fail if the model
+    performance does not meet the minimum criteria. See the documentation for
+    more information:
 
     https://docs.zenml.io/user-guide/advanced-guide/configure-steps-pipelines
 
@@ -89,17 +84,19 @@ def model_evaluator(
         dataset_tst.drop(columns=[target]),
         dataset_tst[target],
     )
-    logger.info(f"Train accuracy={trn_acc*100:.2f}%")
-    logger.info(f"Test accuracy={tst_acc*100:.2f}%")
+    logger.info(f"Train accuracy={trn_acc * 100:.2f}%")
+    logger.info(f"Test accuracy={tst_acc * 100:.2f}%")
 
     messages = []
     if trn_acc < min_train_accuracy:
         messages.append(
-            f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}% !"
+            f"Train accuracy {trn_acc * 100:.2f}% is below "
+            f"{min_train_accuracy * 100:.2f}% !"
         )
     if tst_acc < min_test_accuracy:
         messages.append(
-            f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}% !"
+            f"Test accuracy {tst_acc * 100:.2f}% is below "
+            f"{min_test_accuracy * 100:.2f}% !"
        )
     else:
        for message in messages:
@@ -115,14 +112,14 @@ def model_evaluator(
     }
     try:
         if get_step_context().model:
-            log_model_metadata(metadata={"wandb_url": wandb.run.url})
+            log_metadata(metadata=metadata, infer_model=True)
     except StepContextError:
         # if model not configured not able to log metadata
         pass
 
-    log_artifact_metadata(
+    log_metadata(
         metadata=metadata,
-        artifact_name="breast_cancer_classifier",
+        artifact_version_id=get_step_context().inputs["model"].id,
     )
 
     wandb.log(
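
The metadata change above (mirrored in both notebooks) replaces the removed `log_model_metadata` and `log_artifact_metadata` helpers with the unified `log_metadata` call. A hypothetical, stripped-down step isolating only the new logging pattern:

```python
# Hypothetical helper step isolating the new log_metadata pattern; the real
# model_evaluator builds a richer metadata dict (confusion matrix, W&B URL, ...).
from typing import Dict

from sklearn.base import ClassifierMixin
from zenml import get_step_context, log_metadata, step
from zenml.exceptions import StepContextError


@step
def log_evaluation_metadata(
    model: ClassifierMixin, train_acc: float, test_acc: float
) -> None:
    metadata: Dict[str, float] = {"train_accuracy": train_acc, "test_accuracy": test_acc}

    try:
        if get_step_context().model:
            # Attach metadata to the Model version configured for this run.
            log_metadata(metadata=metadata, infer_model=True)
    except StepContextError:
        # No Model configured on the pipeline/step, so skip model-level metadata.
        pass

    # Attach the same metadata to the exact artifact version received as the
    # step's "model" input.
    log_metadata(
        metadata=metadata,
        artifact_version_id=get_step_context().inputs["model"].id,
    )
```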

classifier-e2e/steps/model_trainer.py

Lines changed: 5 additions & 2 deletions
@@ -13,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional
 
@@ -23,6 +22,7 @@
 from typing_extensions import Annotated
 from utils.sagemaker_materializer import SagemakerMaterializer
 from zenml import ArtifactConfig, step
+from zenml.enums import ArtifactType
 from zenml.logger import get_logger
 
 logger = get_logger(__name__)
@@ -39,7 +39,10 @@ def model_trainer(
     target: Optional[str] = "target",
 ) -> Annotated[
     ClassifierMixin,
-    ArtifactConfig(name="breast_cancer_classifier", is_model_artifact=True),
+    ArtifactConfig(
+        name="breast_cancer_classifier",
+        artifact_type=ArtifactType.MODEL,
+    ),
 ]:
     """Configure and train a model on the training dataset.
 