Commit 93993ea

Merge branch 'main' into refactor/fix-broken-projects
2 parents 8d5e298 + 8b8bb8d commit 93993ea

126 files changed (+1286, -1591 lines)

Large commits have some content hidden by default; only a subset of the 126 changed files is shown below.

airflow-cloud-composer-etl-feature-train/steps/training/model_trainer.py

Lines changed: 2 additions & 1 deletion
@@ -21,6 +21,7 @@
 from materializers import BigQueryDataset, CSVDataset
 from typing_extensions import Annotated
 from zenml import ArtifactConfig, step
+from zenml.enums import ArtifactType
 from zenml.logger import get_logger
 
 logger = get_logger(__name__)
@@ -31,7 +32,7 @@ def train_xgboost_model(
     dataset: Union[BigQueryDataset, CSVDataset],
 ) -> Tuple[
     Annotated[
-        xgb.Booster, ArtifactConfig(name="xgb_model", is_model_artifact=True)
+        xgb.Booster, ArtifactConfig(name="xgb_model", artifact_type=ArtifactType.MODEL)
     ],
     Annotated[Dict[str, float], "metrics"],
 ]:
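
For orientation, here is a minimal sketch of the annotation style these files migrate to: `artifact_type=ArtifactType.MODEL` replaces the removed `is_model_artifact=True` flag. The training body below is illustrative, not the repository's actual step.

```python
# Minimal sketch of the new ArtifactConfig style adopted in this commit;
# the training logic is illustrative only.
from typing import Dict, Tuple

import pandas as pd
import xgboost as xgb
from typing_extensions import Annotated
from zenml import ArtifactConfig, step
from zenml.enums import ArtifactType


@step
def train_xgboost_model(train_df: pd.DataFrame, target: str = "target") -> Tuple[
    Annotated[
        xgb.Booster,
        ArtifactConfig(name="xgb_model", artifact_type=ArtifactType.MODEL),
    ],
    Annotated[Dict[str, float], "metrics"],
]:
    # Train a small booster and report a single metric as a second output.
    dtrain = xgb.DMatrix(train_df.drop(columns=[target]), label=train_df[target])
    booster = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=10)
    preds = (booster.predict(dtrain) > 0.5).astype(int)
    accuracy = float((preds == train_df[target]).mean())
    return booster, {"train_accuracy": accuracy}
```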

classifier-e2e/README.md

Lines changed: 59 additions & 41 deletions
@@ -11,58 +11,76 @@ pinned: false
 license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# ZenML MLOps Breast Cancer Classification Demo
 
-# 📜 ZenML Stack Show Case
+## 🌍 Project Overview
 
-This project aims to demonstrate the power of stacks. The code in this
-project assumes that you have quite a few stacks registered already.
+This is a minimalistic MLOps project demonstrating how to put machine learning
+workflows into production using ZenML. The project focuses on building a breast
+cancer classification model with end-to-end ML pipeline management.
 
-## default
-* `default` Orchestrator
-* `default` Artifact Store
+### Key Features
 
-```commandline
-zenml stack set default
-python run.py --training-pipeline
+- 🔬 Feature engineering pipeline
+- 🤖 Model training pipeline
+- 🧪 Batch inference pipeline
+- 📊 Artifact and model lineage tracking
+- 🔗 Integration with Weights & Biases for experiment tracking
+
+## 🚀 Installation
+
+1. Clone the repository
+2. Install requirements:
+```bash
+pip install -r requirements.txt
+```
+3. Install ZenML integrations:
+```bash
+zenml integration install sklearn xgboost wandb -y
+zenml login
+zenml init
+```
+4. You need to register a stack with a [Weights & Biases Experiment Tracker](https://docs.zenml.io/stack-components/experiment-trackers/wandb).
+
+## 🧠 Project Structure
+
+- `steps/`: Contains individual pipeline steps
+- `pipelines/`: Pipeline definitions
+- `run.py`: Main script to execute pipelines
+
+## 🔍 Workflow and Execution
+
+First, you need to set your stack:
+
+```bash
+zenml stack set stack-with-wandb
 ```
 
-## local-sagemaker-step-operator-stack
-* `default` Orchestrator
-* `s3` Artifact Store
-* `local` Image Builder
-* `aws` Container Registry
-* `Sagemaker` Step Operator
+### 1. Data Loading and Feature Engineering
 
-```commandline
-zenml stack set local-sagemaker-step-operator-stack
-zenml integration install aws -y
-python run.py --training-pipeline
+- Uses the Breast Cancer dataset from scikit-learn
+- Splits data into training and inference sets
+- Preprocesses data for model training
+
+```bash
+python run.py --feature-pipeline
 ```
 
-## sagemaker-airflow-stack
-* `Airflow` Orchestrator
-* `s3` Artifact Store
-* `local` Image Builder
-* `aws` Container Registry
-* `Sagemaker` Step Operator
-
-```commandline
-zenml stack set sagemaker-airflow-stack
-zenml integration install airflow -y
-pip install apache-airflow-providers-docker apache-airflow~=2.5.0
-zenml stack up
+### 2. Model Training
+
+- Supports multiple model types (SGD, XGBoost)
+- Evaluates and compares model performance
+- Tracks model metrics with Weights & Biases
+
+```bash
 python run.py --training-pipeline
 ```
 
-## sagemaker-stack
-* `Sagemaker` Orchestrator
-* `s3` Artifact Store
-* `local` Image Builder
-* `aws` Container Registry
-* `Sagemaker` Step Operator
+### 3. Batch Inference
 
-```commandline
-zenml stack set sagemaker-stack
-python run.py --training-pipeline
+- Loads production model
+- Generates predictions on new data
+
+```bash
+python run.py --inference-pipeline
 ```
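
The rewritten README funnels everything through `run.py` flags. A hypothetical sketch of such a dispatcher follows; the pipeline names and module layout are assumptions, not taken from the repository.

```python
# Hypothetical run.py-style dispatcher for the three pipelines the README
# describes; the pipeline imports and names below are assumptions.
import argparse

from pipelines import feature_engineering, inference, training  # assumed layout


def main() -> None:
    parser = argparse.ArgumentParser(description="Run one of the demo pipelines.")
    parser.add_argument("--feature-pipeline", action="store_true")
    parser.add_argument("--training-pipeline", action="store_true")
    parser.add_argument("--inference-pipeline", action="store_true")
    args = parser.parse_args()

    if args.feature_pipeline:
        feature_engineering()  # load, split, and preprocess the breast cancer data
    if args.training_pipeline:
        training()  # train SGD/XGBoost models and log metrics to W&B
    if args.inference_pipeline:
        inference()  # batch predictions with the current production model


if __name__ == "__main__":
    main()
```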

classifier-e2e/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-zenml[server]>=0.55.2
+zenml[server]>=0.70.0
 notebook
 scikit-learn<1.3
 s3fs>2022.3.0,<=2023.4.0

classifier-e2e/run_full.ipynb

Lines changed: 17 additions & 10 deletions
@@ -38,7 +38,7 @@
 "source": [
 "! pip3 install -r requirements.txt\n",
 "! zenml integration install sklearn xgboost -y\n",
-"! zenml connect --url https://1cf18d95-zenml.cloudinfra.zenml.io \n",
+"! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io \n",
 "\n",
 "import IPython\n",
 "\n",
@@ -957,10 +957,17 @@
 "        .ravel()\n",
 "        .tolist(),\n",
 "    }\n",
-"    log_model_metadata(metadata={\"wandb_url\": wandb.run.url})\n",
-"    log_artifact_metadata(\n",
+"\n",
+"    try:\n",
+"        if get_step_context().model:\n",
+"            log_metadata(metadata=metadata, infer_model=True)\n",
+"    except StepContextError:\n",
+"        # If a model is not configured, it is not able to log metadata\n",
+"        pass\n",
+"\n",
+"    log_metadata(\n",
 "        metadata=metadata,\n",
-"        artifact_name=\"breast_cancer_classifier\",\n",
+"        artifact_version_id=get_step_context().inputs[\"model\"].id,\n",
 "    )\n",
 "\n",
 "    wandb.log({\"train_accuracy\": metadata[\"train_accuracy\"]})\n",
@@ -1103,7 +1110,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "7fb27b941602401d91542211134fc71a",
+"id": "1e2130b9",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1114,7 +1121,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "acae54e37e7d407bbb7b55eff062a284",
+"id": "476cbf5c",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1123,7 +1130,7 @@
 },
 {
 "cell_type": "markdown",
-"id": "9a63283cbaf04dbcab1f6479b197f3a8",
+"id": "75df10e7",
 "metadata": {},
 "source": [
 "Now full run executed on local stack and experiment is tracked using Model Control Plane and Weights&Biases.\n",
@@ -1136,7 +1143,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "8dd0d8092fe74a7c96281538738b07e2",
+"id": "bfd6345f",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1147,7 +1154,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "72eea5119410473aa328ad9291626812",
+"id": "24358031",
 "metadata": {},
 "outputs": [],
 "source": [
@@ -1171,7 +1178,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.18"
+"version": "3.11.3"
 }
 },
 "nbformat": 4,

classifier-e2e/run_skip_basics.ipynb

Lines changed: 12 additions & 5 deletions
@@ -38,7 +38,7 @@
 "source": [
 "! pip3 install -r requirements.txt\n",
 "! zenml integration install sklearn xgboost -y\n",
-"! zenml connect --url https://1cf18d95-zenml.cloudinfra.zenml.io \n",
+"! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io \n",
 "\n",
 "import IPython\n",
 "\n",
@@ -839,10 +839,17 @@
 "        .ravel()\n",
 "        .tolist(),\n",
 "    }\n",
-"    log_model_metadata(metadata={\"wandb_url\": wandb.run.url})\n",
-"    log_artifact_metadata(\n",
+"\n",
+"    try:\n",
+"        if get_step_context().model:\n",
+"            log_metadata(metadata=metadata, infer_model=True)\n",
+"    except StepContextError:\n",
+"        # If a model is not configured, it is not able to log metadata\n",
+"        pass\n",
+"\n",
+"    log_metadata(\n",
 "        metadata=metadata,\n",
-"        artifact_name=\"breast_cancer_classifier\",\n",
+"        artifact_version_id=get_step_context().inputs[\"model\"].id,\n",
 "    )\n",
 "\n",
 "    wandb.log({\"train_accuracy\": metadata[\"train_accuracy\"]})\n",
@@ -1242,7 +1249,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.18"
+"version": "3.11.3"
 }
 },
 "nbformat": 4,

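Both notebooks also pick up the CLI migration visible in their setup cells: the removed `zenml connect --url ...` invocation is replaced by `zenml login`, which takes the server URL directly. The consolidated updated cell (a Jupyter cell using `!` shell escapes, URL as committed) looks like this:

```python
# Updated setup cell in both notebooks; shell commands run via Jupyter "!" escapes.
! pip3 install -r requirements.txt
! zenml integration install sklearn xgboost -y
! zenml login https://1cf18d95-zenml.cloudinfra.zenml.io
```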
classifier-e2e/steps/deploy_endpoint.py

Lines changed: 5 additions & 1 deletion
@@ -7,6 +7,7 @@
 from utils.aws import get_aws_config
 from utils.sagemaker_materializer import SagemakerPredictorMaterializer
 from zenml import ArtifactConfig, get_step_context, log_artifact_metadata, step
+from zenml.enums import ArtifactType
 
 
 @step(
@@ -16,7 +17,10 @@
 def deploy_endpoint() -> (
     Annotated[
         Predictor,
-        ArtifactConfig(name="sagemaker_endpoint", is_deployment_artifact=True),
+        ArtifactConfig(
+            name="sagemaker_endpoint",
+            artifact_type=ArtifactType.SERVICE
+        ),
     ]
 ):
     role, session, region = get_aws_config()
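
The same migration applies to the deployment artifact, which now declares `ArtifactType.SERVICE` explicitly instead of `is_deployment_artifact=True`. A condensed sketch of just the return annotation follows; the `Predictor` import path and the omitted body are assumptions.

```python
# Condensed sketch of the new-style return annotation for the endpoint artifact;
# the Predictor import path and the omitted step body are assumptions.
from sagemaker.predictor import Predictor
from typing_extensions import Annotated
from zenml import ArtifactConfig, step
from zenml.enums import ArtifactType


@step
def deploy_endpoint() -> Annotated[
    Predictor,
    ArtifactConfig(name="sagemaker_endpoint", artifact_type=ArtifactType.SERVICE),
]:
    ...  # create the SageMaker endpoint and return a Predictor pointing at it
```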

classifier-e2e/steps/model_evaluator.py

Lines changed: 16 additions & 19 deletions
@@ -21,12 +21,7 @@
 import wandb
 from sklearn.base import ClassifierMixin
 from sklearn.metrics import confusion_matrix
-from zenml import (
-    get_step_context,
-    log_artifact_metadata,
-    log_model_metadata,
-    step,
-)
+from zenml import step, log_metadata, get_step_context
 from zenml.client import Client
 from zenml.exceptions import StepContextError
 from zenml.logger import get_logger
@@ -60,12 +55,12 @@ def model_evaluator(
     step to force the pipeline run to fail early and all subsequent steps to
     be skipped.
 
-    This step is parameterized to configure the step independently of the step code,
-    before running it in a pipeline. In this example, the step can be configured
-    to use different values for the acceptable model performance thresholds and
-    to control whether the pipeline run should fail if the model performance
-    does not meet the minimum criteria. See the documentation for more
-    information:
+    This step is parameterized to configure the step independently of the step
+    code, before running it in a pipeline. In this example, the step can be
+    configured to use different values for the acceptable model performance
+    thresholds and to control whether the pipeline run should fail if the model
+    performance does not meet the minimum criteria. See the documentation for
+    more information:
 
     https://docs.zenml.io/user-guide/advanced-guide/configure-steps-pipelines
 
@@ -89,17 +84,19 @@ def model_evaluator(
         dataset_tst.drop(columns=[target]),
         dataset_tst[target],
     )
-    logger.info(f"Train accuracy={trn_acc*100:.2f}%")
-    logger.info(f"Test accuracy={tst_acc*100:.2f}%")
+    logger.info(f"Train accuracy={trn_acc * 100:.2f}%")
+    logger.info(f"Test accuracy={tst_acc * 100:.2f}%")
 
     messages = []
     if trn_acc < min_train_accuracy:
         messages.append(
-            f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}% !"
+            f"Train accuracy {trn_acc * 100:.2f}% is below "
+            f"{min_train_accuracy * 100:.2f}% !"
         )
     if tst_acc < min_test_accuracy:
         messages.append(
-            f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}% !"
+            f"Test accuracy {tst_acc * 100:.2f}% is below "
+            f"{min_test_accuracy * 100:.2f}% !"
        )
     else:
        for message in messages:
@@ -115,14 +112,14 @@ def model_evaluator(
     }
     try:
         if get_step_context().model:
-            log_model_metadata(metadata={"wandb_url": wandb.run.url})
+            log_metadata(metadata=metadata, infer_model=True)
     except StepContextError:
         # if model not configured not able to log metadata
         pass
 
-    log_artifact_metadata(
+    log_metadata(
         metadata=metadata,
-        artifact_name="breast_cancer_classifier",
+        artifact_version_id=get_step_context().inputs["model"].id,
     )
 
     wandb.log(
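
The metadata change above (mirrored in both notebooks) replaces the removed `log_model_metadata` and `log_artifact_metadata` helpers with the unified `log_metadata` call. A hypothetical, stripped-down step isolating only the new logging pattern:

```python
# Hypothetical helper step isolating the new log_metadata pattern; the real
# model_evaluator builds a richer metadata dict (confusion matrix, W&B URL, ...).
from typing import Dict

from sklearn.base import ClassifierMixin
from zenml import get_step_context, log_metadata, step
from zenml.exceptions import StepContextError


@step
def log_evaluation_metadata(
    model: ClassifierMixin, train_acc: float, test_acc: float
) -> None:
    metadata: Dict[str, float] = {"train_accuracy": train_acc, "test_accuracy": test_acc}

    try:
        if get_step_context().model:
            # Attach metadata to the Model version configured for this run.
            log_metadata(metadata=metadata, infer_model=True)
    except StepContextError:
        # No Model configured on the pipeline/step, so skip model-level metadata.
        pass

    # Attach the same metadata to the exact artifact version received as the
    # step's "model" input.
    log_metadata(
        metadata=metadata,
        artifact_version_id=get_step_context().inputs["model"].id,
    )
```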

classifier-e2e/steps/model_trainer.py

Lines changed: 5 additions & 2 deletions
@@ -13,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 from typing import Optional
 
@@ -23,6 +22,7 @@
 from typing_extensions import Annotated
 from utils.sagemaker_materializer import SagemakerMaterializer
 from zenml import ArtifactConfig, step
+from zenml.enums import ArtifactType
 from zenml.logger import get_logger
 
 logger = get_logger(__name__)
@@ -39,7 +39,10 @@ def model_trainer(
     target: Optional[str] = "target",
 ) -> Annotated[
     ClassifierMixin,
-    ArtifactConfig(name="breast_cancer_classifier", is_model_artifact=True),
+    ArtifactConfig(
+        name="breast_cancer_classifier",
+        artifact_type=ArtifactType.MODEL,
+    ),
 ]:
     """Configure and train a model on the training dataset.
 