zenml-io
diff --git a/‎.github/workflows/production_run_complete_llm.yml‎
Lines changed: 12 additions & 10 deletions b/‎.github/workflows/production_run_complete_llm.yml‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎.github/workflows/staging_run_complete_llm.yml‎
Lines changed: 9 additions & 8 deletions b/‎.github/workflows/staging_run_complete_llm.yml‎
Lines changed: 9 additions & 8 deletions
diff --git a/‎huggingface-sagemaker/steps/deploying/sagemaker_deployment.py‎
Lines changed: 17 additions & 2 deletions b/‎huggingface-sagemaker/steps/deploying/sagemaker_deployment.py‎
Lines changed: 17 additions & 2 deletions
diff --git a/‎huggingface-sagemaker/steps/promotion/promote_metric_compare_promoter.py‎
Lines changed: 2 additions & 2 deletions b/‎huggingface-sagemaker/steps/promotion/promote_metric_compare_promoter.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎huggingface-sagemaker/steps/training/model_trainer.py‎
Lines changed: 3 additions & 1 deletion b/‎huggingface-sagemaker/steps/training/model_trainer.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎llm-complete-guide/.assets/huggingface-space-rag-deployment.png‎
193 KB b/‎llm-complete-guide/.assets/huggingface-space-rag-deployment.png‎
193 KB
diff --git a/‎llm-complete-guide/README.md‎
Lines changed: 55 additions & 11 deletions b/‎llm-complete-guide/README.md‎
Lines changed: 55 additions & 11 deletions
diff --git a/‎llm-complete-guide/ZENML_VERSION.txt‎
Lines changed: 1 addition & 0 deletions b/‎llm-complete-guide/ZENML_VERSION.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎…m-complete-guide/configs/embeddings.yaml‎ ‎…mplete-guide/configs/dev/embeddings.yaml‎llm-complete-guide/configs/embeddings.yaml renamed to llm-complete-guide/configs/dev/embeddings.yaml
Lines changed: 5 additions & 12 deletions b/‎…m-complete-guide/configs/embeddings.yaml‎ ‎…mplete-guide/configs/dev/embeddings.yaml‎llm-complete-guide/configs/embeddings.yaml renamed to llm-complete-guide/configs/dev/embeddings.yaml
Lines changed: 5 additions & 12 deletions
diff --git a/‎…omplete-guide/configs/rag_local_dev.yaml‎ ‎llm-complete-guide/configs/dev/rag.yaml‎llm-complete-guide/configs/rag_local_dev.yaml renamed to llm-complete-guide/configs/dev/rag.yaml
Lines changed: 7 additions & 10 deletions b/‎…omplete-guide/configs/rag_local_dev.yaml‎ ‎llm-complete-guide/configs/dev/rag.yaml‎llm-complete-guide/configs/rag_local_dev.yaml renamed to llm-complete-guide/configs/dev/rag.yaml
Lines changed: 7 additions & 10 deletions
@@ -11,20 +11,21 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  run-staging-workflow:
+  run-production-workflow:
     runs-on: ubuntu-latest
     env:
-      ZENML_HOST: ${{ secrets.ZENML_HOST }}
-      ZENML_API_KEY: ${{ secrets.ZENML_API_KEY }}
-      ZENML_PRODUCTION_STACK : 51a49786-b82a-4646-bde7-a460efb0a9c5
+      ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
+      ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
+      ZENML_PRODUCTION_STACK: b3951d43-0fb2-4d32-89c5-3399374e7c7e # Set this to your production stack ID
       ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
       ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
       ZENML_DEBUG: true
       ZENML_ANALYTICS_OPT_IN: false
       ZENML_LOGGING_VERBOSITY: INFO
       ZENML_PROJECT_SECRET_NAME: llm-complete
       ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True
-      ZENML_ACTION_ID: 23a4d58c-bd2b-47d5-a41d-0a845d2982f8
+      ZENML_EVENT_SOURCE_ID: ae6ae536-d811-4838-a44b-744b768a0f31  # Set this to your preferred event source ID
+      ZENML_SERVICE_ACCOUNT_ID: fef76af2-382f-4ab2-9e6b-5eb85a303f0e  # Set this to your service account ID or delete
 
     steps:
       - name: Check out repository code
@@ -37,15 +38,16 @@ jobs:
       - name: Install requirements
         working-directory: ./llm-complete-guide
         run: |
-          pip3 install -r requirements.txt
-          pip3 install -r requirements-argilla.txt
-          zenml integration install gcp -y
+          pip3 install uv
+          uv pip install -r requirements.txt --system
+          uv pip install -r requirements-argilla.txt --system
+          zenml integration install gcp -y --uv
 
       - name: Connect to ZenML server
         working-directory: ./llm-complete-guide
         run: |
           zenml init
-          zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY
+          zenml connect --url $ZENML_STORE_URL --api-key $ZENML_STORE_API_KEY
 
       - name: Set stack (Production)
         working-directory: ./llm-complete-guide
@@ -55,4 +57,4 @@ jobs:
       - name: Run pipeline, create pipeline, configure trigger (Production)
         working-directory: ./llm-complete-guide
         run: |
-          python gh_action_rag.py --no-cache --create-template --action-id  ${{ env.ZENML_ACTION_ID }} --config rag_gcp.yaml
+          python gh_action_rag.py --no-cache --create-template --event-source-id ${{ env.ZENML_EVENT_SOURCE_ID }} --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --config production/rag.yaml --zenml-model-version production
@@ -12,9 +12,9 @@ jobs:
   run-staging-workflow:
     runs-on: ubuntu-latest
     env:
-      ZENML_HOST: ${{ secrets.ZENML_HOST }}
-      ZENML_API_KEY: ${{ secrets.ZENML_API_KEY }}
-      ZENML_STAGING_STACK: 51a49786-b82a-4646-bde7-a460efb0a9c5
+      ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
+      ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
+      ZENML_STAGING_STACK: 67166d73-a44e-42f9-b67f-011e9afab9b5  # Set this to your staging stack ID
       ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
       ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
       ZENML_DEBUG: true
@@ -34,15 +34,16 @@ jobs:
       - name: Install requirements
         working-directory: ./llm-complete-guide
         run: |
-          pip3 install -r requirements.txt
-          pip3 install -r requirements-argilla.txt
-          zenml integration install gcp -y
+          pip3 install uv
+          uv pip install -r requirements.txt --system
+          uv pip install -r requirements-argilla.txt --system
+          zenml integration install aws s3 -y --uv
 
       - name: Connect to ZenML server
         working-directory: ./llm-complete-guide
         run: |
           zenml init
-          zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY
+          zenml connect --url $ZENML_STORE_URL --api-key $ZENML_STORE_API_KEY
 
       - name: Set stack (Staging)
         working-directory: ./llm-complete-guide
@@ -52,4 +53,4 @@ jobs:
       - name: Run pipeline (Staging)
         working-directory: ./llm-complete-guide
         run: |
-          python gh_action_rag.py --no-cache --config rag_gcp.yaml
+          python gh_action_rag.py --no-cache --config staging/rag.yaml --zenml-model-version staging
@@ -15,12 +15,13 @@
 # limitations under the License.
 #
 
+import os
 from typing import Optional
 
 from gradio.aws_helper import get_sagemaker_role, get_sagemaker_session
 from sagemaker.huggingface import HuggingFaceModel
 from typing_extensions import Annotated
-from zenml import get_step_context, step
+from zenml import get_step_context, log_artifact_metadata, step
 from zenml.logger import get_logger
 
 # Initialize logger
@@ -35,7 +36,7 @@ def deploy_hf_to_sagemaker(
     pytorch_version: str = "1.13.1",
     py_version: str = "py39",
     hf_task: str = "text-classification",
-    instance_type: str = "ml.g5.2xlarge",
+    instance_type: str = "ml.t2.medium",
     container_startup_health_check_timeout: int = 300,
 ) -> Annotated[str, "sagemaker_endpoint_name"]:
     """
@@ -83,4 +84,18 @@ def deploy_hf_to_sagemaker(
     )
     endpoint_name = predictor.endpoint_name
     logger.info(f"Model deployed to SageMaker: {endpoint_name}")
+
+    # Read the region from AWS_REGION; use the default when it is unset or empty.
+    region = os.environ.get("AWS_REGION") or "eu-central-1"
+    invocation_url = f"https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{endpoint_name}/invocations"
+
+    log_artifact_metadata(
+        artifact_name="sagemaker_endpoint_name",
+        metadata={
+            "invocation_url": invocation_url,
+            "endpoint_name": endpoint_name,
+        },
+    )
+
+
     return endpoint_name
@@ -28,8 +28,8 @@
 
 @step
 def promote_metric_compare_promoter(
-    latest_metrics: Dict[str, str],
-    current_metrics: Dict[str, str],
+    latest_metrics: Dict[str, float],
+    current_metrics: Dict[str, float],
     metric_to_compare: str = "accuracy",
 ):
     """Try to promote trained model.
 
@@ -154,6 +154,8 @@ def model_trainer(
     eval_results = trainer.evaluate(metric_key_prefix="")
 
     # Log the evaluation results in model control plane
-    log_artifact_metadata(output_name="model", metrics=eval_results)
+    log_artifact_metadata(
+        artifact_name="model", metadata={"metrics": eval_results}
+    )
 
     return model, tokenizer
@@ -23,7 +23,7 @@ instructions are provided below for how to set that up.
 
 ## 📽️ Watch the webinars
 
-We've recently been holding some webinars about this repository and project. Watche the videos below if you want an introduction and context around the code and ideas covered in this project.
+We've recently been holding some webinars about this repository and project. Watch the videos below if you want an introduction and context around the code and ideas covered in this project.
 
 [![Building and Optimizing RAG Pipelines: Data Preprocessing, Embeddings, and Evaluation with ZenML](https://github.com/user-attachments/assets/1aea2bd4-8079-4ea2-98e1-8da6ba9aeebe)](https://www.youtube.com/watch?v=PazRMY8bo3U)
 
@@ -45,7 +45,7 @@ pip install -r requirements.txt
 
 Depending on your hardware you may run into some issues when running the `pip install` command with the
 `flash_attn` package. In that case running `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation` 
-could help you.
+could help you. You might also need to install torch separately.
 
 In order to use the default LLM for this query, you'll need an account and an
 API key from OpenAI specified as a ZenML secret:
@@ -57,9 +57,9 @@ export ZENML_PROJECT_SECRET_NAME=llm-complete
 
 ### Setting up Supabase
 
-[Supabase](https://supabase.com/) is a cloud provider that provides a PostgreSQL
+[Supabase](https://supabase.com/) is a cloud provider that offers a PostgreSQL
 database. It's simple to use and has a free tier that should be sufficient for
-this project. Once you've created a Supabase account and organisation, you'll
+this project. Once you've created a Supabase account and organization, you'll
 need to create a new project.
 
 ![](.assets/supabase-create-project.png)
@@ -76,7 +76,7 @@ string from the Supabase dashboard.
 
 ![](.assets/supabase-connection-string.png)
 
-In case supabase is not an option for you, you can use a different database as the backend. 
+In case Supabase is not an option for you, you can use a different database as the backend.
 
 ### Running the RAG pipeline
 
@@ -85,7 +85,7 @@ to run the pipelines in the correct order. You can run the script with the
 following command:
 
 ```shell
-python run.py --rag
+python run.py rag
 ```
 
 This will run the basic RAG pipeline, which scrapes the ZenML documentation and
@@ -100,7 +100,7 @@ use for the LLM.
 When you're ready to make the query, run the following command:
 
 ```shell
-python run.py --query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
+python run.py query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
 ```
 
 Alternative options for LLMs to use include:
@@ -114,12 +114,57 @@ Note that Claude will require a different API key from Anthropic. See [the
 `litellm` docs](https://docs.litellm.ai/docs/providers/anthropic) on how to set
 this up.
 
+### Deploying the RAG pipeline
+
+![](.assets/huggingface-space-rag-deployment.png)
+
+You'll need to update and add some secrets to make this work with your Hugging
+Face account. To get your ZenML service account API token and store URL, you can
+first create a new service account:
+
+```bash
+zenml service-account create <SERVICE_ACCOUNT_NAME>
+```
+
+For more information on this part of the process, please refer to the [ZenML
+documentation](https://docs.zenml.io/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account).
+
+Once you have your service account API token and store URL (the URL of your
+deployed ZenML tenant), you can update the secrets with the following command:
+
+```bash
+zenml secret update llm-complete --zenml_api_token=<YOUR_ZENML_SERVICE_ACCOUNT_API_TOKEN> --zenml_store_url=<YOUR_ZENML_STORE_URL>
+```
+
+To set the Hugging Face user space that gets used for the Gradio app deployment,
+set the following environment variables:
+
+```bash
+export ZENML_HF_USERNAME=<YOUR_HF_USERNAME>
+export ZENML_HF_SPACE_NAME=<YOUR_HF_SPACE_NAME> # optional, defaults to "llm-complete-guide-rag"
+```
+
+To deploy the RAG pipeline, you can use the following command:
+
+```shell
+python run.py deploy
+```
+
+Alternatively, you can run the basic RAG pipeline *and* deploy it in one go:
+
+```shell
+python run.py --rag --deploy
+```
+
+This will open a Hugging Face space in your browser where you can interact with
+the RAG pipeline.
+
 ### Run the LLM RAG evaluation pipeline
 
 To run the evaluation pipeline, you can use the following command:
 
 ```shell
-python run.py --evaluation
+python run.py evaluation
 ```
 
 You'll need to have first run the RAG pipeline to have the necessary assets in
@@ -137,7 +182,7 @@ To run the `distilabel` synthetic data generation pipeline, you can use the foll
 
 ```shell
 pip install -r requirements-argilla.txt # special requirements
-python run.py --synthetic
+python run.py synthetic
 ```
 
 You will also need to have set up and connected to an Argilla instance for this
@@ -157,7 +202,6 @@ will need to change the hf repo urls to a space you have permissions to.
 zenml secret update llm-complete -v '{"argilla_api_key": "YOUR_ARGILLA_API_KEY", "argilla_api_url": "YOUR_ARGILLA_API_URL", "hf_token": "YOUR_HF_TOKEN"}'
 ```
 
-
 ### Finetune the embeddings
 
 As with the previous pipeline, you will need to have set up and connected to an Argilla instance for this
@@ -177,7 +221,7 @@ commands:
 
 ```shell
 pip install -r requirements-argilla.txt # special requirements
-python run.py --embeddings
+python run.py embeddings
 ```
 
 *Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka
 
@@ -0,0 +1 @@
+v0.68.1
@@ -3,7 +3,7 @@
 # environment configuration
 settings:
   docker:
-    parent_image: "zenmldocker/prepare-release:base-0.68.0"
+    python_package_installer: "uv"
     requirements:
       - langchain-community
       - ratelimit
@@ -16,7 +16,7 @@ settings:
       - pandas
       - numpy
       - sentence-transformers>=3
-      - transformers[torch]
+      - transformers[torch]==4.43.1
       - litellm
       - ollama
       - tiktoken
@@ -26,14 +26,7 @@ settings:
       - rerankers[flashrank]
       - datasets
       - torch
+      - pygithub
+      - openai
     environment:
-      ZENML_PROJECT_SECRET_NAME: llm_complete
-
-
-# configuration of the Model Control Plane
-model:
-  name: finetuned-zenml-docs-embeddings
-  version: latest
-  license: Apache 2.0
-  description: Finetuned LLM on ZenML docs
-  tags: ["rag", "finetuned"]
+      ZENML_PROJECT_SECRET_NAME: llm_complete
@@ -14,20 +14,17 @@ settings:
       - tiktoken
       - ratelimit
       - rerankers
+      - pygithub
+      - rerankers[flashrank]
+      - matplotlib
+
     environment:
       ZENML_PROJECT_SECRET_NAME: llm_complete
       ZENML_ENABLE_RICH_TRACEBACK: FALSE
       ZENML_LOGGING_VERBOSITY: INFO
-
-
-# configuration of the Model Control Plane
-model:
-  name: finetuned-zenml-docs-embeddings
-  license: Apache 2.0
-  description: Finetuned LLM on ZenML docs
-  tags: ["rag", "finetuned"]
-
+    python_package_installer: "uv"
 steps:
   url_scraper:
     parameters:
-      docs_url: https://docs.zenml.io/stack-components/orchestrators
+      docs_url: https://docs.zenml.io/
+      use_dev_set: true