
Commit 1c31571

Merge branch 'main' into example/vllm-model-deployer
2 parents 97df909 + b20d5af


45 files changed: +1868 -332 lines
Production GitHub Actions workflow (new file)

Lines changed: 59 additions & 0 deletions

```yaml
name: Production LLM-COMPLETE
on:
  push:
    branches:
      - main
    paths:
      - 'llm-complete-guide/**'
concurrency:
  # New commit on branch cancels running workflows of the same branch
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run-staging-workflow:
    runs-on: ubuntu-latest
    env:
      ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }}
      ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
      ZENML_PRODUCTION_STACK: b3951d43-0fb2-4d32-89c5-3399374e7c7e # Set this to your production stack ID
      ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
      ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
      ZENML_DEBUG: true
      ZENML_ANALYTICS_OPT_IN: false
      ZENML_LOGGING_VERBOSITY: INFO
      ZENML_PROJECT_SECRET_NAME: llm-complete
      ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True
      ZENML_EVENT_SOURCE_ID: ae6ae536-d811-4838-a44b-744b768a0f31 # Set this to your preferred event source ID
      ZENML_SERVICE_ACCOUNT_ID: fef76af2-382f-4ab2-9e6b-5eb85a303f0e # Set this to your service account ID or delete

    steps:
      - name: Check out repository code
        uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install requirements
        working-directory: ./llm-complete-guide
        run: |
          pip3 install -r requirements.txt
          pip3 install -r requirements-argilla.txt
          zenml integration install gcp -y

      - name: Connect to ZenML server
        working-directory: ./llm-complete-guide
        run: |
          zenml init
          zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY

      - name: Set stack (Production)
        working-directory: ./llm-complete-guide
        run: |
          zenml stack set ${{ env.ZENML_PRODUCTION_STACK }}

      - name: Run pipeline, create pipeline, configure trigger (Production)
        working-directory: ./llm-complete-guide
        run: |
          # ZENML_ACTION_ID is referenced here but is not defined in the env block above,
          # so it must be added there for this call to work.
          python gh_action_rag.py --no-cache --create-template --event-source-id ${{ env.ZENML_EVENT_SOURCE_ID }} --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --action-id ${{ env.ZENML_ACTION_ID }} --config rag_gcp.yaml
```
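The `gh_action_rag.py` entrypoint itself is not part of this commit. For orientation only, a minimal sketch of a CLI that would accept the flags used above, assuming a `click`-based interface; every name here is illustrative rather than taken from the repo:

```python
# Hypothetical sketch of the CLI surface the workflow relies on; the real
# gh_action_rag.py is not shown in this diff, and click is an assumption.
import click


@click.command()
@click.option("--no-cache", is_flag=True, default=False, help="Disable step caching.")
@click.option("--create-template", is_flag=True, default=False, help="Also create a run template and trigger.")
@click.option("--event-source-id", default=None, help="ZenML event source UUID.")
@click.option("--service-account-id", default=None, help="ZenML service account UUID.")
@click.option("--action-id", default=None, help="ZenML action UUID.")
@click.option("--config", default="rag_local_dev.yaml", help="Pipeline YAML config to run with.")
def main(no_cache, create_template, event_source_id, service_account_id, action_id, config):
    """Run the RAG pipeline from CI, optionally registering a trigger."""
    ...  # pipeline invocation would go here


if __name__ == "__main__":
    main()
```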
Staging GitHub Actions workflow (new file)

Lines changed: 55 additions & 0 deletions

```yaml
name: Staging Trigger LLM-COMPLETE
on:
  pull_request:
    types: [opened, synchronize]
    branches: [staging, main]
concurrency:
  # New commit on branch cancels running workflows of the same branch
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run-staging-workflow:
    runs-on: ubuntu-latest
    env:
      ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }}
      ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
      ZENML_STAGING_STACK: 67166d73-a44e-42f9-b67f-011e9afab9b5 # Set this to your staging stack ID
      ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
      ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
      ZENML_DEBUG: true
      ZENML_ANALYTICS_OPT_IN: false
      ZENML_LOGGING_VERBOSITY: INFO
      ZENML_PROJECT_SECRET_NAME: llm-complete
      ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True

    steps:
      - name: Check out repository code
        uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install requirements
        working-directory: ./llm-complete-guide
        run: |
          pip3 install -r requirements.txt
          pip3 install -r requirements-argilla.txt
          zenml integration install aws s3 -y

      - name: Connect to ZenML server
        working-directory: ./llm-complete-guide
        run: |
          zenml init
          zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY

      - name: Set stack (Staging)
        working-directory: ./llm-complete-guide
        run: |
          zenml stack set ${{ env.ZENML_STAGING_STACK }}

      - name: Run pipeline (Staging)
        working-directory: ./llm-complete-guide
        run: |
          python gh_action_rag.py --no-cache --config rag_local_dev.yaml
```

.gitignore

Lines changed: 2 additions & 0 deletions

```diff
@@ -162,6 +162,8 @@ llm-lora-finetuning/configs/shopify.yaml
 finetuned-matryoshka/
 finetuned-all-MiniLM-L6-v2/
 finetuned-snowflake-arctic-embed-m/
+finetuned-snowflake-arctic-embed-m-v1.5/
+.gradio/

 # ollama ignores
 nohup.out
```

huggingface-sagemaker/steps/deploying/sagemaker_deployment.py

Lines changed: 17 additions & 2 deletions

```diff
@@ -15,12 +15,13 @@
 # limitations under the License.
 #

+import os
 from typing import Optional

 from gradio.aws_helper import get_sagemaker_role, get_sagemaker_session
 from sagemaker.huggingface import HuggingFaceModel
 from typing_extensions import Annotated
-from zenml import get_step_context, step
+from zenml import get_step_context, log_artifact_metadata, step
 from zenml.logger import get_logger

 # Initialize logger
@@ -35,7 +36,7 @@ def deploy_hf_to_sagemaker(
     pytorch_version: str = "1.13.1",
     py_version: str = "py39",
     hf_task: str = "text-classification",
-    instance_type: str = "ml.g5.2xlarge",
+    instance_type: str = "ml.t2.medium",
     container_startup_health_check_timeout: int = 300,
 ) -> Annotated[str, "sagemaker_endpoint_name"]:
     """
@@ -83,4 +84,18 @@
     )
     endpoint_name = predictor.endpoint_name
     logger.info(f"Model deployed to SageMaker: {endpoint_name}")
+
+    # Get the region from an env variable; os.environ.get() keeps the fallback
+    # reachable when AWS_REGION is unset (a plain lookup would raise KeyError).
+    region = os.environ.get("AWS_REGION") or "eu-central-1"
+    invocation_url = f"https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{endpoint_name}/invocations"
+
+    log_artifact_metadata(
+        artifact_name="sagemaker_endpoint_name",
+        metadata={
+            "invocation_url": invocation_url,
+            "endpoint_name": endpoint_name,
+        },
+    )
+
     return endpoint_name
```
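The step now logs an `invocation_url` alongside the endpoint name. A minimal sketch of calling such an endpoint from a client, assuming AWS credentials are configured and the endpoint accepts the standard Hugging Face `text-classification` payload; the endpoint name placeholder is illustrative:

```python
# Sketch: invoke the deployed SageMaker endpoint via the runtime API, which
# is what the logged invocation_url points at.
import json

import boto3

runtime = boto3.client("sagemaker-runtime", region_name="eu-central-1")
response = runtime.invoke_endpoint(
    EndpointName="<sagemaker_endpoint_name>",  # value returned by the step
    ContentType="application/json",
    Body=json.dumps({"inputs": "I love using ZenML for MLOps!"}),
)
print(json.loads(response["Body"].read()))
```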

huggingface-sagemaker/steps/promotion/promote_metric_compare_promoter.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -28,8 +28,8 @@

 @step
 def promote_metric_compare_promoter(
-    latest_metrics: Dict[str, str],
-    current_metrics: Dict[str, str],
+    latest_metrics: Dict[str, float],
+    current_metrics: Dict[str, float],
     metric_to_compare: str = "accuracy",
 ):
     """Try to promote trained model.
```

huggingface-sagemaker/steps/training/model_trainer.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -154,6 +154,8 @@ def model_trainer(
     eval_results = trainer.evaluate(metric_key_prefix="")

     # Log the evaluation results in model control plane
-    log_artifact_metadata(output_name="model", metrics=eval_results)
+    log_artifact_metadata(
+        artifact_name="model", metadata={"metrics": eval_results}
+    )

     return model, tokenizer
```
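With the metrics attached to the `model` artifact via the new keyword form, they can be read back through the ZenML client. A hedged sketch, assuming a ZenML version where artifact versions expose `run_metadata`:

```python
# Hedged sketch: read the logged metadata back; API details vary slightly
# across ZenML versions.
from zenml.client import Client

artifact = Client().get_artifact_version("model")
print(artifact.run_metadata)  # should include the "metrics" entry logged above
```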
Two binary image assets changed (37.7 KB and 193 KB); previews not shown in this view.

llm-complete-guide/README.md

Lines changed: 75 additions & 21 deletions
````diff
@@ -43,18 +43,23 @@ environment and install the dependencies using the following command:
 pip install -r requirements.txt
 ```

+Depending on your hardware, you may run into some issues when running the `pip install` command with the
+`flash_attn` package. In that case, running `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation`
+could help you.
+
 In order to use the default LLM for this query, you'll need an account and an
-API key from OpenAI specified as another environment variable:
+API key from OpenAI specified as a ZenML secret:

 ```shell
-export OPENAI_API_KEY=<your-openai-api-key>
+zenml secret create llm-complete --openai_api_key=<your-openai-api-key>
+export ZENML_PROJECT_SECRET_NAME=llm-complete
 ```

 ### Setting up Supabase

-[Supabase](https://supabase.com/) is a cloud provider that provides a PostgreSQL
+[Supabase](https://supabase.com/) is a cloud provider that offers a PostgreSQL
 database. It's simple to use and has a free tier that should be sufficient for
-this project. Once you've created a Supabase account and organisation, you'll
+this project. Once you've created a Supabase account and organization, you'll
 need to create a new project.

 ![](.assets/supabase-create-project.png)
````
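Since the OpenAI key now lives in a ZenML secret rather than a raw environment variable, pipeline code would read it back through the client. A minimal sketch using the secret and key names from the `zenml secret create` command above:

```python
# Sketch: fetch the OpenAI key from the ZenML secret store at runtime.
import os

from zenml.client import Client

secret_name = os.environ.get("ZENML_PROJECT_SECRET_NAME", "llm-complete")
openai_api_key = Client().get_secret(secret_name).secret_values["openai_api_key"]
```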
````diff
@@ -63,22 +68,15 @@ You'll want to save the Supabase database password as a ZenML secret so that it
 isn't stored in plaintext. You can do this by running the following command:

 ```shell
-zenml secret create supabase_postgres_db --password="YOUR_PASSWORD"
+zenml secret update llm-complete -v '{"supabase_password": "YOUR_PASSWORD", "supabase_user": "YOUR_USER", "supabase_host": "YOUR_HOST", "supabase_port": "YOUR_PORT"}'
 ```

-You'll then want to connect to this database instance by getting the connection
+You can get the user, host, and port for this database instance from the connection
 string from the Supabase dashboard.

 ![](.assets/supabase-connection-string.png)

-You can use these details to populate some environment variables where the
-pipeline code expects them:
-
-```shell
-export ZENML_POSTGRES_USER=<your-supabase-user>
-export ZENML_POSTGRES_HOST=<your-supabase-host>
-export ZENML_POSTGRES_PORT=<your-supabase-port>
-```
+In case Supabase is not an option for you, you can use a different database as the backend.

 ### Running the RAG pipeline
````
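For reference, a sketch of opening a Postgres connection from the Supabase details stored in the secret above; it assumes `psycopg2` (which the pipeline config installs) and Supabase's default `postgres` database name:

```python
# Sketch: build a database connection from the ZenML secret values; the
# "postgres" dbname is Supabase's default and is an assumption here.
import psycopg2
from zenml.client import Client

values = Client().get_secret("llm-complete").secret_values
conn = psycopg2.connect(
    host=values["supabase_host"],
    port=values["supabase_port"],
    user=values["supabase_user"],
    password=values["supabase_password"],
    dbname="postgres",
)
```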
````diff
@@ -116,6 +114,51 @@ Note that Claude will require a different API key from Anthropic. See [the
 `litellm` docs](https://docs.litellm.ai/docs/providers/anthropic) on how to set
 this up.

+### Deploying the RAG pipeline
+
+![](.assets/huggingface-space-rag-deployment.png)
+
+You'll need to update and add some secrets to make this work with your Hugging
+Face account. To get your ZenML service account API token and store URL, you can
+first create a new service account:
+
+```bash
+zenml service-account create <SERVICE_ACCOUNT_NAME>
+```
+
+For more information on this part of the process, please refer to the [ZenML
+documentation](https://docs.zenml.io/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account).
+
+Once you have your service account API token and store URL (the URL of your
+deployed ZenML tenant), you can update the secrets with the following command:
+
+```bash
+zenml secret update llm-complete --zenml_api_token=<YOUR_ZENML_SERVICE_ACCOUNT_API_TOKEN> --zenml_store_url=<YOUR_ZENML_STORE_URL>
+```
+
+To set the Hugging Face user space that gets used for the Gradio app deployment,
+you should set an environment variable with the following command:
+
+```bash
+export ZENML_HF_USERNAME=<YOUR_HF_USERNAME>
+export ZENML_HF_SPACE_NAME=<YOUR_HF_SPACE_NAME> # optional, defaults to "llm-complete-guide-rag"
+```
+
+To deploy the RAG pipeline, you can use the following command:
+
+```shell
+python run.py --deploy
+```
+
+Alternatively, you can run the basic RAG pipeline *and* deploy it in one go:
+
+```shell
+python run.py --rag --deploy
+```
+
+This will open a Hugging Face space in your browser where you can interact with
+the RAG pipeline.
+
 ### Run the LLM RAG evaluation pipeline

 To run the evaluation pipeline, you can use the following command:
````
````diff
@@ -151,16 +194,16 @@ documentation](https://docs.zenml.io/v/docs/stack-components/annotators/argilla)
 will guide you through the process of connecting to your instance as a stack
 component.

-### Finetune the embeddings
+Please use the secret from above to hold all of these values. Here we are also
+setting a Hugging Face write token. To make the rest of the pipeline work for you, you
+will need to change the Hugging Face repo URLs to a space you have permissions for.

-To run the pipeline for finetuning the embeddings, you can use the following
-commands:
-
-```shell
-pip install -r requirements-argilla.txt # special requirements
-python run.py --embeddings
+```bash
+zenml secret update llm-complete -v '{"argilla_api_key": "YOUR_ARGILLA_API_KEY", "argilla_api_url": "YOUR_ARGILLA_API_URL", "hf_token": "YOUR_HF_TOKEN"}'
 ```

+### Finetune the embeddings
+
 As with the previous pipeline, you will need to have set up and connected to an Argilla instance for this
 to work. Please follow the instructions in the [Argilla
 documentation](https://docs.argilla.io/latest/getting_started/quickstart/)
````
````diff
@@ -170,6 +213,17 @@ documentation](https://docs.zenml.io/v/docs/stack-components/annotators/argilla)
 will guide you through the process of connecting to your instance as a stack
 component.

+The pipeline assumes that your Argilla credentials are stored within a ZenML secret called `argilla_secrets`.
+![Argilla Secret](.assets/argilla_secret.png)
+
+To run the pipeline for finetuning the embeddings, you can use the following
+commands:
+
+```shell
+pip install -r requirements-argilla.txt # special requirements
+python run.py --embeddings
+```
+
 *Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka
 loss function](https://www.philschmid.de/fine-tune-embedding-model-for-rag) which we adapted for this project.*
````
Pipeline configuration (new file)

Lines changed: 40 additions & 0 deletions

```yaml
# enable_cache: False

# environment configuration
settings:
  docker:
    parent_image: "zenmldocker/prepare-release:base-0.68.0"
    requirements:
      - langchain-community
      - ratelimit
      - langchain>=0.0.325
      - langchain-openai
      - pgvector
      - psycopg2-binary
      - beautifulsoup4
      - unstructured
      - pandas
      - numpy
      - sentence-transformers>=3
      - transformers[torch]==4.43.1
      - litellm
      - ollama
      - tiktoken
      - umap-learn
      - matplotlib
      - pyarrow
      - rerankers[flashrank]
      - datasets
      - torch
      - pygithub
    environment:
      ZENML_PROJECT_SECRET_NAME: llm_complete

# configuration of the Model Control Plane
model:
  name: finetuned-zenml-docs-embeddings
  version: latest
  license: Apache 2.0
  description: Finetuned LLM on ZenML docs
  tags: ["rag", "finetuned"]
```
