Skip to content

Commit 16f3016

Browse files
authored
Merge branch 'main' into feature/PRD-700-replacing-zenml-login-references
2 parents 8c6ee83 + 99f97c3 commit 16f3016

39 files changed

+1401
-786
lines changed

.github/workflows/production_run_complete_llm.yml

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,21 @@ concurrency:
1111
cancel-in-progress: true
1212

1313
jobs:
14-
run-staging-workflow:
14+
run-production-workflow:
1515
runs-on: ubuntu-latest
1616
env:
17-
ZENML_HOST: ${{ secrets.ZENML_HOST }}
18-
ZENML_API_KEY: ${{ secrets.ZENML_API_KEY }}
19-
ZENML_PRODUCTION_STACK : 51a49786-b82a-4646-bde7-a460efb0a9c5
17+
ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
18+
ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
19+
ZENML_PRODUCTION_STACK: b3951d43-0fb2-4d32-89c5-3399374e7c7e # Set this to your production stack ID
2020
ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
2121
ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
2222
ZENML_DEBUG: true
2323
ZENML_ANALYTICS_OPT_IN: false
2424
ZENML_LOGGING_VERBOSITY: INFO
2525
ZENML_PROJECT_SECRET_NAME: llm-complete
2626
ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True
27-
ZENML_ACTION_ID: 23a4d58c-bd2b-47d5-a41d-0a845d2982f8
27+
ZENML_EVENT_SOURCE_ID: ae6ae536-d811-4838-a44b-744b768a0f31 # Set this to your preferred event source ID
28+
ZENML_SERVICE_ACCOUNT_ID: fef76af2-382f-4ab2-9e6b-5eb85a303f0e # Set this to your service account ID or delete
2829

2930
steps:
3031
- name: Check out repository code
@@ -37,15 +38,16 @@ jobs:
3738
- name: Install requirements
3839
working-directory: ./llm-complete-guide
3940
run: |
40-
pip3 install -r requirements.txt
41-
pip3 install -r requirements-argilla.txt
42-
zenml integration install gcp -y
41+
pip3 install uv
42+
uv pip install -r requirements.txt --system
43+
uv pip install -r requirements-argilla.txt --system
44+
zenml integration install gcp -y --uv
4345
4446
- name: Connect to ZenML server
4547
working-directory: ./llm-complete-guide
4648
run: |
4749
zenml init
48-
zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY
50+
zenml connect --url $ZENML_STORE_URL --api-key $ZENML_STORE_API_KEY
4951
5052
- name: Set stack (Production)
5153
working-directory: ./llm-complete-guide
@@ -55,4 +57,4 @@ jobs:
5557
- name: Run pipeline, create pipeline, configure trigger (Production)
5658
working-directory: ./llm-complete-guide
5759
run: |
58-
python gh_action_rag.py --no-cache --create-template --action-id ${{ env.ZENML_ACTION_ID }} --config rag_gcp.yaml
60+
python gh_action_rag.py --no-cache --create-template --event-source-id ${{ env.ZENML_EVENT_SOURCE_ID }} --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --config production/rag.yaml --zenml-model-version production

.github/workflows/staging_run_complete_llm.yml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ jobs:
1212
run-staging-workflow:
1313
runs-on: ubuntu-latest
1414
env:
15-
ZENML_HOST: ${{ secrets.ZENML_HOST }}
16-
ZENML_API_KEY: ${{ secrets.ZENML_API_KEY }}
17-
ZENML_STAGING_STACK: 51a49786-b82a-4646-bde7-a460efb0a9c5
15+
ZENML_STORE_URL: ${{ secrets.ZENML_PROJECTS_HOST }}
16+
ZENML_STORE_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
17+
ZENML_STAGING_STACK: 67166d73-a44e-42f9-b67f-011e9afab9b5  # Set this to your staging stack ID
1818
ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
1919
ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
2020
ZENML_DEBUG: true
@@ -34,15 +34,16 @@ jobs:
3434
- name: Install requirements
3535
working-directory: ./llm-complete-guide
3636
run: |
37-
pip3 install -r requirements.txt
38-
pip3 install -r requirements-argilla.txt
39-
zenml integration install gcp -y
37+
pip3 install uv
38+
uv pip install -r requirements.txt --system
39+
uv pip install -r requirements-argilla.txt --system
40+
zenml integration install aws s3 -y --uv
4041
4142
- name: Connect to ZenML server
4243
working-directory: ./llm-complete-guide
4344
run: |
4445
zenml init
45-
zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY
46+
zenml connect --url $ZENML_STORE_URL --api-key $ZENML_STORE_API_KEY
4647
4748
- name: Set stack (Staging)
4849
working-directory: ./llm-complete-guide
@@ -52,4 +53,4 @@ jobs:
5253
- name: Run pipeline (Staging)
5354
working-directory: ./llm-complete-guide
5455
run: |
55-
python gh_action_rag.py --no-cache --config rag_gcp.yaml
56+
python gh_action_rag.py --no-cache --config staging/rag.yaml --zenml-model-version staging

huggingface-sagemaker/steps/deploying/sagemaker_deployment.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@
1515
# limitations under the License.
1616
#
1717

18+
import os
1819
from typing import Optional
1920

2021
from gradio.aws_helper import get_sagemaker_role, get_sagemaker_session
2122
from sagemaker.huggingface import HuggingFaceModel
2223
from typing_extensions import Annotated
23-
from zenml import get_step_context, step
24+
from zenml import get_step_context, log_artifact_metadata, step
2425
from zenml.logger import get_logger
2526

2627
# Initialize logger
@@ -35,7 +36,7 @@ def deploy_hf_to_sagemaker(
3536
pytorch_version: str = "1.13.1",
3637
py_version: str = "py39",
3738
hf_task: str = "text-classification",
38-
instance_type: str = "ml.g5.2xlarge",
39+
instance_type: str = "ml.t2.medium",
3940
container_startup_health_check_timeout: int = 300,
4041
) -> Annotated[str, "sagemaker_endpoint_name"]:
4142
"""
@@ -83,4 +84,18 @@ def deploy_hf_to_sagemaker(
8384
)
8485
endpoint_name = predictor.endpoint_name
8586
logger.info(f"Model deployed to SageMaker: {endpoint_name}")
87+
88+
# get region from env variable
89+
# Use .get() so an unset AWS_REGION falls back to the default instead of raising KeyError.
region = os.environ.get("AWS_REGION") or "eu-central-1"
90+
invocation_url = f"https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{endpoint_name}/invocations"
91+
92+
log_artifact_metadata(
93+
artifact_name="sagemaker_endpoint_name",
94+
metadata={
95+
"invocation_url": invocation_url,
96+
"endpoint_name": endpoint_name,
97+
},
98+
)
99+
100+
86101
return endpoint_name

huggingface-sagemaker/steps/promotion/promote_metric_compare_promoter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828

2929
@step
3030
def promote_metric_compare_promoter(
31-
latest_metrics: Dict[str, str],
32-
current_metrics: Dict[str, str],
31+
latest_metrics: Dict[str, float],
32+
current_metrics: Dict[str, float],
3333
metric_to_compare: str = "accuracy",
3434
):
3535
"""Try to promote trained model.

huggingface-sagemaker/steps/training/model_trainer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ def model_trainer(
154154
eval_results = trainer.evaluate(metric_key_prefix="")
155155

156156
# Log the evaluation results in model control plane
157-
log_artifact_metadata(output_name="model", metrics=eval_results)
157+
log_artifact_metadata(
158+
artifact_name="model", metadata={"metrics": eval_results}
159+
)
158160

159161
return model, tokenizer
193 KB
Loading

llm-complete-guide/README.md

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ instructions are provided below for how to set that up.
2323

2424
## 📽️ Watch the webinars
2525

26-
We've recently been holding some webinars about this repository and project. Watche the videos below if you want an introduction and context around the code and ideas covered in this project.
26+
We've recently been holding some webinars about this repository and project. Watch the videos below if you want an introduction and context around the code and ideas covered in this project.
2727

2828
[![Building and Optimizing RAG Pipelines: Data Preprocessing, Embeddings, and Evaluation with ZenML](https://github.com/user-attachments/assets/1aea2bd4-8079-4ea2-98e1-8da6ba9aeebe)](https://www.youtube.com/watch?v=PazRMY8bo3U)
2929

@@ -45,7 +45,7 @@ pip install -r requirements.txt
4545

4646
Depending on your hardware you may run into some issues when running the `pip install` command with the
4747
`flash_attn` package. In that case running `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation`
48-
could help you.
48+
could help you. You might also need to install torch separately.
4949

5050
In order to use the default LLM for this query, you'll need an account and an
5151
API key from OpenAI specified as a ZenML secret:
@@ -57,9 +57,9 @@ export ZENML_PROJECT_SECRET_NAME=llm-complete
5757

5858
### Setting up Supabase
5959

60-
[Supabase](https://supabase.com/) is a cloud provider that provides a PostgreSQL
60+
[Supabase](https://supabase.com/) is a cloud provider that offers a PostgreSQL
6161
database. It's simple to use and has a free tier that should be sufficient for
62-
this project. Once you've created a Supabase account and organisation, you'll
62+
this project. Once you've created a Supabase account and organization, you'll
6363
need to create a new project.
6464

6565
![](.assets/supabase-create-project.png)
@@ -76,7 +76,7 @@ string from the Supabase dashboard.
7676

7777
![](.assets/supabase-connection-string.png)
7878

79-
In case supabase is not an option for you, you can use a different database as the backend.
79+
In case Supabase is not an option for you, you can use a different database as the backend.
8080

8181
### Running the RAG pipeline
8282

@@ -85,7 +85,7 @@ to run the pipelines in the correct order. You can run the script with the
8585
following command:
8686

8787
```shell
88-
python run.py --rag
88+
python run.py rag
8989
```
9090

9191
This will run the basic RAG pipeline, which scrapes the ZenML documentation and
@@ -100,7 +100,7 @@ use for the LLM.
100100
When you're ready to make the query, run the following command:
101101

102102
```shell
103-
python run.py --query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
103+
python run.py query "how do I use a custom materializer inside my own zenml steps? i.e. how do I set it? inside the @step decorator?" --model=gpt4
104104
```
105105

106106
Alternative options for LLMs to use include:
@@ -114,12 +114,57 @@ Note that Claude will require a different API key from Anthropic. See [the
114114
`litellm` docs](https://docs.litellm.ai/docs/providers/anthropic) on how to set
115115
this up.
116116

117+
### Deploying the RAG pipeline
118+
119+
![](.assets/huggingface-space-rag-deployment.png)
120+
121+
You'll need to update and add some secrets to make this work with your Hugging
122+
Face account. To get your ZenML service account API token and store URL, you can
123+
first create a new service account:
124+
125+
```bash
126+
zenml service-account create <SERVICE_ACCOUNT_NAME>
127+
```
128+
129+
For more information on this part of the process, please refer to the [ZenML
130+
documentation](https://docs.zenml.io/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account).
131+
132+
Once you have your service account API token and store URL (the URL of your
133+
deployed ZenML tenant), you can update the secrets with the following command:
134+
135+
```bash
136+
zenml secret update llm-complete --zenml_api_token=<YOUR_ZENML_SERVICE_ACCOUNT_API_TOKEN> --zenml_store_url=<YOUR_ZENML_STORE_URL>
137+
```
138+
139+
To set the Hugging Face user space that gets used for the Gradio app deployment,
140+
you should set the following environment variables:
141+
142+
```bash
143+
export ZENML_HF_USERNAME=<YOUR_HF_USERNAME>
144+
export ZENML_HF_SPACE_NAME=<YOUR_HF_SPACE_NAME> # optional, defaults to "llm-complete-guide-rag"
145+
```
146+
147+
To deploy the RAG pipeline, you can use the following command:
148+
149+
```shell
150+
python run.py deploy
151+
```
152+
153+
Alternatively, you can run the basic RAG pipeline *and* deploy it in one go:
154+
155+
```shell
156+
python run.py --rag --deploy
157+
```
158+
159+
This will open a Hugging Face space in your browser where you can interact with
160+
the RAG pipeline.
161+
117162
### Run the LLM RAG evaluation pipeline
118163

119164
To run the evaluation pipeline, you can use the following command:
120165

121166
```shell
122-
python run.py --evaluation
167+
python run.py evaluation
123168
```
124169

125170
You'll need to have first run the RAG pipeline to have the necessary assets in
@@ -137,7 +182,7 @@ To run the `distilabel` synthetic data generation pipeline, you can use the foll
137182

138183
```shell
139184
pip install -r requirements-argilla.txt # special requirements
140-
python run.py --synthetic
185+
python run.py synthetic
141186
```
142187

143188
You will also need to have set up and connected to an Argilla instance for this
@@ -157,7 +202,6 @@ will need to change the hf repo urls to a space you have permissions to.
157202
zenml secret update llm-complete -v '{"argilla_api_key": "YOUR_ARGILLA_API_KEY", "argilla_api_url": "YOUR_ARGILLA_API_URL", "hf_token": "YOUR_HF_TOKEN"}'
158203
```
159204

160-
161205
### Finetune the embeddings
162206

163207
As with the previous pipeline, you will need to have set up and connected to an Argilla instance for this
@@ -177,7 +221,7 @@ commands:
177221

178222
```shell
179223
pip install -r requirements-argilla.txt # special requirements
180-
python run.py --embeddings
224+
python run.py embeddings
181225
```
182226

183227
*Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v0.68.1

llm-complete-guide/configs/embeddings.yaml renamed to llm-complete-guide/configs/dev/embeddings.yaml

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# environment configuration
44
settings:
55
docker:
6-
parent_image: "zenmldocker/prepare-release:base-0.68.0"
6+
python_package_installer: "uv"
77
requirements:
88
- langchain-community
99
- ratelimit
@@ -16,7 +16,7 @@ settings:
1616
- pandas
1717
- numpy
1818
- sentence-transformers>=3
19-
- transformers[torch]
19+
- transformers[torch]==4.43.1
2020
- litellm
2121
- ollama
2222
- tiktoken
@@ -26,14 +26,7 @@ settings:
2626
- rerankers[flashrank]
2727
- datasets
2828
- torch
29+
- pygithub
30+
- openai
2931
environment:
30-
ZENML_PROJECT_SECRET_NAME: llm_complete
31-
32-
33-
# configuration of the Model Control Plane
34-
model:
35-
name: finetuned-zenml-docs-embeddings
36-
version: latest
37-
license: Apache 2.0
38-
description: Finetuned LLM on ZenML docs
39-
tags: ["rag", "finetuned"]
32+
ZENML_PROJECT_SECRET_NAME: llm_complete

llm-complete-guide/configs/rag_local_dev.yaml renamed to llm-complete-guide/configs/dev/rag.yaml

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,17 @@ settings:
1414
- tiktoken
1515
- ratelimit
1616
- rerankers
17+
- pygithub
18+
- rerankers[flashrank]
19+
- matplotlib
20+
1721
environment:
1822
ZENML_PROJECT_SECRET_NAME: llm_complete
1923
ZENML_ENABLE_RICH_TRACEBACK: FALSE
2024
ZENML_LOGGING_VERBOSITY: INFO
21-
22-
23-
# configuration of the Model Control Plane
24-
model:
25-
name: finetuned-zenml-docs-embeddings
26-
license: Apache 2.0
27-
description: Finetuned LLM on ZenML docs
28-
tags: ["rag", "finetuned"]
29-
25+
python_package_installer: "uv"
3026
steps:
3127
url_scraper:
3228
parameters:
33-
docs_url: https://docs.zenml.io/stack-components/orchestrators
29+
docs_url: https://docs.zenml.io/
30+
use_dev_set: true

0 commit comments

Comments
 (0)