
Commit 1c31571

Merge branch 'main' into example/vllm-model-deployer
2 parents 97df909 + b20d5af


45 files changed: +1868 -332 lines
Production GitHub Actions workflow (new file)

Lines changed: 59 additions & 0 deletions

```yaml
name: Production LLM-COMPLETE
on:
  push:
    branches:
      - main
    paths:
      - 'llm-complete-guide/**'
concurrency:
  # New commit on branch cancels running workflows of the same branch
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run-staging-workflow:
    runs-on: ubuntu-latest
    env:
      ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }}
      ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
      ZENML_PRODUCTION_STACK: b3951d43-0fb2-4d32-89c5-3399374e7c7e # Set this to your production stack ID
      ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
      ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
      ZENML_DEBUG: true
      ZENML_ANALYTICS_OPT_IN: false
      ZENML_LOGGING_VERBOSITY: INFO
      ZENML_PROJECT_SECRET_NAME: llm-complete
      ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True
      ZENML_EVENT_SOURCE_ID: ae6ae536-d811-4838-a44b-744b768a0f31 # Set this to your preferred event source ID
      ZENML_SERVICE_ACCOUNT_ID: fef76af2-382f-4ab2-9e6b-5eb85a303f0e # Set this to your service account ID or delete

    steps:
      - name: Check out repository code
        uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install requirements
        working-directory: ./llm-complete-guide
        run: |
          pip3 install -r requirements.txt
          pip3 install -r requirements-argilla.txt
          zenml integration install gcp -y

      - name: Connect to ZenML server
        working-directory: ./llm-complete-guide
        run: |
          zenml init
          zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY

      - name: Set stack (Production)
        working-directory: ./llm-complete-guide
        run: |
          zenml stack set ${{ env.ZENML_PRODUCTION_STACK }}

      - name: Run pipeline, create pipeline, configure trigger (Production)
        working-directory: ./llm-complete-guide
        run: |
          # ZENML_ACTION_ID is referenced here but is not defined in the env block above,
          # so it must be added there for this call to work.
          python gh_action_rag.py --no-cache --create-template --event-source-id ${{ env.ZENML_EVENT_SOURCE_ID }} --service-account-id ${{ env.ZENML_SERVICE_ACCOUNT_ID }} --action-id ${{ env.ZENML_ACTION_ID }} --config rag_gcp.yaml
```
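The `gh_action_rag.py` entrypoint itself is not part of this commit. For orientation only, a minimal sketch of a CLI that would accept the flags used above, assuming a `click`-based interface; every name here is illustrative rather than taken from the repo:

```python
# Hypothetical sketch of the CLI surface the workflow relies on; the real
# gh_action_rag.py is not shown in this diff, and click is an assumption.
import click


@click.command()
@click.option("--no-cache", is_flag=True, default=False, help="Disable step caching.")
@click.option("--create-template", is_flag=True, default=False, help="Also create a run template and trigger.")
@click.option("--event-source-id", default=None, help="ZenML event source UUID.")
@click.option("--service-account-id", default=None, help="ZenML service account UUID.")
@click.option("--action-id", default=None, help="ZenML action UUID.")
@click.option("--config", default="rag_local_dev.yaml", help="Pipeline YAML config to run with.")
def main(no_cache, create_template, event_source_id, service_account_id, action_id, config):
    """Run the RAG pipeline from CI, optionally registering a trigger."""
    ...  # pipeline invocation would go here


if __name__ == "__main__":
    main()
```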
Staging GitHub Actions workflow (new file)

Lines changed: 55 additions & 0 deletions

```yaml
name: Staging Trigger LLM-COMPLETE
on:
  pull_request:
    types: [opened, synchronize]
    branches: [staging, main]
concurrency:
  # New commit on branch cancels running workflows of the same branch
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run-staging-workflow:
    runs-on: ubuntu-latest
    env:
      ZENML_HOST: ${{ secrets.ZENML_PROJECTS_HOST }}
      ZENML_API_KEY: ${{ secrets.ZENML_PROJECTS_API_KEY }}
      ZENML_STAGING_STACK: 67166d73-a44e-42f9-b67f-011e9afab9b5 # Set this to your staging stack ID
      ZENML_GITHUB_SHA: ${{ github.event.pull_request.head.sha }}
      ZENML_GITHUB_URL_PR: ${{ github.event.pull_request._links.html.href }}
      ZENML_DEBUG: true
      ZENML_ANALYTICS_OPT_IN: false
      ZENML_LOGGING_VERBOSITY: INFO
      ZENML_PROJECT_SECRET_NAME: llm-complete
      ZENML_DISABLE_CLIENT_SERVER_MISMATCH_WARNING: True

    steps:
      - name: Check out repository code
        uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Install requirements
        working-directory: ./llm-complete-guide
        run: |
          pip3 install -r requirements.txt
          pip3 install -r requirements-argilla.txt
          zenml integration install aws s3 -y

      - name: Connect to ZenML server
        working-directory: ./llm-complete-guide
        run: |
          zenml init
          zenml connect --url $ZENML_HOST --api-key $ZENML_API_KEY

      - name: Set stack (Staging)
        working-directory: ./llm-complete-guide
        run: |
          zenml stack set ${{ env.ZENML_STAGING_STACK }}

      - name: Run pipeline (Staging)
        working-directory: ./llm-complete-guide
        run: |
          python gh_action_rag.py --no-cache --config rag_local_dev.yaml
```

.gitignore

Lines changed: 2 additions & 0 deletions

```diff
@@ -162,6 +162,8 @@ llm-lora-finetuning/configs/shopify.yaml
 finetuned-matryoshka/
 finetuned-all-MiniLM-L6-v2/
 finetuned-snowflake-arctic-embed-m/
+finetuned-snowflake-arctic-embed-m-v1.5/
+.gradio/

 # ollama ignores
 nohup.out
```

huggingface-sagemaker/steps/deploying/sagemaker_deployment.py

Lines changed: 17 additions & 2 deletions

```diff
@@ -15,12 +15,13 @@
 # limitations under the License.
 #

+import os
 from typing import Optional

 from gradio.aws_helper import get_sagemaker_role, get_sagemaker_session
 from sagemaker.huggingface import HuggingFaceModel
 from typing_extensions import Annotated
-from zenml import get_step_context, step
+from zenml import get_step_context, log_artifact_metadata, step
 from zenml.logger import get_logger

 # Initialize logger
@@ -35,7 +36,7 @@ def deploy_hf_to_sagemaker(
     pytorch_version: str = "1.13.1",
     py_version: str = "py39",
     hf_task: str = "text-classification",
-    instance_type: str = "ml.g5.2xlarge",
+    instance_type: str = "ml.t2.medium",
     container_startup_health_check_timeout: int = 300,
 ) -> Annotated[str, "sagemaker_endpoint_name"]:
     """
@@ -83,4 +84,18 @@
     )
     endpoint_name = predictor.endpoint_name
     logger.info(f"Model deployed to SageMaker: {endpoint_name}")
+
+    # Get the region from an env variable; os.environ.get() keeps the fallback
+    # reachable when AWS_REGION is unset (a plain lookup would raise KeyError).
+    region = os.environ.get("AWS_REGION") or "eu-central-1"
+    invocation_url = f"https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{endpoint_name}/invocations"
+
+    log_artifact_metadata(
+        artifact_name="sagemaker_endpoint_name",
+        metadata={
+            "invocation_url": invocation_url,
+            "endpoint_name": endpoint_name,
+        },
+    )
+
     return endpoint_name
```
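The step now logs an `invocation_url` alongside the endpoint name. A minimal sketch of calling such an endpoint from a client, assuming AWS credentials are configured and the endpoint accepts the standard Hugging Face `text-classification` payload; the endpoint name placeholder is illustrative:

```python
# Sketch: invoke the deployed SageMaker endpoint via the runtime API, which
# is what the logged invocation_url points at.
import json

import boto3

runtime = boto3.client("sagemaker-runtime", region_name="eu-central-1")
response = runtime.invoke_endpoint(
    EndpointName="<sagemaker_endpoint_name>",  # value returned by the step
    ContentType="application/json",
    Body=json.dumps({"inputs": "I love using ZenML for MLOps!"}),
)
print(json.loads(response["Body"].read()))
```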

huggingface-sagemaker/steps/promotion/promote_metric_compare_promoter.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -28,8 +28,8 @@

 @step
 def promote_metric_compare_promoter(
-    latest_metrics: Dict[str, str],
-    current_metrics: Dict[str, str],
+    latest_metrics: Dict[str, float],
+    current_metrics: Dict[str, float],
     metric_to_compare: str = "accuracy",
 ):
     """Try to promote trained model.
```

huggingface-sagemaker/steps/training/model_trainer.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -154,6 +154,8 @@ def model_trainer(
     eval_results = trainer.evaluate(metric_key_prefix="")

     # Log the evaluation results in model control plane
-    log_artifact_metadata(output_name="model", metrics=eval_results)
+    log_artifact_metadata(
+        artifact_name="model", metadata={"metrics": eval_results}
+    )

     return model, tokenizer
```
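With the metrics attached to the `model` artifact via the new keyword form, they can be read back through the ZenML client. A hedged sketch, assuming a ZenML version where artifact versions expose `run_metadata`:

```python
# Hedged sketch: read the logged metadata back; API details vary slightly
# across ZenML versions.
from zenml.client import Client

artifact = Client().get_artifact_version("model")
print(artifact.run_metadata)  # should include the "metrics" entry logged above
```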
Two binary image assets changed (37.7 KB and 193 KB); previews not shown in this view.

llm-complete-guide/README.md

Lines changed: 75 additions & 21 deletions
````diff
@@ -43,18 +43,23 @@ environment and install the dependencies using the following command:
 pip install -r requirements.txt
 ```

+Depending on your hardware, you may run into some issues when running the `pip install` command with the
+`flash_attn` package. In that case, running `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE pip install flash-attn --no-build-isolation`
+could help you.
+
 In order to use the default LLM for this query, you'll need an account and an
-API key from OpenAI specified as another environment variable:
+API key from OpenAI specified as a ZenML secret:

 ```shell
-export OPENAI_API_KEY=<your-openai-api-key>
+zenml secret create llm-complete --openai_api_key=<your-openai-api-key>
+export ZENML_PROJECT_SECRET_NAME=llm-complete
 ```

 ### Setting up Supabase

-[Supabase](https://supabase.com/) is a cloud provider that provides a PostgreSQL
+[Supabase](https://supabase.com/) is a cloud provider that offers a PostgreSQL
 database. It's simple to use and has a free tier that should be sufficient for
-this project. Once you've created a Supabase account and organisation, you'll
+this project. Once you've created a Supabase account and organization, you'll
 need to create a new project.

 ![](.assets/supabase-create-project.png)
````
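Since the OpenAI key now lives in a ZenML secret rather than a raw environment variable, pipeline code would read it back through the client. A minimal sketch using the secret and key names from the `zenml secret create` command above:

```python
# Sketch: fetch the OpenAI key from the ZenML secret store at runtime.
import os

from zenml.client import Client

secret_name = os.environ.get("ZENML_PROJECT_SECRET_NAME", "llm-complete")
openai_api_key = Client().get_secret(secret_name).secret_values["openai_api_key"]
```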
````diff
@@ -63,22 +68,15 @@ You'll want to save the Supabase database password as a ZenML secret so that it
 isn't stored in plaintext. You can do this by running the following command:

 ```shell
-zenml secret create supabase_postgres_db --password="YOUR_PASSWORD"
+zenml secret update llm-complete -v '{"supabase_password": "YOUR_PASSWORD", "supabase_user": "YOUR_USER", "supabase_host": "YOUR_HOST", "supabase_port": "YOUR_PORT"}'
 ```

-You'll then want to connect to this database instance by getting the connection
+You can get the user, host, and port for this database instance from the connection
 string from the Supabase dashboard.

 ![](.assets/supabase-connection-string.png)

-You can use these details to populate some environment variables where the
-pipeline code expects them:
-
-```shell
-export ZENML_POSTGRES_USER=<your-supabase-user>
-export ZENML_POSTGRES_HOST=<your-supabase-host>
-export ZENML_POSTGRES_PORT=<your-supabase-port>
-```
+In case Supabase is not an option for you, you can use a different database as the backend.

 ### Running the RAG pipeline
````
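For reference, a sketch of opening a Postgres connection from the Supabase details stored in the secret above; it assumes `psycopg2` (which the pipeline config installs) and Supabase's default `postgres` database name:

```python
# Sketch: build a database connection from the ZenML secret values; the
# "postgres" dbname is Supabase's default and is an assumption here.
import psycopg2
from zenml.client import Client

values = Client().get_secret("llm-complete").secret_values
conn = psycopg2.connect(
    host=values["supabase_host"],
    port=values["supabase_port"],
    user=values["supabase_user"],
    password=values["supabase_password"],
    dbname="postgres",
)
```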
````diff
@@ -116,6 +114,51 @@ Note that Claude will require a different API key from Anthropic. See [the
 `litellm` docs](https://docs.litellm.ai/docs/providers/anthropic) on how to set
 this up.

+### Deploying the RAG pipeline
+
+![](.assets/huggingface-space-rag-deployment.png)
+
+You'll need to update and add some secrets to make this work with your Hugging
+Face account. To get your ZenML service account API token and store URL, you can
+first create a new service account:
+
+```bash
+zenml service-account create <SERVICE_ACCOUNT_NAME>
+```
+
+For more information on this part of the process, please refer to the [ZenML
+documentation](https://docs.zenml.io/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account).
+
+Once you have your service account API token and store URL (the URL of your
+deployed ZenML tenant), you can update the secrets with the following command:
+
+```bash
+zenml secret update llm-complete --zenml_api_token=<YOUR_ZENML_SERVICE_ACCOUNT_API_TOKEN> --zenml_store_url=<YOUR_ZENML_STORE_URL>
+```
+
+To set the Hugging Face user space that gets used for the Gradio app deployment,
+you should set an environment variable with the following command:
+
+```bash
+export ZENML_HF_USERNAME=<YOUR_HF_USERNAME>
+export ZENML_HF_SPACE_NAME=<YOUR_HF_SPACE_NAME> # optional, defaults to "llm-complete-guide-rag"
+```
+
+To deploy the RAG pipeline, you can use the following command:
+
+```shell
+python run.py --deploy
+```
+
+Alternatively, you can run the basic RAG pipeline *and* deploy it in one go:
+
+```shell
+python run.py --rag --deploy
+```
+
+This will open a Hugging Face space in your browser where you can interact with
+the RAG pipeline.
+
 ### Run the LLM RAG evaluation pipeline

 To run the evaluation pipeline, you can use the following command:
````
````diff
@@ -151,16 +194,16 @@ documentation](https://docs.zenml.io/v/docs/stack-components/annotators/argilla)
 will guide you through the process of connecting to your instance as a stack
 component.

-### Finetune the embeddings
+Please use the secret from above to hold all of these values. Here we are also
+setting a Hugging Face write token. To make the rest of the pipeline work for you, you
+will need to change the Hugging Face repo URLs to a space you have permissions for.

-To run the pipeline for finetuning the embeddings, you can use the following
-commands:
-
-```shell
-pip install -r requirements-argilla.txt # special requirements
-python run.py --embeddings
+```bash
+zenml secret update llm-complete -v '{"argilla_api_key": "YOUR_ARGILLA_API_KEY", "argilla_api_url": "YOUR_ARGILLA_API_URL", "hf_token": "YOUR_HF_TOKEN"}'
 ```

+### Finetune the embeddings
+
 As with the previous pipeline, you will need to have set up and connected to an Argilla instance for this
 to work. Please follow the instructions in the [Argilla
 documentation](https://docs.argilla.io/latest/getting_started/quickstart/)
````
````diff
@@ -170,6 +213,17 @@ documentation](https://docs.zenml.io/v/docs/stack-components/annotators/argilla)
 will guide you through the process of connecting to your instance as a stack
 component.

+The pipeline assumes that your Argilla credentials are stored within a ZenML secret called `argilla_secrets`.
+![Argilla Secret](.assets/argilla_secret.png)
+
+To run the pipeline for finetuning the embeddings, you can use the following
+commands:
+
+```shell
+pip install -r requirements-argilla.txt # special requirements
+python run.py --embeddings
+```
+
 *Credit to Phil Schmid for his [tutorial on embeddings finetuning with Matryoshka
 loss function](https://www.philschmid.de/fine-tune-embedding-model-for-rag) which we adapted for this project.*
````
Pipeline configuration (new file)

Lines changed: 40 additions & 0 deletions

```yaml
# enable_cache: False

# environment configuration
settings:
  docker:
    parent_image: "zenmldocker/prepare-release:base-0.68.0"
    requirements:
      - langchain-community
      - ratelimit
      - langchain>=0.0.325
      - langchain-openai
      - pgvector
      - psycopg2-binary
      - beautifulsoup4
      - unstructured
      - pandas
      - numpy
      - sentence-transformers>=3
      - transformers[torch]==4.43.1
      - litellm
      - ollama
      - tiktoken
      - umap-learn
      - matplotlib
      - pyarrow
      - rerankers[flashrank]
      - datasets
      - torch
      - pygithub
    environment:
      ZENML_PROJECT_SECRET_NAME: llm_complete

# configuration of the Model Control Plane
model:
  name: finetuned-zenml-docs-embeddings
  version: latest
  license: Apache 2.0
  description: Finetuned LLM on ZenML docs
  tags: ["rag", "finetuned"]
```
