Commit 695ebbd

Add vllm deployment example

1 parent d3759d4 commit 695ebbd

9 files changed: +298 -0 lines changed

llm-vllm-deployer/LICENSE

Lines changed: 15 additions & 0 deletions
Apache Software License 2.0

Copyright (c) ZenML GmbH 2024. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

llm-vllm-deployer/README.md

Lines changed: 46 additions & 0 deletions
# ☮️ Deploying open source LLMs using MLOps pipelines with vLLM

Welcome to your newly generated "ZenML LLM vLLM deployment" project! This is
a great way to get hands-on with ZenML using a production-like template.
The project contains a collection of ZenML steps, pipelines and other artifacts
and useful resources that can serve as a solid starting point for deploying open-source LLMs using ZenML.

Using these pipelines, we can run the deployment with a single command while using YAML files for [configuration](https://docs.zenml.io/user-guide/production-guide/configure-pipeline) and letting ZenML take care of tracking our metadata and [containerizing our pipelines](https://docs.zenml.io/how-to/customize-docker-builds).

<TODO: Add image from ZenML Cloud for pipeline here>

## 🏃 How to run

In this project, we will deploy the [gpt-2](https://huggingface.co/openai-community/gpt2) model using [vLLM](https://github.com/vllm-project/vllm). Before we're able to run any pipeline, we need to set up our environment as follows:

```bash
# Set up a Python virtual environment, if you haven't already
python3 -m venv .venv
source .venv/bin/activate

# Install requirements
pip install -r requirements.txt
```

Run the deployment pipeline:

```bash
python run.py
```
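
Once the run completes, the deployer step logs the prediction URL of the local vLLM server. vLLM exposes an OpenAI-compatible HTTP API, so you can smoke-test the deployment with a plain HTTP request. A minimal sketch, assuming the server listens at the default `http://localhost:8000` (substitute the URL the pipeline actually logs):

```python
import requests

# Query the OpenAI-compatible completions endpoint served by vLLM.
# Host and port are assumptions; use the logged prediction URL instead.
response = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "openai-community/gpt2",
        "prompt": "ZenML is",
        "max_tokens": 32,
    },
    timeout=30,
)
response.raise_for_status()
print(response.json()["choices"][0]["text"])
```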

## 📜 Project Structure

The project loosely follows [the recommended ZenML project structure](https://docs.zenml.io/how-to/setting-up-a-project-repository/best-practices):

```
.
├── configs                        # pipeline configuration files
│   └── default_vllm_deploy.yaml   # default local or remote orchestrator configuration
├── pipelines                      # `zenml.pipeline` implementations
│   └── deploy_pipeline.py         # vllm deployment pipeline
├── steps                          # logically grouped `zenml.steps` implementations
│   └── vllm_deployer.py           # deploy model using vllm
├── README.md                      # this file
├── requirements.txt               # extra Python dependencies
└── run.py                         # CLI tool to run pipelines on ZenML Stack
```
llm-vllm-deployer/configs/default_vllm_deploy.yaml

Lines changed: 13 additions & 0 deletions

model:
  name: gpt2
  description: "Deploy `openai-community/gpt2` using vllm."
  tags:
    - llm
    - vllm
    - openai-community/gpt2

steps:
  vllm_model_deployer_step:
    parameters:
      config:
        model: openai-community/gpt2
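
In this config, the top-level `model:` block only controls how the run is registered in ZenML's Model Control Plane; the model that actually gets served is `steps.vllm_model_deployer_step.parameters.config.model`. To deploy a different Hugging Face model, point that key (and, ideally, the registered model name) at another repo id.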

llm-vllm-deployer/pipelines/__init__.py

Whitespace-only changes.
llm-vllm-deployer/pipelines/deploy_pipeline.py

Lines changed: 34 additions & 0 deletions

# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2024. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Annotated

from zenml import Model, pipeline
from zenml.integrations.vllm.services.vllm_deployment import VLLMDeploymentService

from steps.vllm_deployer import vllm_model_deployer_step


@pipeline(model=Model(name="gpt2"))
def deploy_vllm_pipeline(
    model: str = "gpt2",
    timeout: int = 1200,
) -> Annotated[VLLMDeploymentService, "GPT2"]:
    """Deploy a Hugging Face model with vLLM and return the deployment service."""
    service = vllm_model_deployer_step(
        model=model,
        timeout=timeout,
    )
    return service
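
Because the deployer step registers the server with ZenML's model deployer, the service can be rediscovered outside the pipeline. A sketch, mirroring the lookup pattern used inside `vllm_model_deployer_step` further down (the config keys are assumptions based on that step's runtime configuration):

```python
from zenml.integrations.vllm.model_deployers.vllm_model_deployer import (
    VLLMModelDeployer,
)
from zenml.integrations.vllm.services.vllm_deployment import VLLMDeploymentService

# Look up services previously registered by the deployment pipeline.
model_deployer = VLLMModelDeployer.get_active_model_deployer()
services = model_deployer.find_model_server(
    config={
        "pipeline_name": "deploy_vllm_pipeline",
        "pipeline_step_name": "vllm_model_deployer_step",
    },
    service_type=VLLMDeploymentService.SERVICE_TYPE,
)
if services:
    print(services[0].prediction_url)
```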

llm-vllm-deployer/requirements.txt

Lines changed: 1 addition & 0 deletions
zenml>=0.66.0
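
Note that `zenml` alone does not ship vLLM; the deployer step uses ZenML's vLLM integration, so you will likely also need `zenml integration install vllm` (or an equivalent `pip install vllm`) in this environment.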

llm-vllm-deployer/run.py

Lines changed: 78 additions & 0 deletions
# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2024. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
from typing import Optional

import click


@click.command(
    help="""
ZenML vLLM deployer project CLI.

Run the ZenML vLLM model deployment pipeline.

Examples:

  \b
  # Run the pipeline
  python run.py

  \b
  # Run the pipeline with a custom config
  python run.py --config default_vllm_deploy.yaml
"""
)
@click.option(
    "--config",
    type=str,
    default="default_vllm_deploy.yaml",
    help="Path to the YAML config file.",
)
@click.option(
    "--no-cache",
    is_flag=True,
    default=False,
    help="Disable caching for the pipeline run.",
)
def main(
    config: Optional[str] = None,
    no_cache: bool = False,
):
    """Main entry point for the pipeline execution.

    Args:
        config: Path to the YAML config file.
        no_cache: If `True`, caching will be disabled.
    """
    config_folder = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "configs",
    )
    pipeline_args = {"enable_cache": not no_cache}
    if not config:
        raise RuntimeError("Config file is required to run a pipeline.")

    pipeline_args["config_path"] = os.path.join(config_folder, config)

    from pipelines.deploy_pipeline import deploy_vllm_pipeline

    deploy_vllm_pipeline.with_options(**pipeline_args)()


if __name__ == "__main__":
    main()
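
Note that the deployer step itself is declared with `enable_cache=False`, so a plain `python run.py` always redeploys; `--no-cache` additionally disables caching for any other steps added to the pipeline.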

llm-vllm-deployer/steps/__init__.py

Whitespace-only changes.
llm-vllm-deployer/steps/vllm_deployer.py

Lines changed: 111 additions & 0 deletions

# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Implementation of the vLLM model deployer pipeline step."""

from typing import Optional, cast

from zenml import get_step_context, step
from zenml.integrations.vllm.model_deployers.vllm_model_deployer import (
    VLLMModelDeployer,
)
from zenml.integrations.vllm.services.vllm_deployment import (
    VLLMDeploymentService,
    VLLMServiceConfig,
)
from zenml.logger import get_logger

logger = get_logger(__name__)


@step(enable_cache=False)
def vllm_model_deployer_step(
    model: str,
    tokenizer: Optional[str] = None,
    timeout: int = 1200,
    deploy_decision: bool = True,
) -> VLLMDeploymentService:
    """Model deployer pipeline step for vLLM.

    This step deploys a given model to a local vLLM HTTP prediction server.

    Args:
        model: Name or path of the Hugging Face model to deploy.
        tokenizer: Name or path of the Hugging Face tokenizer to use.
            If unspecified, the model name or path will be used.
        timeout: The number of seconds to wait for the service to start/stop.
        deploy_decision: Whether to deploy the model or not.

    Returns:
        vLLM deployment service.
    """
    # get the current active model deployer
    model_deployer = cast(
        VLLMModelDeployer, VLLMModelDeployer.get_active_model_deployer()
    )

    # get pipeline name, step name and run id
    step_context = get_step_context()
    pipeline_name = step_context.pipeline.name
    step_name = step_context.step_run.name

    # create a config for the new model service
    predictor_cfg = VLLMServiceConfig(
        model=model,
        tokenizer=tokenizer,
        model_name="default",  # Required for ServiceConfig
    )

    # update the step configuration with the real pipeline runtime information
    predictor_cfg = predictor_cfg.model_copy()
    predictor_cfg.pipeline_name = pipeline_name
    predictor_cfg.pipeline_step_name = step_name

    # fetch existing services with the same pipeline name, step name and model name
    existing_services = model_deployer.find_model_server(
        config=predictor_cfg.model_dump(),
        service_type=VLLMDeploymentService.SERVICE_TYPE,
    )

    # reuse the most recent matching service, if any
    if existing_services:
        service = cast(VLLMDeploymentService, existing_services[0])

    if not deploy_decision and existing_services:
        logger.info(
            f"Skipping model deployment because the model quality does not "
            f"meet the criteria. Reusing last model server deployed by step "
            f"'{step_name}' and pipeline '{pipeline_name}' for model "
            f"'{model}'..."
        )
        if not service.is_running:
            service.start(timeout=timeout)
        return service

    # create a new model deployment and replace an old one if it exists
    new_service = cast(
        VLLMDeploymentService,
        model_deployer.deploy_model(
            replace=True,
            config=predictor_cfg,
            timeout=timeout,
            service_type=VLLMDeploymentService.SERVICE_TYPE,
        ),
    )

    logger.info(
        f"vLLM deployment service started and reachable at:\n"
        f"    {new_service.prediction_url}\n"
    )

    return new_service
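
The local vLLM server keeps running after the pipeline run completes. To shut it down later, fetch the service as in the sketch after `deploy_vllm_pipeline` above and call `service.stop(timeout=...)`, the counterpart of the `service.start` call used in this step.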
