Skip to content

Commit dfe2f01

Browse files
committed
Enhance local deployment pipeline with chat interface integration and Kubernetes configuration updates
1 parent dd6ab06 commit dfe2f01

File tree

9 files changed

+369
-17
lines changed

9 files changed

+369
-17
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from steps.bento_builder import bento_builder
22
from steps.bento_deployment import bento_deployment
3+
from steps.visualize_chat import create_chat_interface
34
from zenml import pipeline
45

56

67
@pipeline(enable_cache=False)
78
def local_deployment():
89
bento = bento_builder()
910
bento_deployment(bento)
11+
create_chat_interface()
1012

1113
#vllm_model_deployer_step()

llm-complete-guide/run.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
rag_deployment,
5151
llm_index_and_evaluate,
5252
local_deployment,
53+
production_deployment,
5354
)
5455
from structures import Document
5556
from zenml.materializers.materializer_registry import materializer_registry
@@ -144,6 +145,12 @@
144145
default=None,
145146
help="Path to config",
146147
)
148+
@click.option(
149+
"--env",
150+
"env",
151+
default="local",
152+
help="The environment to use for the completion.",
153+
)
147154
def main(
148155
pipeline: str,
149156
query_text: Optional[str] = None,
@@ -154,6 +161,7 @@ def main(
154161
use_argilla: bool = False,
155162
use_reranker: bool = False,
156163
config: Optional[str] = None,
164+
env: str = "local",
157165
):
158166
"""Main entry point for the pipeline execution.
159167
@@ -167,6 +175,7 @@ def main(
167175
use_argilla (bool): If True, Argilla annotations will be used
168176
use_reranker (bool): If True, rerankers will be used
169177
config (Optional[str]): Path to config file
178+
env (str): The environment to use for the deployment (local, huggingface space, k8s etc.)
170179
"""
171180
pipeline_args = {"enable_cache": not no_cache}
172181
embeddings_finetune_args = {
@@ -259,9 +268,18 @@ def main(
259268
)()
260269

261270
elif pipeline == "deploy":
262-
#rag_deployment.with_options(model=zenml_model, **pipeline_args)()
263-
local_deployment.with_options(model=zenml_model, **pipeline_args)()
264-
271+
if env == "local":
272+
local_deployment.with_options(
273+
model=zenml_model, config_path=config_path, **pipeline_args
274+
)()
275+
elif env == "huggingface":
276+
rag_deployment.with_options(
277+
model=zenml_model, config_path=config_path, **pipeline_args
278+
)()
279+
elif env == "k8s":
280+
production_deployment.with_options(
281+
model=zenml_model, config_path=config_path, **pipeline_args
282+
)()
265283
elif pipeline == "evaluation":
266284
pipeline_args["enable_cache"] = False
267285
llm_eval.with_options(model=zenml_model, config_path=config_path)()

llm-complete-guide/service.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,18 @@
2626
"timeout": 300,
2727
"concurrency": 256,
2828
},
29+
http={
30+
"cors": {
31+
"enabled": True,
32+
"access_control_allow_origins": ["https://cloud.zenml.io"], # Add your allowed origins
33+
"access_control_allow_methods": ["GET", "OPTIONS", "POST", "HEAD", "PUT"],
34+
"access_control_allow_credentials": True,
35+
"access_control_allow_headers": ["*"],
36+
# "access_control_allow_origin_regex": "https://.*\.my_org\.com", # Optional regex
37+
"access_control_max_age": 1200,
38+
"access_control_expose_headers": ["Content-Length"],
39+
}
40+
}
2941
)
3042
class RAGService:
3143
"""RAG service for generating responses using LLM and RAG."""

llm-complete-guide/steps/bento_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
)
3232
from zenml.integrations.bentoml.steps import bento_builder_step
3333
from zenml.logger import get_logger
34+
from zenml.orchestrators.utils import get_config_environment_vars
3435
from zenml.utils import source_utils
3536

3637
logger = get_logger(__name__)
@@ -64,6 +65,7 @@ def bento_builder() -> (
6465
if Client().active_stack.orchestrator.flavor == "local":
6566
model = get_step_context().model
6667
version_to_deploy = Model(name=model.name, version="production")
68+
logger.info(f"Building BentoML bundle for model: {version_to_deploy.name}")
6769
# Build the BentoML bundle
6870
bento = bentos.build(
6971
service="service.py:RAGService",

llm-complete-guide/steps/bento_dockerizer.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
logger = get_logger(__name__)
3030

31-
@step
31+
@step(enable_cache=False)
3232
def bento_dockerizer() -> (
3333
Annotated[
3434
str,
@@ -40,12 +40,11 @@ def bento_dockerizer() -> (
4040
This step is responsible for dockerizing the BentoML model.
4141
"""
4242
### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
43+
zenml_client = Client()
4344
model = get_step_context().model
44-
version_to_deploy = Model(name=model.name, version="production")
45-
bentoml_deployment = version_to_deploy.get_model_artifact(name="bentoml_rag_deployment")
45+
version_to_deploy = Model(name=model.name)
46+
bentoml_deployment = zenml_client.get_artifact_version(name_id_or_prefix="bentoml_rag_deployment")
4647
bento_tag = f'{bentoml_deployment.run_metadata["bento_tag_name"]}:{bentoml_deployment.run_metadata["bento_info_version"]}'
47-
48-
zenml_client = Client()
4948
container_registry = zenml_client.active_stack.container_registry
5049
assert container_registry, "Container registry is not configured."
5150
image_name = f"{container_registry.config.uri}/{bento_tag}"

llm-complete-guide/steps/k8s_deployment.py

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,21 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
1212
# or implied. See the License for the specific language governing
1313
# permissions and limitations under the License.
14-
from pathlib import Path
15-
from typing import Dict, Optional
1614
import re
15+
from pathlib import Path
16+
from typing import Dict, Optional, cast
17+
1718
import yaml
1819
from kubernetes import client, config
1920
from kubernetes.client.rest import ApiException
2021
from zenml import get_step_context, step
2122
from zenml.client import Client
23+
from zenml.integrations.bentoml.services.bentoml_local_deployment import (
24+
BentoMLLocalDeploymentConfig,
25+
BentoMLLocalDeploymentService,
26+
)
2227
from zenml.logger import get_logger
28+
from zenml.orchestrators.utils import get_config_environment_vars
2329

2430
logger = get_logger(__name__)
2531

@@ -93,7 +99,7 @@ def apply_kubernetes_configuration(k8s_configs: list) -> None:
9399
logger.error(f"Error applying {kind} {name}: {e}")
94100
raise e
95101

96-
@step
102+
@step(enable_cache=False)
97103
def k8s_deployment(
98104
docker_image_tag: str,
99105
namespace: str = "default"
@@ -103,6 +109,17 @@ def k8s_deployment(
103109
# Sanitize the model name
104110
model_name = sanitize_name(raw_model_name)
105111

112+
# Get environment variables
113+
environment_vars = get_config_environment_vars()
114+
115+
# Get current deployment
116+
zenml_client = Client()
117+
model_deployer = zenml_client.active_stack.model_deployer
118+
services = model_deployer.find_model_server(
119+
model_name=model_name,
120+
model_version="production",
121+
)
122+
106123
# Read the K8s template
107124
template_path = Path(__file__).parent / "k8s_template.yaml"
108125
with open(template_path, "r") as f:
@@ -120,6 +137,23 @@ def k8s_deployment(
120137
if config["kind"] == "Service":
121138
# Update service selector
122139
config["spec"]["selector"]["app"] = model_name
140+
141+
# Update metadata annotations with SSL certificate ARN
142+
config["metadata"]["annotations"] = {
143+
"service.beta.kubernetes.io/aws-load-balancer-ssl-cert": "arn:aws:acm:eu-central-1:339712793861:certificate/0426ace8-5fa3-40dd-bd81-b0fb1064bd85",
144+
"service.beta.kubernetes.io/aws-load-balancer-backend-protocol": "http",
145+
"service.beta.kubernetes.io/aws-load-balancer-ssl-ports": "443",
146+
"service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout": "3600"
147+
}
148+
149+
# Update ports
150+
config["spec"]["ports"] = [
151+
{
152+
"name": "https",
153+
"port": 443,
154+
"targetPort": 3000
155+
}
156+
]
123157

124158
elif config["kind"] == "Deployment":
125159
# Update deployment selector and template
@@ -131,6 +165,12 @@ def k8s_deployment(
131165
for container in containers:
132166
container["name"] = model_name
133167
container["image"] = docker_image_tag
168+
169+
# Add environment variables to the container
170+
env_vars = []
171+
for key, value in environment_vars.items():
172+
env_vars.append({"name": key, "value": value})
173+
container["env"] = env_vars
134174

135175
# Apply the configurations
136176
try:
@@ -149,9 +189,22 @@ def k8s_deployment(
149189
"namespace": namespace,
150190
"status": deployment_status,
151191
"service_port": 3000,
152-
"configurations": k8s_configs
192+
"configurations": k8s_configs,
193+
"url": "chat-rag.staging.cloudinfra.zenml.io"
153194
}
154195

196+
if services:
197+
bentoml_deployment= cast(BentoMLLocalDeploymentService, services[0])
198+
zenml_client.update_service(
199+
id=bentoml_deployment.uuid,
200+
prediction_url="https://chat-rag.staging.cloudinfra.zenml.io",
201+
health_check_url="https://chat-rag.staging.cloudinfra.zenml.io/healthz",
202+
labels={
203+
"docker_image": docker_image_tag,
204+
"namespace": namespace,
205+
}
206+
)
207+
155208
return deployment_info
156209

157210

llm-complete-guide/k8s_template.yaml renamed to llm-complete-guide/steps/k8s_template.yaml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
apiVersion: v1
22
kind: Service
33
metadata:
4+
name: placeholder
45
labels:
56
app: placeholder
6-
name: placeholder
7+
annotations:
8+
service.beta.kubernetes.io/aws-load-balancer-ssl-cert: arn:aws:acm:region:account-id:certificate/certificate-id
9+
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
10+
service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "443"
11+
service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
712
spec:
8-
ports:
9-
- name: http # Changed from 'predict' to 'http' for clarity
10-
port: 80 # External port exposed by LoadBalancer
11-
targetPort: 3000 # Internal container port
1213
selector:
1314
app: placeholder
1415
type: LoadBalancer
16+
ports:
17+
- name: https
18+
port: 443 # External port exposed by LoadBalancer (HTTPS)
19+
targetPort: 3000 # Internal container port
1520
---
1621
apiVersion: apps/v1
1722
kind: Deployment

0 commit comments

Comments
 (0)