
Commit ea591ac

Merge pull request #8 from stackhpc/fix/api-deployment-rollout
Default to 'recreate' strategy for API deployment updates
2 parents: 657c71f + 4e52006


chart/values.yaml

Lines changed: 9 additions & 11 deletions
@@ -7,12 +7,12 @@ huggingface:
   # Use a yaml anchor to avoid duplication elsewhere
   model: &model-name ise-uiuc/Magicoder-S-DS-6.7B
 
-  # For private/gated huggingface models (e.g. Meta's Llama models) 
+  # For private/gated huggingface models (e.g. Meta's Llama models)
   # you must provide your own huggingface token, for details see:
   # https://huggingface.co/docs/hub/security-tokens
-  
+
   # To do this, either provide the name of an existing secret on the cluster,
-  # which should be created before installing this chart by running 
+  # which should be created before installing this chart by running
   # `kubectl create secret generic huggingface-token --from-env-file <file-name>`
   # where <file-name> is a file with the following contents:
   # HUGGING_FACE_HUB_TOKEN=<token-value>
@@ -27,7 +27,7 @@ api:
   image:
     repository: vllm/vllm-openai
     version: v0.2.7
-  # Service config 
+  # Service config
   service:
     name: llm-backend
     type: ClusterIP
@@ -45,25 +45,23 @@ api:
     path: /tmp/llm/huggingface-cache
   # Number of gpus to request for each api pod instance
   # NOTE: This must be in the range 1 <= value <= N, where
-  # 'N' is the number of GPUs available in a single 
+  # 'N' is the number of GPUs available in a single
   # worker node on the target Kubernetes cluster.
   # NOTE: According to the vLLM docs found here
   # https://docs.vllm.ai/en/latest/serving/distributed_serving.html
-  # distributed / multi-GPU support should be available, though it 
+  # distributed / multi-GPU support should be available, though it
   # has not been tested against this app.
   gpus: 1
   # The update strategy to use for the deployment
   # See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
   # NOTE: Changing this has implications for the number of additional GPU worker nodes required
   # to perform a rolling zero-downtime update
   updateStrategy:
-    rollingUpdate:
-      maxSurge: 0%
-      maxUnavailable: 100%
+    type: recreate
   # Extra args to supply to the vLLM backend, see
   # https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/api_server.py
   extraArgs: []
-
+
 # Configuration for the frontend web interface
 ui:
   # The file from the UI config map to execute as the entrypoint to the frontend app
@@ -77,7 +75,7 @@ ui:
   image:
     repository: ghcr.io/stackhpc/azimuth-llm-ui-base
     version: "984c499"
-  # Service config 
+  # Service config
   service:
     name: web-app
     type: ClusterIP
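
The functional change is in the updateStrategy hunk: the previous rollingUpdate settings (maxSurge: 0%, maxUnavailable: 100%) already told Kubernetes to tear down every old pod before creating replacements, so an update never needed a spare GPU; switching to a recreate strategy states that intent directly. Below is a minimal sketch of the strategy stanza the chart presumably renders, assuming api.updateStrategy is passed through to the Deployment's spec.strategy and that the template emits the capitalised Recreate value the Kubernetes API requires (the template itself is not shown in this diff):

# Sketch only: the template mapping is an assumption, not part of this commit.
apiVersion: apps/v1
kind: Deployment
spec:
  strategy:
    type: Recreate  # all old pods are terminated before new ones are created,
                    # so no extra GPU capacity is needed during an update

Either configuration implies brief downtime while the model server restarts; the trade-off accepted here is short unavailability in exchange for not needing a second set of GPU worker nodes to run old and new pods side by side.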

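For the untested multi-GPU path mentioned in the gpus comment, vLLM exposes tensor parallelism through server flags, which this chart forwards via extraArgs. A hypothetical override, with values chosen purely for illustration and not taken from this commit:

# Hypothetical values override: request 2 GPUs per pod and shard the model
# across them with vLLM's --tensor-parallel-size flag.
api:
  gpus: 2
  extraArgs: ["--tensor-parallel-size", "2"]

As the NOTE in the diff says, the GPU count must fit within a single worker node and this configuration has not been tested against this app.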