@@ -15,7 +15,6 @@ huggingface:
15
15
# repo files yet. This chart value provides a hook to manually apply the
16
16
# correct chat template for such models.
17
17
chatTemplate :
18
-
19
18
# For private/gated huggingface models (e.g. Meta's Llama models)
20
19
# you must provide your own huggingface token, for details see:
21
20
# https://huggingface.co/docs/hub/security-tokens
@@ -29,7 +28,6 @@ huggingface:
29
28
# OR FOR TESTING PURPOSES ONLY, you can instead provide the secret directly
30
29
# as a chart value here (if secretName is set above then it will take priority)
31
30
token :
32
-
33
31
# Configuration for the backend model serving API
34
32
api :
35
33
# Container image config
51
49
iconUrl : https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
52
50
description : |
53
51
The raw inference API endpoints for the deployed LLM.
54
-
55
52
# Config for huggingface model cache volume
56
53
# This is mounted at /root/.cache/huggingface in the api deployment
57
54
cacheVolume :
58
55
hostPath :
59
56
path : /tmp/llm/huggingface-cache
60
-
61
57
# Number of GPUs to request for each api pod instance
62
58
# NOTE: This must be in the range 1 <= value <= N, where
63
59
# 'N' is the number of GPUs available in a single
73
69
# to perform a rolling zero-downtime update
74
70
updateStrategy :
75
71
type : Recreate
76
-
77
72
# The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
78
73
# https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
79
74
modelMaxContextLength :
80
-
81
75
# Extra args to supply to the vLLM backend, see
82
76
# https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
83
77
extraArgs : []
84
-
85
78
# Configuration for the frontend web interface
86
79
ui :
87
80
# Toggles installation of the gradio web UI
124
117
rollingUpdate :
125
118
maxSurge : 25%
126
119
maxUnavailable : 25%
127
-
128
120
# Settings for configuring ingress resources
129
121
# to make the UI and/or backend API accessible
130
122
# outside the cluster.
@@ -155,6 +147,5 @@ ingress:
155
147
# Annotations to apply to the ingress resource
156
148
# e.g. for cert-manager integration
157
149
annotations :
158
-
159
150
reloader :
160
151
watchGlobally : false
0 commit comments