@@ -15,7 +15,6 @@ huggingface:
   # repo files yet. This chart value provides a hook to manually apply the
   # correct chat template for such models.
   chatTemplate:
-
   # For private/gated huggingface models (e.g. Meta's Llama models)
   # you must provide your own huggingface token, for details see:
   # https://huggingface.co/docs/hub/security-tokens
@@ -29,7 +28,6 @@ huggingface:
   # OR FOR TESTING PURPOSES ONLY, you can instead provide the secret directly
   # as a chart value here (if secretName is set above then it will take priority)
   token:
-
 # Configuration for the backend model serving API
 api:
   # Container image config
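For illustration, here is a minimal sketch of how the huggingface values above might be filled in. The secret name, token placeholder and template body are invented for this example; only the secretName, token and chatTemplate keys come from the section shown:

huggingface:
  # Preferred: name of an existing Kubernetes secret holding the huggingface token
  secretName: my-hf-token-secret
  # OR, for testing only, set the token directly (secretName takes priority if both are set)
  # token: hf_xxxxxxxxxxxxxxxx
  # Manually supply a (Jinja-style) chat template for a model whose repo does not include one
  chatTemplate: |
    {% for message in messages %}
    {{ message.role }}: {{ message.content }}
    {% endfor %}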
@@ -51,13 +49,11 @@ api:
   iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
   description: |
     The raw inference API endpoints for the deployed LLM.
-
   # Config for huggingface model cache volume
   # This is mounted at /root/.cache/huggingface in the api deployment
   cacheVolume:
     hostPath:
       path: /tmp/llm/huggingface-cache
-
   # Number of gpus to request for each api pod instance
   # NOTE: This must be in the range 1 <= value <= N, where
   #       'N' is the number of GPUs available in a single
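As a hedged example of the cacheVolume setting above, the host path can point at any node-local directory with enough space for downloaded model weights; the path here is illustrative:

api:
  cacheVolume:
    hostPath:
      # Node-local directory used for the huggingface download cache
      # (mounted at /root/.cache/huggingface inside the api pod)
      path: /data/huggingface-cache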
@@ -73,15 +69,12 @@ api:
   # to perform a rolling zero-downtime update
   updateStrategy:
     type: Recreate
-
   # The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
   # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
   modelMaxContextLength:
-
   # Extra args to supply to the vLLM backend, see
   # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
   extraArgs: []
-
 # Configuration for the frontend web interface
 ui:
   # Toggles installation of the gradio web UI
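For the api.modelMaxContextLength and api.extraArgs values above, a sketch of how they might be combined; the number and flags are examples drawn from vLLM's standard command-line options, not values the chart requires:

api:
  # Cap the context window when the model's default does not fit in GPU memory
  modelMaxContextLength: 8192
  # Passed through verbatim to the vLLM OpenAI-compatible server
  extraArgs:
    - --dtype=float16
    - --gpu-memory-utilization=0.95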
@@ -124,7 +117,6 @@ ui:
     rollingUpdate:
       maxSurge: 25%
       maxUnavailable: 25%
-
 # Settings for configuring ingress resources
 # to make the UI and/or backend API accessible
 # outside the cluster.
@@ -155,6 +147,5 @@ ingress:
   # Annotations to apply to the ingress resource
   # e.g. for cert-manager integration
   annotations:
-
 reloader:
   watchGlobally: false
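For the ingress annotations shown above, a typical cert-manager integration could look like this; the issuer name is an assumption and should match a ClusterIssuer that exists in the target cluster:

annotations:
  cert-manager.io/cluster-issuer: letsencrypt-prod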