
Commit 311b3a4

Merge pull request #31 from stackhpc/customisation-options
Additional customisation options for Azimuth app
2 parents: 42ed6f6 + 76e0833

File tree: 5 files changed (+48, -2 lines)

Tiltfile

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 # Toggles whether UI should be run locally using gradio hot-reloading
 # or should be included in the remote Helm install
-run_ui_locally = True
+run_ui_locally = os.getenv("AZIMUTH_LLM_TILT_LOCAL_UI", True)
 
 # Allow non-local contexts
 allow_k8s_contexts(k8s_context())

chart/azimuth-ui.schema.yaml

Lines changed: 15 additions & 0 deletions
@@ -10,10 +10,25 @@ controls:
     type: MirrorControl
     path: /huggingface/model
     visuallyHidden: true
+  # Azimuth UI doesn't handle json type ["integer","null"]
+  # properly so we allow any type in JSON schema then
+  # constrain to (optional) integer here.
+  /api/modelMaxContextLength:
+    type: IntegerControl
+    minimum: 100
+    step: 100
+    required: false
 
 sortOrder:
   - /huggingface/model
   - /huggingface/token
   - /ui/appSettings/hf_model_instruction
   - /ui/appSettings/page_title
+  - /api/image/version
   - /ui/appSettings/llm_temperature
+  - /ui/appSettings/llm_max_tokens
+  - /ui/appSettings/llm_frequency_penalty
+  - /ui/appSettings/llm_presence_penalty
+  - /ui/appSettings/llm_top_p
+  - /ui/appSettings/llm_top_k
+  - /api/modelMaxContextLength

chart/templates/api/deployment.yml

Lines changed: 4 additions & 0 deletions
@@ -29,6 +29,10 @@ spec:
           - --model
           - {{ .Values.huggingface.model }}
           {{- include "azimuth-llm.chatTemplate" . | nindent 10 }}
+          {{- if .Values.api.modelMaxContextLength -}}
+          - --max-model-len
+          - {{ .Values.api.modelMaxContextLength | quote }}
+          {{- end -}}
           {{- if .Values.api.extraArgs -}}
           {{- .Values.api.extraArgs | toYaml | nindent 10 }}
           {{- end -}}

chart/values.schema.json

Lines changed: 20 additions & 0 deletions
@@ -92,6 +92,26 @@
                     "required": ["hf_model_name", "hf_model_instruction"]
                 }
             }
+        },
+        "api": {
+            "type": "object",
+            "properties": {
+                "modelMaxContextLength": {
+                    "title": "Model Context Length",
+                    "description": "An override for the maximum context length to allow, if the model's default is not suitable."
+                },
+                "image": {
+                    "type": "object",
+                    "properties": {
+                        "version": {
+                            "type": "string",
+                            "title": "Backend vLLM version",
+                            "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
+                            "default": "v0.4.3"
+                        }
+                    }
+                }
+            }
         }
     }
 }

chart/values.yaml

Lines changed: 8 additions & 1 deletion
@@ -51,11 +51,13 @@ api:
   iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
   description: |
     The raw inference API endpoints for the deployed LLM.
+
   # Config for huggingface model cache volume
   # This is mounted at /root/.cache/huggingface in the api deployment
   cacheVolume:
     hostPath:
       path: /tmp/llm/huggingface-cache
+
   # Number of gpus to requests for each api pod instance
   # NOTE: This must be in the range 1 <= value <= N, where
   # 'N' is the number of GPUs available in a single
@@ -71,8 +73,13 @@ api:
   # to preform a rolling zero-downtime update
   updateStrategy:
     type: Recreate
+
+  # The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
+  # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
+  modelMaxContextLength:
+
   # Extra args to supply to the vLLM backend, see
-  # https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/api_server.py
+  # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
   extraArgs: []
 
 # Configuration for the frontend web interface
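Taken together, these changes surface two new user-facing chart settings: api.modelMaxContextLength, which the deployment template passes to the vLLM backend as --max-model-len when set, and api.image.version, which selects the backend vLLM tag (chart default v0.4.3). A minimal Helm values override exercising both might look like the sketch below; the 4096 context length and the my-values.yml file name are illustrative choices, not values taken from this commit.

# my-values.yml -- illustrative override only
api:
  # Optional cap on the model context length; omit to keep the model's own default
  modelMaxContextLength: 4096
  image:
    # Must be a release tag from https://github.com/vllm-project/vllm/tags
    version: v0.4.3

With the matching azimuth-ui.schema.yaml control in place, the same context-length setting is also offered in the Azimuth UI as an optional integer (minimum 100, step 100).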
