Skip to content

Commit 94f07d5

Browse files
authored
Merge pull request #11 from stackhpc/feat/chat-template
Adds chat template config option to chart
2 parents 9aca5a0 + 0381fdf commit 94f07d5

File tree

8 files changed

+39
-19
lines changed

8 files changed

+39
-19
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
__pycache__/
55
**/*.secret
66
.DS_Store
7+
.tox/
78

89
# Ignore local dev helpers
910
test-values.y[a]ml

Tiltfile

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,3 @@
1-
# The HuggingFace model to use for testing
2-
# hf_model = "ise-uiuc/Magicoder-S-DS-6.7B" # Good lightweight model for testing
3-
# hf_model = "TheBloke/WizardCoder-Python-34B-V1.0-AWQ" # Poor performance, missing chat_template in repo
4-
hf_model = "TheBloke/SauerkrautLM-70B-v1-AWQ"
5-
# hf_model = "TheBloke/SauerkrautLM-Mixtral-8x7B-Instruct-AWQ" # Works well
6-
# hf_model = "abacusai/Smaug-Mixtral-v0.1" # GPU OOM
7-
# hf_model = "LoneStriker/Smaug-72B-v0.1-AWQ" # Works but produces nonsense responses
8-
91
# Toggles whether UI should be run locally using gradio hot-reloading
102
# or should be included in the remote Helm install
113
run_ui_locally = True
@@ -19,15 +11,18 @@ allow_k8s_contexts('production-llm-service-admin@production-llm-service')
1911

2012
chart_yaml = helm(
2113
"chart/",
22-
values="hu-dev-values.yml",
14+
values="dev-values.yml",
2315
# Enable/disable remote UI install depending on if we're running it locally
2416
set=[
25-
"huggingface.model={}".format(hf_model),
2617
"ui.enabled={}".format(not str(run_ui_locally).lower())
2718
],
2819
)
2920
k8s_yaml(chart_yaml)
3021

22+
# Parse LLM name from templated deployment
23+
api_deployment, _ = filter_yaml(chart_yaml, kind='Deployment', name='chart-api')
24+
hf_model = decode_yaml(api_deployment)['spec']['template']['spec']['containers'][0]['args'][1]
25+
3126
if not run_ui_locally:
3227
# Port-forward web app to localhost:8080
3328
k8s_resource("chart-ui", port_forwards="8080:7680")
@@ -56,7 +51,8 @@ if run_ui_locally:
5651
deps=["chart/web-app/"],
5752
resource_deps=["gradio-app-venv"],
5853
serve_cmd=" && ".join([
54+
"source {}/bin/activate".format(venv_name),
5955
"cd chart/web-app",
60-
"python app.py {}".format(hf_model),
56+
"python3 app.py {}".format(hf_model),
6157
])
6258
)

chart/azimuth-ui.schema.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ controls:
55
/huggingface/token:
66
type: TextControl
77
secret: true
8-
/ui/appSettings/model_instruction:
8+
/ui/appSettings/hf_model_instruction:
99
type: TextControl
1010
/ui/appSettings/page_title:
1111
type: TextControl

chart/templates/api/deployment.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ spec:
2828
args:
2929
- --model
3030
- {{ .Values.huggingface.model }}
31+
{{- if .Values.huggingface.chatTemplate }}
32+
- --chat-template
33+
- {{ quote .Values.huggingface.chatTemplate }}
34+
{{- end -}}
3135
{{- if .Values.api.extraArgs -}}
3236
{{- .Values.api.extraArgs | toYaml | nindent 10 }}
3337
{{- end -}}

chart/values.schema.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@
2525
"appSettings": {
2626
"type": "object",
2727
"properties": {
28-
"model_name": {
28+
"hf_model_name": {
2929
"type": "string",
3030
"title": "Model Name",
3131
"description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above.",
3232
"default": "mistralai/Mistral-7B-Instruct-v0.2"
3333
},
34-
"model_instruction": {
34+
"hf_model_instruction": {
3535
"type": "string",
3636
"title": "Instruction",
3737
"description": "The initial model prompt (i.e. the hidden instructions) to use when generating responses.",
@@ -75,7 +75,7 @@
7575
}
7676

7777
},
78-
"required": ["model_name", "model_instruction"]
78+
"required": ["hf_model_name", "hf_model_instruction"]
7979
}
8080
}
8181
}

chart/values.yaml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ huggingface:
66
# The name of the HuggingFace model to use
77
# Use a yaml anchor to avoid duplication elsewhere
88
model: &model-name ise-uiuc/Magicoder-S-DS-6.7B
9+
# A Jinja formatted chat template to provide to the language model.
10+
# See https://huggingface.co/blog/chat-templates for background info.
11+
# If not provided, the default template specified in the HuggingFace
12+
# model repository's tokenizer_config.json file is used. As explained
13+
# in the above blog post, the HF template key in tokenizer_config.json
14+
# is relatively new and not all HF models include a template in their
15+
# repo files yet. This chart value provides a hook to manually apply the
16+
# correct chat template for such models.
17+
chatTemplate:
918

1019
# For private/gated huggingface models (e.g. Meta's Llama models)
1120
# you must provide your own huggingface token, for details see:
@@ -71,8 +80,8 @@ ui:
7180
# The values to be written to settings.yml for parsing as frontend app setting
7281
# (see example_app.py and config.py for example using pydantic-settings to configure app)
7382
appSettings:
74-
model_name: *model-name
75-
model_instruction: "You are a helpful AI assistant. Please respond appropriately."
83+
hf_model_name: *model-name
84+
hf_model_instruction: "You are a helpful AI assistant. Please respond appropriately."
7685
# Container image config
7786
image:
7887
repository: ghcr.io/stackhpc/azimuth-llm-ui-base

chart/web-app/app.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,11 @@ def inference(latest_message, history):
100100
if not BACKEND_INITIALISED:
101101
logger.info("Backend API not yet ready")
102102
gr.Info(
103-
"Backend not ready - model may still be initialising - please try again later"
103+
"Backend not ready - model may still be initialising - please try again later."
104104
)
105105
else:
106106
logger.error("Failed to connect to backend API: %s", err)
107-
gr.Warning("Failed to connect to backend API")
107+
gr.Warning("Failed to connect to backend API.")
108108

109109
except openai.InternalServerError as err:
110110
gr.Warning(

tox.ini

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[tox]
2+
env_list =
3+
format
4+
minversion = 4.11.3
5+
6+
[testenv:format]
7+
description = run code formatter on web-app
8+
deps = black==23.12.1
9+
skip_install = true
10+
commands = black chart/web-app

0 commit comments

Comments (0)