-# The HuggingFace model to use for testing
-# hf_model = "ise-uiuc/Magicoder-S-DS-6.7B" # Good lightweight model for testing
-# hf_model = "TheBloke/WizardCoder-Python-34B-V1.0-AWQ" # Poor performance, missing chat_template in repo
-hf_model = "TheBloke/SauerkrautLM-70B-v1-AWQ"
-# hf_model = "TheBloke/SauerkrautLM-Mixtral-8x7B-Instruct-AWQ" # Works well
-# hf_model = "abacusai/Smaug-Mixtral-v0.1" # GPU OOM
-# hf_model = "LoneStriker/Smaug-72B-v0.1-AWQ" # Works but produces nonsense responses
-
 # Toggles whether UI should be run locally using gradio hot-reloading
 # or should be included in the remote Helm install
 run_ui_locally = True
@@ -19,15 +11,18 @@ allow_k8s_contexts('production-llm-service-admin@production-llm-service')

 chart_yaml = helm(
     "chart/",
-    values="hu-dev-values.yml",
+    values="dev-values.yml",
     # Enable/disable remote UI install depending on if we're running it locally
     set=[
-        "huggingface.model={}".format(hf_model),
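         # Helm expects a lowercase "true"/"false" string for this value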
"ui.enabled={}" .format (not str (run_ui_locally ).lower ())
27
18
],
28
19
)
29
20
k8s_yaml (chart_yaml )

+# Parse LLM name from templated deployment
+api_deployment, _ = filter_yaml(chart_yaml, kind='Deployment', name='chart-api')
+hf_model = decode_yaml(api_deployment)['spec']['template']['spec']['containers'][0]['args'][1]
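+# (Assumes the model name is the second CLI arg passed to the API container)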
+
 if not run_ui_locally:
     # Port-forward web app to localhost:8080
     k8s_resource("chart-ui", port_forwards="8080:7680")
@@ -56,7 +51,8 @@ if run_ui_locally:
         deps=["chart/web-app/"],
         resource_deps=["gradio-app-venv"],
         serve_cmd=" && ".join([
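+            # Activate the project virtualenv first (assumes venv_name is defined earlier in this Tiltfile)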
+            "source {}/bin/activate".format(venv_name),
             "cd chart/web-app",
-            "python app.py {}".format(hf_model),
+            "python3 app.py {}".format(hf_model),
         ])
     )