-# The HuggingFace model to use for testing
-# hf_model = "ise-uiuc/Magicoder-S-DS-6.7B" # Good lightweight model for testing
-# hf_model = "TheBloke/WizardCoder-Python-34B-V1.0-AWQ" # Poor performance, missing chat_template in repo
-hf_model = "TheBloke/SauerkrautLM-70B-v1-AWQ"
-# hf_model = "TheBloke/SauerkrautLM-Mixtral-8x7B-Instruct-AWQ" # Works well
-# hf_model = "abacusai/Smaug-Mixtral-v0.1" # GPU OOM
-# hf_model = "LoneStriker/Smaug-72B-v0.1-AWQ" # Works but produces nonsense responses
-
 # Toggles whether UI should be run locally using gradio hot-reloading
 # or should be included in the remote Helm install
 run_ui_locally = True
@@ -19,15 +11,18 @@ allow_k8s_contexts('production-llm-service-admin@production-llm-service')

 chart_yaml = helm(
     "chart/",
-    values="hu-dev-values.yml",
+    values="dev-values.yml",
     # Enable/disable remote UI install depending on if we're running it locally
     set=[
-        "huggingface.model={}".format(hf_model),
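         # Helm expects a lowercase "true"/"false" string for this value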
"ui.enabled={}" .format (not str (run_ui_locally ).lower ())
27
18
],
28
19
)
29
20
k8s_yaml (chart_yaml )

+# Parse LLM name from templated deployment
+api_deployment, _ = filter_yaml(chart_yaml, kind='Deployment', name='chart-api')
+hf_model = decode_yaml(api_deployment)['spec']['template']['spec']['containers'][0]['args'][1]
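+# (Assumes the model name is the second CLI arg passed to the API container)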
+
 if not run_ui_locally:
     # Port-forward web app to localhost:8080
     k8s_resource("chart-ui", port_forwards="8080:7680")
@@ -56,7 +51,8 @@ if run_ui_locally:
         deps=["chart/web-app/"],
         resource_deps=["gradio-app-venv"],
         serve_cmd=" && ".join([
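+            # Activate the project virtualenv first (assumes venv_name is defined earlier in this Tiltfile)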
+            "source {}/bin/activate".format(venv_name),
             "cd chart/web-app",
-            "python app.py {}".format(hf_model),
+            "python3 app.py {}".format(hf_model),
         ])
     )