1 file changed: +11 -3 lines changed
baseten-inference-stack-v2-templates/glm47: +11 -3 lines changed
Original file line number | Diff line number | Diff line change
1- model_name : GLM47 TRT
1+ model_name : GLM47 BISLLM
2 2 python_version : py39
3 3 resources :
4 4 accelerator : B200:4
@@ -8,7 +8,7 @@ resources:
8 8 model_metadata :
9 9 example_model_input :
10 10 {
11- "model": "deepseek-ai/DeepSeek-V3-0324",
11+ "model": "glm47",
12 12 "messages":
13 13 [
14 14 {
@@ -20,10 +20,17 @@ model_metadata:
20 20 "max_tokens": 2048,
21 21 "temperature": 0.5,
22 22 }
23+ model_cache :
24+ - repo_id : baseten-admin/glm-4.7-fp4
25+ revision : main
26+ use_volume : true
27+ volume_folder : glm47
28+ runtime_secret_name : "hf_access_token"
23 29 trt_llm :
24 30 build :
25 31 checkpoint_repository :
26- repo : baseten-admin/glm-4.7-fp4
32+ # repo: baseten-admin/glm-4.7-fp4
33+ repo : michaelfeil/empty-model
27 34 revision : main
28 35 source : HF
29 36 inference_stack : v2
@@ -49,3 +56,4 @@ trt_llm:
49 56 enable_padding : false
50 57 enable_iter_perf_stats : true
51 58 autotuner_enabled : false
59+ model_path : /app/model_cache/glm47
You can’t perform that action at this time.
0 commit comments