Skip to content

Commit 01d5f86

Browse files
committed
glm fix
1 parent b984605 commit 01d5f86

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

baseten-inference-stack-v2-templates/glm47/config.yaml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
model_name: GLM47 TRT
1+
model_name: GLM47 BISLLM
22
python_version: py39
33
resources:
44
accelerator: B200:4
@@ -8,7 +8,7 @@ resources:
88
model_metadata:
99
example_model_input:
1010
{
11-
"model": "deepseek-ai/DeepSeek-V3-0324",
11+
"model": "glm47",
1212
"messages":
1313
[
1414
{
@@ -20,10 +20,17 @@ model_metadata:
2020
"max_tokens": 2048,
2121
"temperature": 0.5,
2222
}
23+
model_cache:
24+
- repo_id: baseten-admin/glm-4.7-fp4
25+
revision: main
26+
use_volume: true
27+
volume_folder: glm47
28+
runtime_secret_name: "hf_access_token"
2329
trt_llm:
2430
build:
2531
checkpoint_repository:
26-
repo: baseten-admin/glm-4.7-fp4
32+
# repo: baseten-admin/glm-4.7-fp4
33+
repo: michaelfeil/empty-model
2734
revision: main
2835
source: HF
2936
inference_stack: v2
@@ -49,3 +56,4 @@ trt_llm:
4956
enable_padding: false
5057
enable_iter_perf_stats: true
5158
autotuner_enabled: false
59+
model_path: /app/model_cache/glm47

0 commit comments

Comments
 (0)