File tree Expand file tree Collapse file tree 1 file changed +33
-0
lines changed
Expand file tree Collapse file tree 1 file changed +33
-0
# Descriptive metadata for the deployment: a sample request plus tags.
model_metadata:
  # Sample chat-completions request body. Its shape (messages/stream/model/
  # max_tokens/temperature) lines up with the /v1/chat/completions
  # predict_endpoint declared under docker_server.
  example_model_input:
    messages:
      - role: system
        content: "You are a helpful assistant."
      - role: user
        content: "What is the meaning of life?"
    stream: true
    # Same identifier the server is started with via --served-model-name.
    model: zai-org/GLM-4.7-Flash
    max_tokens: 32768
    temperature: 0.7
  tags:
    - openai-compatible
# Container image the deployment starts from. The tag pins one specific dated
# nightly build (date + short commit hash) rather than a floating tag.
base_image:
  image: lmsysorg/sglang:nightly-dev-20260122-e6ccb294
16+
# Shell commands listed in order: drop whatever transformers package is
# already installed, then install transformers from a pinned upstream commit.
# NOTE(review): presumably the pinned commit is needed for GLM-4.7-Flash
# support — confirm before bumping either the image or the commit.
build_commands:
  - pip uninstall -y transformers
  - pip install git+https://github.com/huggingface/transformers.git@76732b4e7120808ff989edbd16401f61fa6a0afa
20+
# How the model server is launched and which HTTP routes are used against it.
#
# start_command is a folded scalar (`>-`): the lines below are joined with
# single spaces into one command line with no trailing newline. Keep every
# continuation line at the same indentation, otherwise YAML preserves the
# line break instead of folding it.
#
# Things that have to stay in sync with the rest of this file:
#   * --port 8000 matches server_port below.
#   * --served-model-name matches model_metadata.example_model_input.model.
#   * --tp-size 2 matches the accelerator count in resources (H100:2).
docker_server:
  start_command: >-
    python3 -m sglang.launch_server
    --model-path zai-org/GLM-4.7-Flash
    --tp-size 2
    --tool-call-parser glm47
    --reasoning-parser glm45
    --speculative-algorithm EAGLE
    --speculative-num-steps 3
    --speculative-eagle-topk 1
    --speculative-num-draft-tokens 4
    --mem-fraction-static 0.8
    --served-model-name zai-org/GLM-4.7-Flash
    --host 0.0.0.0
    --port 8000
  # Readiness and liveness are both pointed at the same route.
  readiness_endpoint: /health_generate
  liveness_endpoint: /health_generate
  predict_endpoint: /v1/chat/completions
  server_port: 8000
# Hardware requested for the deployment.
# NOTE(review): "H100:2" presumably follows a TYPE:COUNT convention (two
# H100s), which would agree with --tp-size 2 in docker_server — confirm.
resources:
  accelerator: H100:2
  use_gpu: true
# Runtime tuning.
# NOTE(review): predict_concurrency is presumably the cap on simultaneous
# in-flight predict requests per replica — verify against the platform docs.
runtime:
  predict_concurrency: 32
32+
# Human-readable name for this model (distinct from the served model id
# zai-org/GLM-4.7-Flash used elsewhere in this file).
model_name: 'GLM 4.7 Flash'
You can’t perform that action at this time.
0 commit comments