We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b0910f0, commit 65df760 (Copy full SHA for 65df760)
GLM-4.7.yaml
@@ -75,8 +75,6 @@ services:
75
command: >
76
zai-org/GLM-4.7
77
--tensor-parallel-size 8
78
- --speculative-config '{"method":"mtp","num_speculative_tokens":1}'
79
- --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
80
--max-model-len 128K
81
--max-num-batched-tokens 32K
82
--max-num-seqs 128
@@ -95,9 +93,7 @@ services:
95
93
- NCCL_DEBUG=INFO
96
94
- VLLM_CACHE_ROOT=/root/.cache/vllm
97
- TORCH_FLOAT32_MATMUL_PRECISION=high
98
- - LMCACHE_CHUNK_SIZE=256
99
- - LMCACHE_LOCAL_CPU=True
100
- - LMCACHE_MAX_LOCAL_CPU_SIZE=100
+ - LMCACHE_LOCAL_CPU=False
101
- PYTHONHASHSEED=0
102
- VLLM_RPC_TIMEOUT=60000
103
deploy:
0 commit comments