fix

wangzaijun · wangzaijun · commit 5c71cce2cd04 · 2025-12-04T05:30:26.000Z
diff --git a/test/start_scripts/draft.sh b/test/start_scripts/draft.sh
@@ -0,0 +1,28 @@
+# 使能 cpu cache 功能，扩大kv cache 复用的可能性。
+LOADWORKER=18 python -m lightllm.server.api_server \
+--model_dir /mtc/models/qwen3-8b --tp 2 --dp 1 --enable_cpu_cache  --cpu_cache_storage_size 66 --cpu_cache_token_page_size 128 \
+--batch_max_tokens 4096 --chunked_prefill_size 2048 \
+--max_total_token_num 20000 \
+--mode "ppl_int8kv_flashdecoding" | tee log.txt
+
+
+# 精度评测命令
+HF_ALLOW_CODE_EVAL=1 HF_DATASETS_OFFLINE=0 lm_eval --model local-completions \
+--model_args '{"model":"Qwen/Qwen3-8B", "base_url":"http://localhost:8000/v1/completions", "max_length": 16384}' --tasks gsm8k --batch_size 500 --confirm_run_unsafe_code
+
+
+
+# H200 single node deepseek R1 tp mode
+LOADWORKER=18 python -m lightllm.server.api_server \
+--model_dir /mtc/DeepSeek-R1 \
+--tp 8 \
+--enable_fa3 \
+--batch_max_tokens 4096 --chunked_prefill_size 2048 \
+--max_total_token_num 20000 \
+--enable_cpu_cache  --cpu_cache_storage_size 66 --cpu_cache_token_page_size 128
+
+# if you want to enable microbatch overlap, you can uncomment the following lines
+#--enable_prefill_microbatch_overlap \
+#--enable_decode_microbatch_overlap \
+# 精度测试。
+HF_ALLOW_CODE_EVAL=1 HF_DATASETS_OFFLINE=0 lm_eval --model local-completions --model_args '{"model":"deepseek-ai/DeepSeek-R1", "base_url":"http://localhost:8000/v1/completions", "max_length": 16384}' --tasks gsm8k --batch_size 500 --confirm_run_unsafe_code