Skip to content

Commit a386a6c

Browse files
authored
Add INC dynamic quant test (#65)
Add INC dynamic quant test for deepseek-v2. cc @hshen14 @thuang6 Signed-off-by: yiliu30 <[email protected]>
1 parent 746cd72 commit a386a6c

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

tests/full_tests/ci_tests.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,14 @@ if [ $? -ne 0 ]; then
2727
exit -1
2828
fi
2929
echo "Test with deepseek v2 lite passed"
30+
# deepseek v2 + inc + dynamic quantization + tp2
#
# Runs generate.py with INC quantization driven by inc_dynamic_quant.json
# across two tensor-parallel ranks, and aborts the CI script on failure.
# NOTE(review): $model_path is not assigned in this section; it is presumably
# set by the preceding deepseek v2 lite test -- confirm against the full script.
echo "Testing deepseek_v2 + inc with vllm-hpu plugin v1"
# Log the command exactly as it is executed below so the CI log can be used
# to reproduce the run. Keep this line in sync with the invocation.
echo QUANT_CONFIG=vllm-fork/tests/models/language/generation/inc_dynamic_quant.json HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-fork/tests/full_tests/generate.py --model "$model_path" --trust-remote-code --quantization inc --tensor-parallel-size 2
# Test the command's status directly instead of inspecting $? afterwards, so
# no statement can slip in between the run and the check.
if ! QUANT_CONFIG=vllm-fork/tests/models/language/generation/inc_dynamic_quant.json \
    HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 \
    python -u vllm-fork/tests/full_tests/generate.py --model "$model_path" \
    --trust-remote-code --quantization inc --tensor-parallel-size 2; then
    echo "Error: Test failed for deepseek_v2 + inc dynamic quantization" >&2
    # Exit statuses are limited to 0-255; "-1" is non-portable (bash reports
    # 255, dash rejects it), so use a plain non-zero status.
    exit 1
fi
echo "Test with deepseek_v2 + inc dynamic quantization + tp 2 passed"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"mode": "QUANTIZE",
3+
"observer": "maxabs",
4+
"scale_method": "ACT_MAXABS_PCS_POW2_WEIGHT_MAXABS_PTS_POW2_HW",
5+
"dynamic_quantization": "True",
6+
"scale_format": "CONST",
7+
"allowlist": {
8+
"types": [],
9+
"names": [
10+
"q_a_proj",
11+
"q_b_proj",
12+
"kv_a_proj_with_mqa",
13+
"o_proj",
14+
"qkv_proj",
15+
"mlp"
16+
]
17+
},
18+
"blocklist": {
19+
"types": [
20+
],
21+
"names": [
22+
"lm_head"
23+
]
24+
},
25+
"dump_stats_path": "./inc_output_dynamic_quant"
26+
}

0 commit comments

Comments
 (0)