File tree Expand file tree Collapse file tree 2 files changed +37
-0
lines changed
models/language/generation Expand file tree Collapse file tree 2 files changed +37
-0
lines changed Original file line number Diff line number Diff line change @@ -27,3 +27,14 @@ if [ $? -ne 0 ]; then
27
27
exit -1
28
28
fi
29
29
echo " Test with deepseek v2 lite passed"
# deepseek v2 + inc + dynamic quantization + tp2
# Runs generate.py with INC quantization, using inc_dynamic_quant.json as the
# quantization config and a tensor-parallel size of 2. $model_path is expected
# to be set earlier in this script (not visible in this hunk).
echo "Testing deepseek_v2 + inc with vllm-hpu plugin v1"
# Log the exact command line that is executed next, so a failing CI run can be
# reproduced by copy/paste. Keep this in sync with the invocation below.
echo "QUANT_CONFIG=vllm-fork/tests/models/language/generation/inc_dynamic_quant.json HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-fork/tests/full_tests/generate.py --model $model_path --trust-remote-code --quantization inc --tensor-parallel-size 2"
QUANT_CONFIG=vllm-fork/tests/models/language/generation/inc_dynamic_quant.json \
HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 \
python -u vllm-fork/tests/full_tests/generate.py \
    --model "$model_path" --trust-remote-code --quantization inc --tensor-parallel-size 2
# Nothing may run between the python command and this check, otherwise $? would
# no longer hold the test's exit status.
if [ $? -ne 0 ]; then
    echo "Error: Test failed for deepseek_v2 + inc dynamic quantization" >&2
    exit -1
fi
echo "Test with deepseek_v2 + inc dynamic quantization + tp 2 passed"
Original file line number Diff line number Diff line change
{
    "mode": "QUANTIZE",
    "observer": "maxabs",
    "scale_method": "ACT_MAXABS_PCS_POW2_WEIGHT_MAXABS_PTS_POW2_HW",
    "dynamic_quantization": "True",
    "scale_format": "CONST",
    "allowlist": {
        "types": [],
        "names": [
            "q_a_proj",
            "q_b_proj",
            "kv_a_proj_with_mqa",
            "o_proj",
            "qkv_proj",
            "mlp"
        ]
    },
    "blocklist": {
        "types": [],
        "names": [
            "lm_head"
        ]
    },
    "dump_stats_path": "./inc_output_dynamic_quant"
}
You can’t perform that action at this time.
0 commit comments