Add INC dynamic quant test (#65)

yiliu30 · web-flow · commit a386a6c135b0 · 2025-08-07T20:34:14.000-05:00
Add INC dynamic quant test for deepseek-v2. cc @hshen14 @thuang6 Signed-off-by: yiliu30 <yi4.liu@intel.com>
diff --git a/tests/full_tests/ci_tests.sh b/tests/full_tests/ci_tests.sh
@@ -27,3 +27,14 @@ if [ $? -ne 0 ]; then
     exit -1
 fi
 echo "Test with deepseek v2 lite passed"
+
+# deepseek v2 + inc + dynamic quantization + tp2
+echo "Testing deepseek_v2 + inc with vllm-hpu plugin v1"
+echo QUANT_CONFIG=vllm-fork/tests/models/language/generation/inc_dynamic_quant.json HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-fork/tests/full_tests/generate.py --model $model_path --trust-remote-code  --quantization inc --kv_cache_dtype fp8_inc
+QUANT_CONFIG=vllm-fork/tests/models/language/generation/inc_dynamic_quant.json \
+HABANA_VISIBLE_DEVICES=all VLLM_SKIP_WARMUP=true PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 python -u vllm-fork/tests/full_tests/generate.py --model $model_path --trust-remote-code --quantization inc --tensor-parallel-size 2
+if [ $? -ne 0 ]; then
+    echo "Error: Test failed for deepseek_v2 + inc dynamic quantization" >&2
+    exit -1
+fi
+echo "Test with deepseek_v2 + inc dynamic quantization + tp 2"
diff --git a/tests/models/language/generation/inc_dynamic_quant.json b/tests/models/language/generation/inc_dynamic_quant.json
@@ -0,0 +1,26 @@
+{
+    "mode": "QUANTIZE",
+    "observer": "maxabs",
+    "scale_method": "ACT_MAXABS_PCS_POW2_WEIGHT_MAXABS_PTS_POW2_HW",
+    "dynamic_quantization": "True",
+    "scale_format": "CONST",
+    "allowlist": {
+        "types": [],
+        "names": [
+            "q_a_proj",
+            "q_b_proj",
+            "kv_a_proj_with_mqa",
+            "o_proj",
+            "qkv_proj",
+            "mlp"
+        ]
+    },
+    "blocklist": {
+        "types": [
+        ],
+        "names": [
+            "lm_head"
+        ]
+    },
+    "dump_stats_path": "./inc_output_dynamic_quant"
+}