Commit a355999

Fix VLM

Signed-off-by: Chenjie Luo <[email protected]>

1 parent 0c56584 commit a355999

File tree

2 files changed: 18 additions, 1 deletion


examples/vlm_ptq/scripts/huggingface_example.sh

Lines changed: 17 additions & 0 deletions
@@ -69,6 +69,9 @@ if $TRUST_REMOTE_CODE; then
     PTQ_ARGS+=" --trust_remote_code "
 fi
 
+if [ -n "$KV_CACHE_QUANT" ]; then
+    PTQ_ARGS+=" --kv_cache_qformat=$KV_CACHE_QUANT "
+fi
 
 if [ "${MODEL_TYPE}" = "vila" ]; then
     # Install required dependency for VILA
@@ -98,6 +101,20 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH
     fi
 fi
 
+if [[ "$QFORMAT" != "fp8" ]]; then
+    echo "For quant format $QFORMAT, please refer to the TensorRT-LLM documentation for deployment. Checkpoint saved to $SAVE_PATH."
+    exit 0
+fi
+
+if [[ "$QFORMAT" == *"nvfp4"* ]] || [[ "$KV_CACHE_QUANT" == *"nvfp4"* ]]; then
+    cuda_major=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader -i 0 | cut -d. -f1)
+
+    if [ "$cuda_major" -lt 10 ]; then
+        echo "Please deploy the NVFP4 checkpoint on a Blackwell GPU. Checkpoint export_path: $SAVE_PATH"
+        exit 0
+    fi
+fi
+
 # Prepare datasets for TRT-LLM benchmark
 if [ -z "$TRT_LLM_CODE_PATH" ]; then
     TRT_LLM_CODE_PATH=/app/tensorrt_llm # default path for the TRT-LLM release docker image
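The NVFP4 gate above keys off the GPU's CUDA compute-capability major version (e.g. `9.0` on Hopper, `10.0` on Blackwell). A minimal, self-contained sketch of that parse, with `sample_cc` standing in for the real `nvidia-smi --query-gpu=compute_cap` output (an assumption for illustration, so it runs without a GPU):

```shell
#!/usr/bin/env bash
# Hedged sketch of the compute-capability check in the diff above.
# sample_cc is a stand-in for: nvidia-smi --query-gpu=compute_cap --format=csv,noheader -i 0
sample_cc="9.0"

# Keep everything before the first dot; equivalent to `cut -d. -f1`.
cuda_major="${sample_cc%%.*}"

if [ "$cuda_major" -lt 10 ]; then
    verdict="pre-blackwell"        # the real script prints a notice and exits 0
else
    verdict="blackwell-or-newer"   # safe to deploy the NVFP4 checkpoint
fi
echo "$verdict"
```

With `sample_cc="9.0"` this prints `pre-blackwell`, matching the script's behavior of bailing out before attempting an NVFP4 deployment on a pre-Blackwell GPU.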

tests/examples/vlm_ptq/test_qwen_vl.py

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
 from _test_utils.torch_misc import minimum_gpu
 
 
-@pytest.mark.parametrize("quant", ["fp8"])
+@pytest.mark.parametrize("quant", ["fp8", "int8_sq", "nvfp4"])
 @minimum_gpu(2)
 def test_qwen_vl_multi_gpu(quant):
     run_vlm_ptq_command(model=QWEN_VL_PATH, quant=quant)
