CHANGELOG.rst (1 change: 0 additions & 1 deletion)
@@ -9,7 +9,6 @@ Model Optimizer Changelog (Linux)
 - Deprecated ``quantize_mode`` argument in ``examples/onnx_ptq/evaluate.py`` to support strong typing. Use ``engine_precision`` instead.
 - Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. For performance evaluation, please use ``trtllm-bench`` directly.
 - ``--export_fmt`` flag in ``examples/llm_ptq`` is removed. By default we export to the unified Hugging Face checkpoint format.
-- ``int8_sq`` quantization format is deprecated from the ``examples/vlm_ptq`` with respect to the TensorRT-LLM's torch backend switch. Please refer to the previous releases if this quantization format is needed.
 - Deprecated ``examples/vlm_eval`` as it depends on the deprecated TRT-LLM's TRT backend.
 
 **New Features**
examples/vlm_ptq/scripts/huggingface_example.sh (6 changes: 4 additions & 2 deletions)
@@ -35,10 +35,10 @@ if [ -z "$MODEL_PATH" ]; then
 fi
 
 case $QFORMAT in
-    fp8|int4_awq|w4a8_awq|nvfp4)
+    fp8|int8_sq|int4_awq|w4a8_awq|nvfp4)
         ;;
     *)
-        echo "Unknown quant argument: Expected one of: [fp8, int4_awq, w4a8_awq, nvfp4]" >&2
+        echo "Unknown quant argument: Expected one of: [fp8, int8_sq, int4_awq, w4a8_awq, nvfp4]" >&2
         exit 1
Comment on lines 37 to 42
⚠️ Potential issue

🧩 Analysis chain

Re‑enablement of int8_sq looks good.

Case arm and error message now include int8_sq; matches the PR goal.

Run to confirm downstream accepts this qformat:


🏁 Script executed:

#!/bin/bash
# Verify hf_ptq.py accepts int8_sq for --qformat and exposes the new inference flags
fd -t f hf_ptq.py
rg -nP -C2 -- 'add_argument\((["'\''])--qformat\1.*choices=.*int8_sq' $(fd -t f hf_ptq.py)
rg -nP -C2 -- 'add_argument\((["'\''])--inference_tensor_parallel\1' $(fd -t f hf_ptq.py)
rg -nP -C2 -- 'add_argument\((["'\''])--inference_pipeline_parallel\1' $(fd -t f hf_ptq.py)

# Check parser provides TP/PP defaults
fd -t f parser.sh
rg -n -C2 '\b(TP|PP)=' $(fd -t f parser.sh)

Length of output: 1012


Add int8_sq to the --qformat choices in examples/llm_ptq/hf_ptq.py: the argparse definition for --qformat does not include int8_sq (no match found), while --inference_tensor_parallel and --inference_pipeline_parallel are present and examples/llm_ptq/scripts/parser.sh sets TP=1 and PP=1.
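
A minimal sketch of that fix, assuming --qformat in examples/llm_ptq/hf_ptq.py is declared with an explicit choices list; the surrounding context and option names in this hunk are illustrative, not verified against the file:

--- a/examples/llm_ptq/hf_ptq.py
+++ b/examples/llm_ptq/hf_ptq.py
     parser.add_argument(
         "--qformat",
         # NOTE: illustrative choices list; mirror the file's actual options
-        choices=["fp8", "int4_awq", "w4a8_awq", "nvfp4"],
+        choices=["fp8", "int8_sq", "int4_awq", "w4a8_awq", "nvfp4"],
         help="Quantization format",
     )

Deriving both the script's case arm and the CLI choices from a single source, if the codebase has one, would keep them from drifting apart again.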

🤖 Prompt for AI Agents
In examples/vlm_ptq/scripts/huggingface_example.sh around lines 37-42 the script
accepts "int8_sq" in the case switch but examples/llm_ptq/hf_ptq.py's argparse
for --qformat does not include "int8_sq", causing a mismatch; update the
argparse definition in examples/llm_ptq/hf_ptq.py to include "int8_sq" among the
allowed choices for --qformat (keeping other existing options intact), and
ensure help text reflects this addition so the CLI and example script are
consistent with TP/PP handling from parser.sh.

 esac
 
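For context, a hypothetical invocation that exercises the restored branch; the flag names below are assumed from the shared parser.sh conventions, not confirmed in this diff:

# Hypothetical usage; --model/--quant flag names assumed, not verified here
scripts/huggingface_example.sh --model <hf_vlm_checkpoint> --quant int8_sq
# With the case arm above restored, QFORMAT=int8_sq passes validation
# instead of falling through to the "Unknown quant argument" error.
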
@@ -95,6 +95,8 @@ if [[ $TASKS =~ "quant" ]] || [[ ! -d "$SAVE_PATH" ]] || [[ ! $(ls -A $SAVE_PATH
         --qformat=$QFORMAT \
         --calib_size=$CALIB_SIZE \
         --batch_size=$CALIB_BATCH_SIZE \
+        --inference_tensor_parallel=$TP \
+        --inference_pipeline_parallel=$PP \
         $PTQ_ARGS
 else
     echo "Quantized model config $MODEL_CONFIG exists, skipping the quantization stage"