Commit e9fab4f

fix bug of deepseek group_size setting (NVIDIA#3860)
Signed-off-by: bhsueh <11360707+byshiue@users.noreply.github.com>
Parent: e6c14ca

File tree: 1 file changed (+3, −0)


tensorrt_llm/_torch/model_config.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -122,6 +122,9 @@ def from_pretrained(cls,
                     'group_size', None)
                 mixed_quant_configs[layer] = config
             layer_quant_config = mixed_quant_configs
+        elif quant_config.quant_algo == QuantAlgo.FP8_BLOCK_SCALES:
+            if quant_config.group_size is None:
+                quant_config.group_size = 128

         if kwargs.get(
                 'moe_backend'
```
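The defaulting logic the patch adds can be sketched in isolation. The snippet below is a simplified stand-in, not the real TensorRT-LLM code: `QuantAlgo` and `QuantConfig` here are minimal mock-ups that only mirror the two attributes the diff touches, and `apply_group_size_default` is a hypothetical helper wrapping the new `elif` branch.

```python
# Minimal sketch of the fix: when a checkpoint uses FP8 block-scale
# quantization but omits group_size, fall back to 128 (the block width
# used by DeepSeek-style FP8 block-scale weights). QuantAlgo and
# QuantConfig are simplified stand-ins, not the TensorRT-LLM classes.
from dataclasses import dataclass
from enum import Enum
from typing import Optional


class QuantAlgo(Enum):
    FP8_BLOCK_SCALES = "FP8_BLOCK_SCALES"
    W4A16 = "W4A16"


@dataclass
class QuantConfig:
    quant_algo: QuantAlgo
    group_size: Optional[int] = None


def apply_group_size_default(quant_config: QuantConfig) -> QuantConfig:
    # Mirrors the new elif branch: only FP8_BLOCK_SCALES configs with an
    # unset group_size are touched; explicit values are left alone.
    if quant_config.quant_algo == QuantAlgo.FP8_BLOCK_SCALES:
        if quant_config.group_size is None:
            quant_config.group_size = 128
    return quant_config


cfg = apply_group_size_default(QuantConfig(QuantAlgo.FP8_BLOCK_SCALES))
print(cfg.group_size)  # → 128
```

Note that the patch only fills in a missing value; a checkpoint that already specifies `group_size` keeps its own setting, and other quantization algorithms are unaffected.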
