[Bugfix] fix qwen3 moe fp8 accuracy issue (#23031)

jinzhen-lin · web-flow · commit a258ad8bcc00 · 2025-08-16T17:41:23.000-07:00
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
@@ -125,6 +125,10 @@ def from_config(cls, config: dict[str, Any]) -> "Fp8Config":
         ignored_layers = cls.get_from_keys_or(config, ["ignored_layers"], None)
         weight_block_size = cls.get_from_keys_or(config, ["weight_block_size"],
                                                  None)
+        if not ignored_layers:
+            ignored_layers = cls.get_from_keys_or(config,
+                                                  ["modules_to_not_convert"],
+                                                  None)
         return cls(is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized,
                    activation_scheme=activation_scheme,
                    ignored_layers=ignored_layers,