Commit 6e1f6d3

fix target_bits ut
Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
1 parent 6889568 commit 6e1f6d3

2 files changed: 11 additions, 7 deletions
neural_compressor/torch/algorithms/weight_only/autoround.py

Lines changed: 1 addition & 1 deletion
@@ -201,7 +201,7 @@ def convert(self, model: torch.nn.Module, *args, **kwargs):
             model, weight_config = rounder.quantize()
             model.autoround_config = weight_config
             return rounder.save_quantized(output_dir=output_dir, inplace=True)
-        elif "itrex" in export_format:
+        elif "itrex" in export_format: # TODO: remove itrex related code later
             model, weight_config = rounder.quantize()
             model.autoround_config = weight_config
             model = pack_model(model, weight_config, device=device, inplace=True)

test/3x/torch/quantization/weight_only/test_autoround.py

Lines changed: 10 additions & 6 deletions
@@ -420,10 +420,12 @@ def test_target_bits(self):
         model = prepare(model=fp32_model, quant_config=quant_config)
         model = convert(model)
         # mxfp4/8 model inference relys on autoround extension for vLLM.
-        assert "MXFP8" in model.model.decoder.layers[0].self_attn.k_proj.__class__.__name__, \
+        assert ("MXFP8" in model.model.decoder.layers[0].self_attn.k_proj.__class__.__name__ and \
+            "MXFP4" in model.model.decoder.layers[1].fc1.__class__.__name__) \
+            or \
+            ("MXFP4" in model.model.decoder.layers[0].self_attn.k_proj.__class__.__name__ and \
+            "MXFP8" in model.model.decoder.layers[1].fc1.__class__.__name__), \
             "model is not quantized correctly, please check."
-        assert "MXFP4" in model.model.decoder.layers[1].fc1.__class__.__name__, \
-            "model is not quantized correctly, please check."
 
 
     @pytest.mark.skipif(not ct_installed, reason="The compressed-tensors module is not installed.")
@@ -461,9 +463,11 @@ def eval_acc_fn(model) -> float:
         )
         best_model = autotune(model=fp32_model, tune_config=custom_tune_config, eval_fn=eval_acc_fn)
         # mxfp4/8 model inference relys on autoround extension for vLLM.
-        assert "MXFP8" in best_model.model.decoder.layers[0].self_attn.k_proj.__class__.__name__, \
-            "model is not quantized correctly, please check."
-        assert "MXFP8" in best_model.model.decoder.layers[1].fc1.__class__.__name__, \
+        assert ("MXFP8" in best_model.model.decoder.layers[0].self_attn.k_proj.__class__.__name__ and \
+            "MXFP8" in best_model.model.decoder.layers[1].fc1.__class__.__name__) \
+            or \
+            ("MXFP4" in best_model.model.decoder.layers[0].self_attn.k_proj.__class__.__name__ and \
+            "MXFP4" in best_model.model.decoder.layers[1].fc1.__class__.__name__), \
             "model is not quantized correctly, please check."
 
     def test_static_attention_dtype(self):
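
Note: the commit itself only rewrites the assertions as shown above. As a minimal sketch (not part of this change), the ordering-tolerant checks could be factored into a small helper; the name assert_target_bits and its acceptable parameter are hypothetical, and the sketch assumes the same probed layers (layers[0].self_attn.k_proj and layers[1].fc1) used in the test.

# Hypothetical helper (a sketch, not part of this commit): checks that the two
# probed layers were quantized to one of the acceptable MX dtype pairs,
# regardless of which precision target_bits assigned to which layer.
def assert_target_bits(model, acceptable):
    names = (
        model.model.decoder.layers[0].self_attn.k_proj.__class__.__name__,
        model.model.decoder.layers[1].fc1.__class__.__name__,
    )
    assert any(a in names[0] and b in names[1] for a, b in acceptable), \
        "model is not quantized correctly, please check."

# Usage mirroring the two updated assertions:
# the convert() test accepts MXFP8/MXFP4 assigned to the probed layers in either order,
assert_target_bits(model, [("MXFP8", "MXFP4"), ("MXFP4", "MXFP8")])
# while the autotune() test accepts either an all-MXFP8 or an all-MXFP4 result.
assert_target_bits(best_model, [("MXFP8", "MXFP8"), ("MXFP4", "MXFP4")])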
