intel · wenhuach21 · Mar 26, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py
@@ -1316,7 +1316,9 @@ def _immediate_pack(self, name: str):
             tokenizer=self.tokenizer,
         )
 
-    @torch.inference_mode()
+    # Use torch.no_grad() rather than torch.inference_mode() here; see:
+    # https://github.com/intel/auto-round/issues/1620
+    @torch.no_grad()
     def _quantize_rtn(self) -> tuple[torch.nn.Module, dict[str, Any]]:
         """Quantize all modules in the model using RTN (Round-To-Nearest) strategy.
 

diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py
@@ -464,7 +464,7 @@ def __getitem__(self, key):
 if importlib.util.find_spec("deepspeed"):  # check if deepspeed is installed
     deepspeed_exists = True
 
-SUPPORTED_DTYPES = ("int", "mx_fp", "fp", "nv_fp")
+SUPPORTED_DTYPES = ("int", "mx_fp", "fp", "nv_fp", "mx_int")
 SUPPORTED_FORMATS = SupportedFormats()
 SUPPORTED_LAYER_TYPES = (torch.nn.Linear, transformers.pytorch_utils.Conv1D)
 # Changed to str as it relies on triton or others lib to load this