
Commit 4a2a28e

wenhuach21 authored and chensuyue committed

fix gptqmodel inference issue (#813)

(cherry picked from commit c4e7fbe)

1 parent 838df68 commit 4a2a28e

File tree

1 file changed: +7 -0 lines changed


auto_round/inference/backend.py

Lines changed: 7 additions & 0 deletions
@@ -521,8 +521,15 @@ def dynamic_import_inference_linear(backend, bits, group_size, sym):
 
 
 def get_gptqmodel_infer_linear(backend, bits=4, group_size=128, sym=False):
+    import torch
+
+    dtype = torch.get_default_dtype()
+    if dtype != torch.float32:
+        torch.set_default_dtype(torch.float32)
     import gptqmodel  # pylint: disable=E0401
 
+    torch.set_default_dtype(dtype)
+
     if "marlin" in backend:
         return auto_round_extension.cuda.gptqmodel_marlin.get_marlin_layer()
         # return gptqmodel.nn_modules.qlinear.marlin.MarlinQuantLinear
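The added lines pin torch's default dtype to float32 for the duration of the gptqmodel import (presumably because gptqmodel constructs tensors at import time and misbehaves under a non-float32 default), then restore the previous default. Below is a minimal sketch of the same guard written as a reusable context manager, assuming a try/finally restore is wanted even when the import raises; the name default_dtype is illustrative and not part of this commit.

import contextlib

import torch


@contextlib.contextmanager
def default_dtype(dtype):
    """Temporarily set torch's default dtype, restoring it on exit.

    try/finally guarantees the original dtype is restored even if the
    guarded block (e.g. an import) raises.
    """
    saved = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        torch.set_default_dtype(saved)


# Hypothetical usage mirroring the commit's intent:
# with default_dtype(torch.float32):
#     import gptqmodel  # modules created at import time get float32 params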
