3 changes: 2 additions & 1 deletion docs/source/Instruction/Export-and-push.md
@@ -11,7 +11,8 @@ SWIFT supports quantization exports for AWQ, GPTQ, FP8, and BNB models. With AWQ and GPTQ

| Quantization Technique | Multimodal | Inference Acceleration | Continued Training |
| ---------------------- | ---------- | ---------------------- | ------------------ |
-| GPTQ | ✅ | ✅ | ✅ |
+| GPTQ/GPTQ-V2 | ✅ | ✅ | ✅ |
| FP8 | ✅ | ✅ | ✅ |
| AWQ | ✅ | ✅ | ✅ |
| BNB | ❌ | ✅ | ✅ |

3 changes: 2 additions & 1 deletion docs/source_en/Instruction/Export-and-push.md
@@ -10,7 +10,8 @@ SWIFT supports quantization exports for AWQ, GPTQ, FP8, and BNB models. AWQ and

| Quantization Technique | Multimodal | Inference Acceleration | Continued Training |
| ---------------------- | ---------- | ---------------------- | ------------------ |
-| GPTQ | ✅ | ✅ | ✅ |
+| GPTQ/GPTQ-V2 | ✅ | ✅ | ✅ |
| FP8 | ✅ | ✅ | ✅ |
| AWQ | ✅ | ✅ | ✅ |
| BNB | ❌ | ✅ | ✅ |

2 changes: 2 additions & 0 deletions swift/pipelines/export/quant.py
@@ -280,6 +280,8 @@ def gptq_model_quantize(self, v2: bool = False):
         logger.info('Start quantizing the model...')
         logger.warning('The process of packing the model takes a long time and there is no progress bar. '
                        'Please be patient and wait...')
+        if not hasattr(self.model, 'hf_device_map'):
+            self.model.hf_device_map = {'': torch.device('cuda:0')}
Comment on lines +283 to +284
Contributor
Severity: high

Hardcoding the device to `cuda:0` can fail or misbehave on multi-GPU systems and CPU-only environments. It is safer to initialize `hf_device_map` from the model's current device.

Suggested change
-        if not hasattr(self.model, 'hf_device_map'):
-            self.model.hf_device_map = {'': torch.device('cuda:0')}
+        if not hasattr(self.model, 'hf_device_map'):
+            self.model.hf_device_map = {'': self.model.device}

        with self._patch_gptq_block(self.model, block_name_to_quantize):
            gptq_quantizer.quantize_model(self.model, self.tokenizer)
        self.model.config.quantization_config.pop('dataset', None)
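The device-aware fallback proposed in the review can be sketched in isolation. A minimal sketch, assuming a plain `nn.Linear` as a hypothetical stand-in for `self.model`: `transformers` models expose a `.device` property directly, while a bare `nn.Module` does not, so here the device is read from the first parameter instead.

```python
import torch
import torch.nn as nn

# Hypothetical stand-in for self.model: a plain nn.Module, which, unlike a
# model loaded with accelerate's device_map, carries no hf_device_map attribute.
model = nn.Linear(4, 4)

# Device-aware fallback along the lines of the review suggestion: derive the
# device from the model itself instead of hardcoding cuda:0, so the same code
# works on CPU-only hosts and on whichever GPU the model actually lives on.
if not hasattr(model, 'hf_device_map'):
    device = next(model.parameters()).device
    model.hf_device_map = {'': device}

print(model.hf_device_map)
```

On a CPU-only host this maps the whole model (the empty-string key) to the CPU device; after `model.cuda()`, the same code would record the model's CUDA device instead.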