1 change: 1 addition & 0 deletions docs/source/Instruction/Export-and-push.md
@@ -11,6 +11,7 @@ SWIFT supports quantization exports for AWQ, GPTQ, FP8, and BNB models. AWQ and

| Quantization Technique | Multimodal | Inference Acceleration | Continued Training |
| ---------------------- | ---------- | ---------------------- | ------------------ |
+| FP8 | ✅ | ✅ | ✅ |
 | GPTQ | ✅ | ✅ | ✅ |
 | AWQ | ✅ | ✅ | ✅ |
 | BNB | ❌ | ✅ | ✅ |
3 changes: 2 additions & 1 deletion docs/source_en/Instruction/Export-and-push.md
@@ -10,7 +10,8 @@ SWIFT supports quantization exports for AWQ, GPTQ, FP8, and BNB models. AWQ and

| Quantization Technique | Multimodal | Inference Acceleration | Continued Training |
| ---------------------- | ---------- | ---------------------- | ------------------ |
-| GPTQ | ✅ | ✅ | ✅ |
+| FP8 | ✅ | ✅ | ✅ |
+| GPTQ | ✅ | ✅ | ✅ |
 | AWQ | ✅ | ✅ | ✅ |
 | BNB | ❌ | ✅ | ✅ |
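
Every technique in the table above is driven through the same `swift export` entry point. A minimal invocation sketch for a GPTQ INT4 export — the model ID, dataset name, and output directory are placeholders, and flag names should be confirmed against `swift export --help` for the installed ms-swift version:

```shell
# Hypothetical example: GPTQ-quantize a model to 4 bits via swift export.
# Model, dataset, and output paths are illustrative placeholders.
swift export \
    --model Qwen/Qwen2.5-7B-Instruct \
    --quant_method gptq \
    --quant_bits 4 \
    --dataset 'AI-ModelScope/alpaca-gpt4-data-en' \
    --output_dir ./qwen2_5-7b-gptq-int4
```

AWQ and GPTQ need the `--dataset` calibration set; FP8 and BNB exports do not calibrate and can omit it.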

2 changes: 1 addition & 1 deletion swift/pipelines/export/merge_lora.py
@@ -53,7 +53,7 @@ def merge_lora(args: ExportArguments, device_map=None, replace_if_exists=False)
model_dirs=args.adapters,
max_shard_size=args.max_shard_size,
additional_saved_files=model.model_meta.additional_saved_files)
-        logger.info(f'Successfully merged LoRA and saved in {output_dir}.')
+        logger.info(f'Successfully merged LoRA and saved in `{output_dir}`.')
args.device_map = origin_device_map

args.model = output_dir
4 changes: 3 additions & 1 deletion swift/pipelines/export/quant.py
@@ -68,7 +68,7 @@ def quantize(self):
args.output_dir,
model_dirs=[args.model_dir],
additional_saved_files=self.model.model_meta.additional_saved_files)
-        logger.info(f'Successfully quantized the model and saved in {args.output_dir}.')
+        logger.info(f'Successfully quantized the model and saved in `{args.output_dir}`.')

@torch.inference_mode()
def _prepare_gptq_dataset(self, examples: List[Dict[str, torch.LongTensor]], batch_size: int = 1, *args, **kwargs):
@@ -280,6 +280,8 @@ def gptq_model_quantize(self, v2: bool = False):
logger.info('Start quantizing the model...')
logger.warning('The process of packing the model takes a long time and there is no progress bar. '
'Please be patient and wait...')
+        if not hasattr(self.model, 'hf_device_map'):
+            self.model.hf_device_map = {'': torch.device('cuda:0')}
Comment on lines +283 to +284
Contributor


high

Hardcoding the device to `cuda:0` can lead to failures or incorrect behavior on multi-GPU systems or in CPU-only environments. It is safer to use the model's current device when initializing `hf_device_map`.

Suggested change
-        if not hasattr(self.model, 'hf_device_map'):
-            self.model.hf_device_map = {'': torch.device('cuda:0')}
+        if not hasattr(self.model, 'hf_device_map'):
+            self.model.hf_device_map = {'': self.model.device}
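
The reviewer's point can be illustrated with a small self-contained sketch. The `DummyModel` class here is hypothetical, standing in for a transformers model that accelerate has not annotated with a device map; deriving the fallback entry from `model.device` keeps it correct on CPU-only and multi-GPU hosts alike:

```python
import torch


class DummyModel:
    """Hypothetical stand-in for a model lacking an hf_device_map attribute."""

    @property
    def device(self) -> torch.device:
        # A real transformers model reports the device of its parameters;
        # this sketch pretends the model lives on CPU.
        return torch.device('cpu')


def ensure_device_map(model) -> None:
    # Only fill in a default map when accelerate did not set one already,
    # and derive the device from the model rather than hardcoding cuda:0.
    if not hasattr(model, 'hf_device_map'):
        model.hf_device_map = {'': model.device}


model = DummyModel()
ensure_device_map(model)
print(model.hf_device_map)  # {'': device(type='cpu')}
```

On a CUDA host where the model actually sits on `cuda:0`, the same code produces `{'': device(type='cuda', index=0)}`, so the original single-GPU behavior is preserved.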

with self._patch_gptq_block(self.model, block_name_to_quantize):
gptq_quantizer.quantize_model(self.model, self.tokenizer)
self.model.config.quantization_config.pop('dataset', None)
Expand Down