
Commit 1a343fc

fix amd ci (#667)
1 parent 4ad5a50 commit 1a343fc

File tree

5 files changed: +23 -40 lines changed

dockerfiles/Dockerfile.mi350

Lines changed: 0 additions & 1 deletion
@@ -45,4 +45,3 @@ ENV HSA_FORCE_FINE_GRAIN_PCIE=1
 
 # Default command
 CMD ["python", "-c", "from lightx2v import LightX2VPipeline; print('LightX2V AMD ROCm ready!')"]
-

lightx2v_platform/base/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-from lightx2v_platform.base.base import check_ai_device, init_ai_device
+from lightx2v_platform.base.base import check_ai_device, init_ai_device  # noqa
 from lightx2v_platform.base.amd_rocm import AmdRocmDevice
 from lightx2v_platform.base.cambricon_mlu import MluDevice
 from lightx2v_platform.base.hygon_dcu import HygonDcuDevice
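
For context on this one-line change: the import exists only to re-export check_ai_device and init_ai_device at the package level, so a linter's unused-import rule would otherwise flag it, and the # noqa marker suppresses that. A minimal sketch of the consumer side, assuming only the package layout visible in this diff:

# Hypothetical consumer module: the names resolve through the package-level
# re-export above, which is why the seemingly "unused" submodule import has to
# stay (the `# noqa` keeps lint from flagging or auto-removing it).
from lightx2v_platform.base import check_ai_device, init_ai_device  # noqa: F401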

lightx2v_platform/base/amd_rocm.py

Lines changed: 18 additions & 25 deletions
@@ -8,9 +8,9 @@
 """
 
 import sys
+
 import torch
 import torch.distributed as dist
-
 from loguru import logger
 
 from lightx2v_platform.registry_factory import PLATFORM_DEVICE_REGISTER
@@ -33,40 +33,40 @@
 class AiterSglKernelCompat:
     """
     Compatibility layer to use aiter with sgl_kernel interface.
-
+
     This class wraps aiter functions to match sgl_kernel's API,
     allowing existing code to work seamlessly on AMD GPUs.
-
+
     Note: This is REQUIRED on AMD ROCm as the original sgl_kernel
     does not support AMD GPUs.
     """
-
+
     def __init__(self, aiter_module):
         self._aiter = aiter_module
         self._gemm_a8w8 = aiter_module.gemm_a8w8_CK
         self._pertoken_quant = aiter_module.pertoken_quant
         self._dtypes = aiter_module.dtypes
         self._rms_norm = aiter_module.rms_norm
         logger.info("Using aiter as sgl_kernel backend (AMD ROCm optimized)")
-
+
     def rmsnorm(self, input, weight, eps):
         """RMSNorm compatible with sgl_kernel.rmsnorm(input, weight, eps)"""
         return self._rms_norm(input, weight, eps)
-
+
     def fp8_scaled_mm(self, input_quant, weight, input_scale, weight_scale, dtype, bias=None):
         """FP8 GEMM compatible with sgl_kernel.fp8_scaled_mm"""
         return self._gemm_a8w8(input_quant, weight, input_scale, weight_scale, bias, dtype)
-
+
     def int8_scaled_mm(self, input_quant, weight, input_scale, weight_scale, dtype, bias=None):
         """INT8 GEMM compatible with sgl_kernel.int8_scaled_mm"""
         return self._gemm_a8w8(input_quant, weight, input_scale, weight_scale, bias, dtype)
-
+
     def sgl_per_token_quant_fp8(self, x, out, scale):
         """Per-token FP8 quantization compatible with sgl_kernel.sgl_per_token_quant_fp8"""
         q, s = self._pertoken_quant(x, quant_dtype=self._dtypes.fp8)
         out.copy_(q)
         scale.copy_(s)
-
+
     def sgl_per_token_group_quant_fp8(self, x, out, scale, group_size=128, eps=1e-10, fp8_min=-448.0, fp8_max=448.0):
         """Per-token per-group FP8 quantization compatible with sgl_kernel.sgl_per_token_group_quant_fp8"""
         m, k = x.shape
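
To make the intent of AiterSglKernelCompat concrete, below is a minimal, self-contained sketch of the same adapter idea: an object that exposes sgl_kernel-style call signatures while delegating to an aiter-style backend. A plain-PyTorch stand-in replaces aiter here so the sketch runs without an AMD GPU; the real class instead wires up aiter.rms_norm, aiter.gemm_a8w8_CK, and aiter.pertoken_quant as shown in the diff.

import types

import torch


def _reference_rms_norm(x, weight, eps):
    # Plain-PyTorch stand-in for aiter.rms_norm, for illustration only.
    variance = x.float().pow(2).mean(dim=-1, keepdim=True)
    return (x.float() * torch.rsqrt(variance + eps)).to(x.dtype) * weight


# Stand-in backend exposing an aiter-like attribute.
fake_aiter = types.SimpleNamespace(rms_norm=_reference_rms_norm)


class RmsNormShim:
    """sgl_kernel-style interface backed by an aiter-style module (sketch)."""

    def __init__(self, aiter_module):
        self._rms_norm = aiter_module.rms_norm

    def rmsnorm(self, input, weight, eps):
        # Same argument order as sgl_kernel.rmsnorm(input, weight, eps).
        return self._rms_norm(input, weight, eps)


shim = RmsNormShim(fake_aiter)
x = torch.randn(4, 64)
w = torch.ones(64)
print(shim.rmsnorm(x, w, 1e-6).shape)  # torch.Size([4, 64])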
@@ -82,20 +82,13 @@ def _get_aiter_sgl_kernel():
     """Get aiter-based sgl_kernel compatibility layer."""
     try:
         import aiter
+
         return AiterSglKernelCompat(aiter)
     except ImportError:
         logger.error(
-            f"\n{'='*60}\n"
-            f"ERROR: AMD ROCm detected but aiter is not installed.\n"
-            f"aiter is REQUIRED for LightX2V to work on AMD GPUs.\n"
-            f"\nPlease install aiter:\n"
-            f"{AITER_INSTALL_CMD}\n"
-            f"{'='*60}\n"
-        )
-        raise ImportError(
-            "aiter is required for AMD ROCm support. "
-            f"Please install: pip install git+{AITER_REPO}@{AITER_COMMIT}"
+            f"\n{'=' * 60}\nERROR: AMD ROCm detected but aiter is not installed.\naiter is REQUIRED for LightX2V to work on AMD GPUs.\n\nPlease install aiter:\n{AITER_INSTALL_CMD}\n{'=' * 60}\n"
         )
+        raise ImportError(f"aiter is required for AMD ROCm support. Please install: pip install git+{AITER_REPO}@{AITER_COMMIT}")
 
 
 @PLATFORM_DEVICE_REGISTER("amd_rocm")
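
The reflowed error path above follows a common guard pattern for optional accelerator backends: attempt the import, log a human-readable install hint, then raise so callers fail fast instead of running without the kernel library. A generic sketch of that pattern, using placeholder names rather than the project's real constants:

from loguru import logger

INSTALL_CMD = "pip install <backend>"  # placeholder; the real module uses AITER_INSTALL_CMD


def get_backend():
    try:
        import some_optional_backend  # placeholder name, not a real package

        return some_optional_backend
    except ImportError:
        logger.error(f"\n{'=' * 60}\nbackend not installed; install with:\n{INSTALL_CMD}\n{'=' * 60}\n")
        raise ImportError(f"backend is required; install with: {INSTALL_CMD}")


try:
    get_backend()
except ImportError as exc:
    print(exc)  # demonstrates the failure path when the backend is absent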
@@ -113,24 +106,24 @@ class AmdRocmDevice:
     def init_device_env():
         """
         Initialize AMD ROCm optimizations.
-
+
         This is called from lightx2v_platform.set_ai_device when platform is amd_rocm.
         1. Disable cudnn for faster VAE convolution
         2. Inject aiter as sgl_kernel compatibility layer (REQUIRED on AMD)
         """
         logger.info("AMD ROCm platform detected, initializing optimizations...")
-
+
         # Disable cudnn for faster VAE conv computation
         torch.backends.cudnn.enabled = False
         logger.info(" - cudnn disabled for faster VAE convolution")
-
+
         # Inject aiter as sgl_kernel compatibility layer (REQUIRED)
         sgl_kernel = _get_aiter_sgl_kernel()
         sys.modules["sgl_kernel"] = sgl_kernel
         # Update any module that already imported sgl_kernel
         for mod_name, mod in list(sys.modules.items()):
-            if mod is not None and hasattr(mod, 'sgl_kernel'):
-                setattr(mod, 'sgl_kernel', sgl_kernel)
+            if mod is not None and hasattr(mod, "sgl_kernel"):
+                setattr(mod, "sgl_kernel", sgl_kernel)
         logger.info(" - aiter sgl_kernel compatibility layer enabled (RMSNorm, GEMM)")
 
     @staticmethod
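
The key mechanism in init_device_env is registering the compatibility object under the sgl_kernel module name. Below is a minimal sketch of that injection technique with a made-up name (fake_kernel) so it can run anywhere: once an object sits in sys.modules, later import statements return it, and modules that already imported the name can be patched with setattr, just like the loop above does for sgl_kernel.

import sys
import types

# Stand-in for the compatibility object; the real code registers an
# AiterSglKernelCompat instance here.
shim = types.SimpleNamespace(rmsnorm=lambda x, w, eps: x)

sys.modules["fake_kernel"] = shim  # future `import fake_kernel` now returns the shim
for mod_name, mod in list(sys.modules.items()):
    if mod is not None and hasattr(mod, "fake_kernel"):
        setattr(mod, "fake_kernel", shim)  # patch modules that imported it earlier

import fake_kernel  # resolves to the shim registered above

print(fake_kernel is shim)  # True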
@@ -153,7 +146,7 @@ def init_parallel_env():
 # Export constants
 __all__ = [
     "IS_AMD_ROCM",
-    "AITER_REPO",
+    "AITER_REPO",
     "AITER_COMMIT",
     "AITER_INSTALL_CMD",
     "AiterSglKernelCompat",

lightx2v_platform/ops/attn/amd_rocm/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,2 +1 @@
 from .flash_attn import *
-

lightx2v_platform/ops/attn/amd_rocm/flash_attn.py

Lines changed: 4 additions & 12 deletions
@@ -37,10 +37,7 @@
 except ImportError as e:
     AITER_IMPORT_ERROR = str(e)
     if IS_AMD_ROCM:
-        logger.warning(
-            f"aiter not found on AMD ROCm platform. "
-            f"For optimal performance, please install aiter:\n{AITER_INSTALL_CMD}"
-        )
+        logger.warning(f"aiter not found on AMD ROCm platform. For optimal performance, please install aiter:\n{AITER_INSTALL_CMD}")
     else:
         logger.debug("aiter not found (only available on AMD ROCm platform)")
 
@@ -61,22 +58,18 @@ class AiterAttnWeight(AttnWeightTemplate):
 
     def __init__(self):
         self.config = {}
-
+
         # Check platform first
         if not IS_AMD_ROCM:
             raise RuntimeError(
                 "aiter_attn is only available on AMD ROCm platform.\n"
                 "Current platform is not AMD ROCm (torch.version.hip is not set).\n"
                 "For NVIDIA GPUs, please use 'flash_attn2' or 'flash_attn3' instead."
             )
-
+
         # Check aiter availability
         if not AITER_AVAILABLE:
-            raise ImportError(
-                f"aiter is not installed on AMD ROCm platform.\n"
-                f"Import error: {AITER_IMPORT_ERROR}\n"
-                f"Please install aiter for optimal performance:\n{AITER_INSTALL_CMD}"
-            )
+            raise ImportError(f"aiter is not installed on AMD ROCm platform.\nImport error: {AITER_IMPORT_ERROR}\nPlease install aiter for optimal performance:\n{AITER_INSTALL_CMD}")
 
     def apply(
         self,
@@ -107,4 +100,3 @@ def apply(
             max_seqlen_kv,
         ).reshape(bs * max_seqlen_q, -1)
         return x
-
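
The IS_AMD_ROCM flag used throughout this file is defined outside the hunks shown here. A hedged sketch of how such a platform gate is commonly derived, consistent with the hint in the error message above ("torch.version.hip is not set"):

import torch

# Assumed shape of the platform flag: ROCm builds of PyTorch expose a non-None
# torch.version.hip, while CUDA/CPU builds leave it as None.
IS_AMD_ROCM = getattr(torch.version, "hip", None) is not None

print("AMD ROCm build of PyTorch" if IS_AMD_ROCM else "not a ROCm build")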

0 commit comments
