Skip to content

Commit d34a733

Browse files
committed
remove torch.accelerator apis
Signed-off-by: Chendi Xue <chendi.xue@intel.com>
1 parent fdd28c4 commit d34a733

File tree

7 files changed

+44
-4
lines changed

7 files changed

+44
-4
lines changed

tests/e2e/offline_inference/test_diffusion_cpu_offload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@
2121

2222
def inference(model_name: str, offload: bool = True):
2323
current_omni_platform.empty_cache()
24-
device_index = torch.accelerator.current_device_index()
24+
device_index = current_omni_platform.current_device_index()
2525
monitor = DeviceMemoryMonitor.instantiate(device_index=device_index, interval=0.02)
2626
monitor.start()
2727
m = Omni(model=model_name, enable_cpu_offload=offload)
28-
torch.accelerator.reset_peak_memory_stats()
28+
current_omni_platform.reset_peak_memory_stats()
2929
height = 256
3030
width = 256
3131

tests/e2e/offline_inference/test_diffusion_layerwise_offload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def run_inference(
2929
num_inference_steps: int = 3,
3030
) -> float:
3131
current_omni_platform.empty_cache()
32-
device_index = torch.accelerator.current_device_index()
32+
device_index = current_omni_platform.current_device_index()
3333
monitor = DeviceMemoryMonitor.instantiate(device_index=device_index, interval=0.02)
3434
monitor.start()
3535

@@ -40,7 +40,7 @@ def run_inference(
4040
flow_shift=5.0,
4141
)
4242

43-
torch.accelerator.reset_peak_memory_stats()
43+
current_omni_platform.reset_peak_memory_stats()
4444

4545
# Refer to tests/e2e/offline_inference/test_t2v_model.py
4646
# Use minimal settings for testing

vllm_omni/platforms/cuda/platform.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,11 @@ def get_free_memory(cls, device: torch.device | None = None) -> int:
115115
@classmethod
116116
def get_device_name(cls, device_id: int = 0) -> str:
117117
return torch.cuda.get_device_name(device_id)
118+
119+
@classmethod
120+
def reset_peak_memory_stats(cls, device: torch.device | None = None) -> None:
121+
torch.cuda.reset_peak_memory_stats(device)
122+
123+
@classmethod
124+
def current_device_index(cls) -> int:
125+
return torch.cuda.current_device()

vllm_omni/platforms/interface.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ def synchronize(cls) -> None:
9898
def get_free_memory(cls, device: torch.device | None = None) -> int:
9999
raise NotImplementedError
100100

101+
@classmethod
102+
def reset_peak_memory_stats(cls, device: torch.device | None = None) -> None:
103+
raise NotImplementedError
104+
105+
@classmethod
106+
def current_device_index(cls) -> int:
107+
raise NotImplementedError
108+
101109

102110
class UnspecifiedOmniPlatform(OmniPlatform):
103111
_omni_enum = OmniPlatformEnum.UNSPECIFIED

vllm_omni/platforms/npu/platform.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,14 @@ def get_free_memory(cls, device: torch.device | None = None) -> int:
8282
free, _ = torch.npu.mem_get_info(device)
8383
return free
8484

85+
@classmethod
86+
def reset_peak_memory_stats(cls, device: torch.device | None = None) -> None:
87+
torch.npu.reset_peak_memory_stats(device)
88+
89+
@classmethod
90+
def current_device_index(cls) -> int:
91+
return torch.npu.current_device()
92+
8593
@classmethod
8694
def get_device_total_memory(cls, device_id: int = 0) -> int:
8795
device_props = torch.npu.get_device_properties(device_id)

vllm_omni/platforms/rocm/platform.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,11 @@ def synchronize(cls) -> None:
9999
def get_free_memory(cls, device: torch.device | None = None) -> int:
100100
free, _ = torch.cuda.mem_get_info(device)
101101
return free
102+
103+
@classmethod
104+
def reset_peak_memory_stats(cls, device: torch.device | None = None) -> None:
105+
torch.cuda.reset_peak_memory_stats(device)
106+
107+
@classmethod
108+
def current_device_index(cls) -> int:
109+
return torch.cuda.current_device()

vllm_omni/platforms/xpu/platform.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,14 @@ def get_device_version(cls) -> str | None:
7171
def synchronize(cls) -> None:
7272
torch.xpu.synchronize()
7373

74+
@classmethod
75+
def reset_peak_memory_stats(cls, device: torch.device | None = None) -> None:
76+
torch.xpu.reset_peak_memory_stats(device)
77+
78+
@classmethod
79+
def current_device_index(cls) -> int:
80+
return torch.xpu.current_device()
81+
7482
@classmethod
7583
def get_free_memory(cls, device: torch.device | None = None) -> int:
7684
if device is None:

0 commit comments

Comments
 (0)