Skip to content

Commit 6e8e32a

Browse files
committed
fix for online apply
Signed-off-by: Chendi Xue <chendi.xue@intel.com>
1 parent 96cc841 commit 6e8e32a

File tree

2 files changed

+1
-43
lines changed

2 files changed

+1
-43
lines changed

tests/e2e/offline_inference/test_diffusion_layerwise_offload.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from tests.utils import DeviceMemoryMonitor
99
from vllm_omni.inputs.data import OmniDiffusionSamplingParams
10+
from vllm_omni.platforms import current_omni_platform
1011

1112
# ruff: noqa: E402
1213
REPO_ROOT = Path(__file__).resolve().parents[2]

tests/utils.py

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -525,19 +525,6 @@ def __init__(self, device_index: int, interval: float = 0.05):
525525
self._stop_event = threading.Event()
526526
self._thread: threading.Thread | None = None
527527

528-
def start(self) -> None:
529-
def monitor_loop() -> None:
530-
while not self._stop_event.is_set():
531-
try:
532-
with torch.cuda.device(self.device_index):
533-
free_bytes, total_bytes = torch.cuda.mem_get_info()
534-
used_mb = (total_bytes - free_bytes) / (1024**2)
535-
self._peak_used_mb = max(self._peak_used_mb, used_mb)
536-
except Exception:
537-
pass
538-
time.sleep(self.interval)
539-
540-
self._thread = threading.Thread(target=monitor_loop, daemon=False)
541528
def start(self) -> None:
542529
def monitor_loop() -> None:
543530
while not self._stop_event.is_set():
@@ -570,21 +557,6 @@ def __del__(self):
570557

571558

572559
class NPUMemoryMonitor(DeviceMemoryMonitor):
573-
def start(self) -> None:
574-
def monitor_loop() -> None:
575-
while not self._stop_event.is_set():
576-
try:
577-
with torch.npu.device(self.device_index):
578-
free_bytes, total_bytes = torch.npu.mem_get_info()
579-
used_mb = (total_bytes - free_bytes) / (1024**2)
580-
self._peak_used_mb = max(self._peak_used_mb, used_mb)
581-
except Exception:
582-
pass
583-
time.sleep(self.interval)
584-
585-
self._thread = threading.Thread(target=monitor_loop, daemon=False)
586-
self._thread.start()
587-
588560
@property
589561
def peak_used_mb(self) -> float:
590562
fallback_alloc = torch.npu.max_memory_allocated(device=self.device_index) / (1024**2)
@@ -593,21 +565,6 @@ def peak_used_mb(self) -> float:
593565

594566

595567
class XPUMemoryMonitor(DeviceMemoryMonitor):
596-
def start(self) -> None:
597-
def monitor_loop() -> None:
598-
while not self._stop_event.is_set():
599-
try:
600-
with torch.xpu.device(self.device_index):
601-
free_bytes, total_bytes = torch.xpu.mem_get_info()
602-
used_mb = (total_bytes - free_bytes) / (1024**2)
603-
self._peak_used_mb = max(self._peak_used_mb, used_mb)
604-
except Exception:
605-
pass
606-
time.sleep(self.interval)
607-
608-
self._thread = threading.Thread(target=monitor_loop, daemon=False)
609-
self._thread.start()
610-
611568
@property
612569
def peak_used_mb(self) -> float:
613570
fallback_alloc = torch.xpu.max_memory_allocated(device=self.device_index) / (1024**2)

0 commit comments

Comments (0)