[V0.9.1][BugFix] Fix the bug in decorator patch (#2199)

rjg-lyh · web-flow · commit a704967d8b74 · 2025-08-05T17:19:57.000+08:00
### What this PR does / why we need it? Fix the bug in the decorator patch, which would have a negative impact on performance with mini batches. It is now fixed by enabling this patch only when engine_v1 and flashcomm_v1 are enabled together. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed with newly added/existing tests. Related: #2191 (comment) Signed-off-by: rjg-lyh <1318825571@qq.com>
diff --git a/vllm_ascend/__init__.py b/vllm_ascend/__init__.py
@@ -25,9 +25,14 @@ def register():
 def register_model():
     # fix pytorch schema check error, remove this line after pytorch
     # is upgraded to 2.7.0
+    import vllm.envs as envs
+
     import vllm_ascend.patch.worker.patch_common.patch_utils  # noqa: F401
 
     from .models import register_model
 
-    import vllm_ascend.patch.platform.patch_0_9_1.patch_decorator  # isort: skip  # noqa: F401
+    import vllm_ascend.envs as envs_ascend  # isort: skip  # noqa: F401
+    if envs.VLLM_USE_V1 and \
+        envs_ascend.VLLM_ASCEND_ENABLE_FLASHCOMM != 0:
+        import vllm_ascend.patch.platform.patch_0_9_1.patch_decorator  # isort: skip  # noqa: F401
     register_model()
diff --git a/vllm_ascend/patch/platform/patch_0_9_1/__init__.py b/vllm_ascend/patch/platform/patch_0_9_1/__init__.py
@@ -19,4 +19,3 @@
 # patch files.
 import vllm_ascend.patch.worker.patch_common.patch_utils  # noqa isort:skip
 import vllm_ascend.patch.platform.patch_0_9_1.patch_cache_manager  # noqa
-import vllm_ascend.patch.platform.patch_0_9_1.patch_decorator  # noqa