Skip to content

Commit 0b6bf66

Browse files
committed
Splitting ops set to empty array unconditionally
1 parent 48e5320 commit 0b6bf66

File tree

1 file changed

+1
-21
lines changed

1 file changed

+1
-21
lines changed

vllm/config/compilation.py

Lines changed: 1 addition & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -548,27 +548,7 @@ def set_splitting_ops_for_v1(self):
548 548
"set_splitting_ops_for_v1 should only be called when "
549 549
"level is CompilationLevel.PIECEWISE")
550 550

551-
if self.splitting_ops is None:
552-
# NOTE: When using full cudagraph, instead of setting an empty
553-
# list and capture the full cudagraph inside the flattened fx
554-
# graph, we keep the piecewise fx graph structure but capture the
555-
# full cudagraph outside the fx graph. This reduces some cpu
556-
# overhead when the runtime batch_size is not cudagraph captured.
557-
# see https://github.com/vllm-project/vllm/pull/20059 for details.
558-
self.splitting_ops = self._attention_ops
559-
elif len(self.splitting_ops) == 0:
560-
logger.warning_once("Using piecewise compilation with empty "
561-
"splitting_ops.")
562-
if self.cudagraph_mode == CUDAGraphMode.PIECEWISE:
563-
logger.warning_once(
564-
"When compilation level is piecewise with empty "
565-
"splitting_ops, PIECEWISE cudagraph_mode will be "
566-
"treated as FULL cudagraph_mode. Please ensure you are "
567-
"using attention backends that support cudagraph or set "
568-
"cudagraph_mode to NONE explicitly if encountering "
569-
"any problems.")
570-
self.cudagraph_mode = CUDAGraphMode.FULL
571-
self.splitting_ops = []
551+
self.splitting_ops = []
572 552

573 553
def splitting_ops_contain_attention(self) -> bool:
574 554
return self.splitting_ops is not None and all(

0 commit comments

Comments
 (0)