|
5 | 5 | import torchair
|
6 | 6 | from torchair import patch_for_hcom
|
7 | 7 | from vllm.attention.layer import Attention
|
8 |
| -from vllm.config import (VllmConfig, get_layers_from_vllm_config, |
9 |
| - set_current_vllm_config) |
| 8 | +from vllm.config import (CUDAGraphMode, VllmConfig, |
| 9 | + get_layers_from_vllm_config, set_current_vllm_config) |
10 | 10 | from vllm.forward_context import BatchDescriptor, get_forward_context
|
11 | 11 | from vllm.model_executor.model_loader import get_model_loader
|
12 | 12 | from vllm.model_executor.model_loader.utils import (
|
@@ -110,7 +110,9 @@ def dummy_run(self,
|
110 | 110 | with_prefill: bool = False,
|
111 | 111 | skip_attn: bool = False,
|
112 | 112 | num_reqs: int = 0,
|
113 |
| - num_tokens_across_dp=None) -> None: |
| 113 | + num_tokens_across_dp=None, |
| 114 | + aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, |
| 115 | + batch_descriptor=None) -> None: |
114 | 116 | if not self.torchair_graph_enabled:
|
115 | 117 | # TODO: adapt enable_dbo later
|
116 | 118 | (num_tokens, num_tokens_across_dp, with_prefill,
|
@@ -152,7 +154,9 @@ def dummy_run(self,
|
152 | 154 | reserved_mc2_mask=self.runner.reserved_mc2_mask,
|
153 | 155 | moe_comm_type=moe_comm_type,
|
154 | 156 | in_profile_run=self.runner.in_profile_run,
|
155 |
| - num_actual_tokens=0): |
| 157 | + num_actual_tokens=0, |
| 158 | + aclgraph_runtime_mode=aclgraph_runtime_mode, |
| 159 | + batch_descriptor=batch_descriptor): |
156 | 160 | if is_running_torchair:
|
157 | 161 | assert attn_metadata is not None
|
158 | 162 | torch._dynamo.mark_static(input_ids)
|
@@ -446,6 +450,7 @@ def _propose(
|
446 | 450 | reserved_mc2_mask=self.runner.reserved_mc2_mask,
|
447 | 451 | moe_comm_type=moe_comm_type,
|
448 | 452 | aclgraph_runtime_mode=aclgraph_runtime_mode,
|
| 453 | + batch_descriptor=batch_descriptor, |
449 | 454 | in_profile_run=self.runner.in_profile_run,
|
450 | 455 | num_actual_tokens=num_tokens):
|
451 | 456 | with ProfileExecuteDuration().capture_async('mtp_forward'):
|
|
0 commit comments