Skip to content

Commit e73ed0f

Browse files
authored
[Bugfix] Fix type annotations in CPU model runner (#4256)
1 parent 296cdf8 commit e73ed0f

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

vllm/worker/cpu_model_runner.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,8 @@ def load_model(self) -> None:
     def _prepare_prompt(
         self,
         seq_group_metadata_list: List[SequenceGroupMetadata],
-    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int]]:
+    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int],
+               Optional[torch.Tensor]]:
         assert len(seq_group_metadata_list) > 0
         input_tokens: List[int] = []
         input_positions: List[int] = []
@@ -347,8 +348,8 @@ def _prepare_sample(
     def prepare_input_tensors(
         self,
         seq_group_metadata_list: List[SequenceGroupMetadata],
-    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata,
-               SamplingMetadata]:
+    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, SamplingMetadata,
+               Optional[torch.Tensor]]:
         multi_modal_input = None
         if self.is_driver_worker:
             # NOTE: We assume that all sequences in the group are all prompts or

0 commit comments

Comments (0)