Skip to content

Commit b46e4a0

Browse files
authored
[Core][Bookkeeping Optimization] Update against numpy view of is_token_ids tensor (#27618)
Signed-off-by: Jialin Ouyang <[email protected]>
1 parent d34f5fe commit b46e4a0

File tree

2 files changed

+3 -2 lines changed

2 files changed

+3 -2 lines changed

vllm/v1/worker/gpu_input_batch.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -108,9 +108,10 @@ def __init__(
108 108               pin_memory=False,
109 109           )
110 110           self.token_ids_cpu = self.token_ids_cpu_tensor.numpy()
111     -         self.is_token_ids = torch.zeros(
    111 +         self.is_token_ids_tensor = torch.zeros(
112 112               (max_num_reqs, max_model_len), device="cpu", dtype=bool, pin_memory=False
113 113           )
    114 +         self.is_token_ids = self.is_token_ids_tensor.numpy()
114 115           # Store prompt embeddings per request to avoid OOM from large upfront
115 116           # allocation if max_model_len is big.
116 117           # Maps req_index -> tensor of shape (num_prompt_tokens, hidden_size)

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1103,7 +1103,7 @@ def _prepare_inputs(
1103 1103               out=self.input_ids.cpu[:total_num_scheduled_tokens],
1104 1104           )
1105 1105           if self.enable_prompt_embeds:
1106      -             is_token_ids = self.input_batch.is_token_ids.flatten()
     1106 +             is_token_ids = self.input_batch.is_token_ids_tensor.flatten()
1107 1107               torch.index_select(
1108 1108                   is_token_ids,
1109 1109                   0,

0 commit comments

Comments
 (0)