We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 87ff41e — commit ec8bcb9 (Copy full SHA for ec8bcb9)
vllm/v1/spec_decode/eagle.py
@@ -522,13 +522,9 @@ def prepare_next_token_ids_padded(
522
)
523
524
# Generate a mask for all valid tokens within those requests
525
- max_gen_len = sampled_token_ids.shape[-1]
526
- if max_gen_len == 1:
527
- valid_mask = torch.ones_like(valid_sampled_token_ids_gpu, dtype=torch.bool)
528
- else:
529
- valid_mask = (valid_sampled_token_ids_gpu != -1) & (
530
- valid_sampled_token_ids_gpu < gpu_input_batch.vocab_size
531
- )
+ valid_mask = (valid_sampled_token_ids_gpu != -1) & (
+ valid_sampled_token_ids_gpu < gpu_input_batch.vocab_size
+ )
532
533
# Count the number of valid tokens in each request
534
valid_sampled_tokens_count = valid_mask.sum(dim=1)
0 commit comments