Skip to content

Commit 1c3ffdb

Browse files
authored
[V0 Deprecation] Remove V0 sampling metadata (#25345)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent c438b29 commit 1c3ffdb

File tree

141 files changed

+172
-583
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

141 files changed

+172
-583
lines changed

tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
LlavaForConditionalGeneration,
1010
LlavaMultiModalProcessor,
1111
LlavaProcessingInfo)
12-
from vllm.model_executor.sampling_metadata import SamplingMetadata
1312
from vllm.multimodal import MULTIMODAL_REGISTRY
1413

1514

@@ -18,11 +17,10 @@
1817
dummy_inputs=LlavaDummyInputsBuilder)
1918
class MyLlava(LlavaForConditionalGeneration):
2019

21-
def compute_logits(
22-
self, hidden_states: torch.Tensor,
23-
sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]:
20+
def compute_logits(self,
21+
hidden_states: torch.Tensor) -> Optional[torch.Tensor]:
2422
# this dummy model always predicts the first token
25-
logits = super().compute_logits(hidden_states, sampling_metadata)
23+
logits = super().compute_logits(hidden_states)
2624
if logits is not None:
2725
logits.zero_()
2826
logits[:, 0] += 1.0

tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_opt.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,16 +6,14 @@
66
import torch
77

88
from vllm.model_executor.models.opt import OPTForCausalLM
9-
from vllm.model_executor.sampling_metadata import SamplingMetadata
109

1110

1211
class MyOPTForCausalLM(OPTForCausalLM):
1312

14-
def compute_logits(
15-
self, hidden_states: torch.Tensor,
16-
sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]:
13+
def compute_logits(self,
14+
hidden_states: torch.Tensor) -> Optional[torch.Tensor]:
1715
# this dummy model always predicts the first token
18-
logits = super().compute_logits(hidden_states, sampling_metadata)
16+
logits = super().compute_logits(hidden_states)
1917
if logits is not None:
2018
logits.zero_()
2119
logits[:, 0] += 1.0

vllm/model_executor/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,9 @@
33

44
from vllm.model_executor.parameter import (BasevLLMParameter,
55
PackedvLLMParameter)
6-
from vllm.model_executor.sampling_metadata import SamplingMetadata
76
from vllm.model_executor.utils import set_random_seed
87

98
__all__ = [
10-
"SamplingMetadata",
119
"set_random_seed",
1210
"BasevLLMParameter",
1311
"PackedvLLMParameter",

vllm/model_executor/layers/logits_processor.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
from vllm.model_executor.custom_op import CustomOp
1111
from vllm.model_executor.layers.vocab_parallel_embedding import (
1212
VocabParallelEmbedding)
13-
from vllm.model_executor.sampling_metadata import SamplingMetadata
1413
from vllm.platforms import current_platform
1514

1615

@@ -50,7 +49,6 @@ def forward(
5049
self,
5150
lm_head: VocabParallelEmbedding,
5251
hidden_states: torch.Tensor,
53-
sampling_metadata: Optional[SamplingMetadata] = None,
5452
embedding_bias: Optional[torch.Tensor] = None,
5553
) -> Optional[torch.Tensor]:
5654
if self.logits_as_input:

vllm/model_executor/models/apertus.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@
4848
DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
4949
from vllm.model_executor.model_loader.weight_utils import (
5050
default_weight_loader, maybe_remap_kv_scale_name)
51-
from vllm.model_executor.sampling_metadata import SamplingMetadata
5251
from vllm.sequence import IntermediateTensors
5352

5453
from .interfaces import SupportsLoRA, SupportsPP
@@ -566,10 +565,8 @@ def forward(
566565
def compute_logits(
567566
self,
568567
hidden_states: torch.Tensor,
569-
sampling_metadata: SamplingMetadata,
570568
) -> Optional[torch.Tensor]:
571-
logits = self.logits_processor(self.lm_head, hidden_states,
572-
sampling_metadata)
569+
logits = self.logits_processor(self.lm_head, hidden_states)
573570
return logits
574571

575572
def load_weights(self, weights: Iterable[tuple[str,

vllm/model_executor/models/arcee.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -399,11 +399,10 @@ def forward(
399399
inputs_embeds=inputs_embeds)
400400
return model_output
401401

402-
def compute_logits(self, hidden_states: torch.Tensor,
403-
sampling_metadata) -> Optional[torch.Tensor]:
402+
def compute_logits(self,
403+
hidden_states: torch.Tensor) -> Optional[torch.Tensor]:
404404
# Compute final logits from hidden states (last pipeline rank only)
405-
logits = self.logits_processor(self.lm_head, hidden_states,
406-
sampling_metadata)
405+
logits = self.logits_processor(self.lm_head, hidden_states)
407406
return logits
408407

409408
def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:

vllm/model_executor/models/arctic.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030
from vllm.model_executor.layers.vocab_parallel_embedding import (
3131
ParallelLMHead, VocabParallelEmbedding)
3232
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
33-
from vllm.model_executor.sampling_metadata import SamplingMetadata
3433
from vllm.model_executor.utils import set_weight_attrs
3534
from vllm.platforms import current_platform
3635
from vllm.sequence import IntermediateTensors
@@ -456,10 +455,8 @@ def forward(
456455
def compute_logits(
457456
self,
458457
hidden_states: torch.Tensor,
459-
sampling_metadata: SamplingMetadata,
460458
) -> Optional[torch.Tensor]:
461-
logits = self.logits_processor(self.lm_head, hidden_states,
462-
sampling_metadata)
459+
logits = self.logits_processor(self.lm_head, hidden_states)
463460
return logits
464461

465462
def load_weights(self, weights: Iterable[tuple[str,

vllm/model_executor/models/aria.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@
1919
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
2020
from vllm.model_executor.model_loader.weight_utils import (
2121
default_weight_loader, maybe_remap_kv_scale_name)
22-
from vllm.model_executor.sampling_metadata import SamplingMetadata
2322
from vllm.multimodal import MULTIMODAL_REGISTRY
2423
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
2524
MultiModalKwargsItems)
@@ -644,10 +643,8 @@ def forward(
644643

645644
return hidden_states
646645

647-
def compute_logits(self, hidden_states: torch.Tensor,
648-
sampling_metadata: SamplingMetadata) -> torch.Tensor:
649-
logits = self.logits_processor(self.lm_head, hidden_states,
650-
sampling_metadata)
646+
def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
647+
logits = self.logits_processor(self.lm_head, hidden_states)
651648
return logits
652649

653650
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):

vllm/model_executor/models/aya_vision.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
get_optimal_tiled_canvas)
1717

1818
from vllm.config import VllmConfig
19-
from vllm.model_executor.sampling_metadata import SamplingMetadata
2019
from vllm.multimodal import MULTIMODAL_REGISTRY
2120
from vllm.multimodal.inputs import MultiModalDataDict, MultiModalKwargsItems
2221
from vllm.multimodal.parse import (ImageProcessorItems, ImageSize,
@@ -464,7 +463,5 @@ def forward(
464463
def compute_logits(
465464
self,
466465
hidden_states: torch.Tensor,
467-
sampling_metadata: SamplingMetadata,
468466
) -> Optional[torch.Tensor]:
469-
return self.language_model.compute_logits(hidden_states,
470-
sampling_metadata)
467+
return self.language_model.compute_logits(hidden_states)

vllm/model_executor/models/baichuan.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,6 @@
4646
ParallelLMHead, VocabParallelEmbedding)
4747
from vllm.model_executor.model_loader.weight_utils import (
4848
default_weight_loader, row_parallel_weight_loader)
49-
from vllm.model_executor.sampling_metadata import SamplingMetadata
5049
from vllm.sequence import IntermediateTensors
5150

5251
from .interfaces import SupportsLoRA, SupportsPP, SupportsQuant
@@ -421,10 +420,8 @@ def forward(
421420
def compute_logits(
422421
self,
423422
hidden_states: torch.Tensor,
424-
sampling_metadata: SamplingMetadata,
425423
) -> Optional[torch.Tensor]:
426-
logits = self.logits_processor(self.lm_head, hidden_states,
427-
sampling_metadata)
424+
logits = self.logits_processor(self.lm_head, hidden_states)
428425
return logits
429426

430427
def load_weights(self, weights: Iterable[tuple[str,

0 commit comments

Comments
 (0)