     get_logits_processors as get_openai_logits_processors)
 from vllm.executor.executor_base import ExecutorBase
 from vllm.inputs import (INPUT_REGISTRY, InputRegistry, ProcessorInputs,
-                         PromptType, SingletonInputsAdapter)
-from vllm.inputs.parse import is_encoder_decoder_inputs, is_token_prompt
+                         PromptType)
+from vllm.inputs.parse import is_token_prompt, split_enc_dec_inputs
 from vllm.inputs.preprocess import InputPreprocessor
 from vllm.logger import init_logger
 from vllm.logits_process import get_bad_words_logits_processors
@@ -609,12 +609,7 @@ def _add_processed_request(
         seq_id = next(self.seq_counter)
         eos_token_id = self.input_preprocessor.get_eos_token_id(lora_request)

-        if is_encoder_decoder_inputs(processed_inputs):
-            decoder_inputs = processed_inputs["decoder"]
-            encoder_inputs = processed_inputs["encoder"]
-        else:
-            decoder_inputs = processed_inputs
-            encoder_inputs = None
+        encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs)

         seq = Sequence(seq_id, decoder_inputs, block_size, eos_token_id,
                        lora_request, prompt_adapter_request)
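Both call sites now collapse the old six-line branch into a single tuple unpacking via split_enc_dec_inputs. A minimal sketch of what that helper presumably factors out, reconstructed from the removed branch above (the exact signature and TypedDict types in vllm.inputs.parse are assumptions; plain dicts stand in here):

# Sketch of split_enc_dec_inputs, inferred from the removed inline branch.
# Return order is assumed from the call sites: encoder inputs first,
# decoder inputs second, with None for the encoder half on decoder-only
# inputs.
from typing import Any, Dict, Optional, Tuple

def split_enc_dec_inputs(
        inputs: Dict[str, Any]) -> Tuple[Optional[Dict[str, Any]], Dict[str, Any]]:
    if "encoder" in inputs and "decoder" in inputs:
        # Encoder-decoder inputs carry a separate singleton dict per side.
        return inputs["encoder"], inputs["decoder"]
    # Decoder-only inputs: everything belongs to the decoder side.
    return None, inputs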
@@ -2031,15 +2026,16 @@ def create_trace_span(self, seq_group: SequenceGroup) -> None:

     def _validate_model_inputs(self, inputs: ProcessorInputs,
                                lora_request: Optional[LoRARequest]):
-        if is_encoder_decoder_inputs(inputs):
-            # For encoder-decoder multimodal models, the max_prompt_len
-            # restricts the decoder prompt length
-            prompt_inputs = inputs["decoder" if self.model_config.
-                                   is_multimodal_model else "encoder"]
+        encoder_inputs, decoder_inputs = split_enc_dec_inputs(inputs)
+
+        # For encoder-decoder multimodal models, the max_prompt_len
+        # restricts the decoder prompt length
+        if self.model_config.is_multimodal_model:
+            prompt_inputs = decoder_inputs
         else:
-            prompt_inputs = inputs
+            prompt_inputs = encoder_inputs or decoder_inputs

-        prompt_ids = SingletonInputsAdapter(prompt_inputs).prompt_token_ids
+        prompt_ids = prompt_inputs["prompt_token_ids"]

         if prompt_ids is None or len(prompt_ids) == 0:
             raise ValueError("Prompt cannot be empty")
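With the helper in place, _validate_model_inputs picks the prompt to length-check by tuple unpacking: the decoder side for multimodal encoder-decoder models, otherwise the encoder side when present, falling back to the decoder side via `encoder_inputs or decoder_inputs` (the None returned for decoder-only inputs makes the fallback work). A toy demonstration of that selection, reusing the split_enc_dec_inputs sketch above and plain dicts in place of vLLM's ProcessorInputs (shapes assumed):

# Toy inputs standing in for decoder-only and encoder-decoder ProcessorInputs.
dec_only = {"prompt_token_ids": [1, 2, 3]}
enc_dec = {
    "encoder": {"prompt_token_ids": [4, 5, 6]},
    "decoder": {"prompt_token_ids": [0]},
}

for inputs, is_multimodal in ((dec_only, False), (enc_dec, False), (enc_dec, True)):
    encoder_inputs, decoder_inputs = split_enc_dec_inputs(inputs)
    if is_multimodal:
        # Multimodal enc-dec: max_prompt_len restricts the decoder prompt.
        prompt_inputs = decoder_inputs
    else:
        # Decoder-only (encoder_inputs is None) or plain enc-dec models.
        prompt_inputs = encoder_inputs or decoder_inputs
    prompt_ids = prompt_inputs["prompt_token_ids"]
    assert prompt_ids, "Prompt cannot be empty"

The removal of SingletonInputsAdapter in the last hunk follows the same simplification: once each side is a plain singleton inputs dict, the token IDs can be read directly via `prompt_inputs["prompt_token_ids"]` instead of going through an adapter wrapper.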