|
12 | 12 | from vllm.outputs import RequestOutput
|
13 | 13 | from vllm.sampling_params import SamplingParams
|
14 | 14 | from vllm.sequence import (SamplerOutput, Sequence, SequenceGroup,
|
15 |
| - SequenceGroupMetadata, SequenceOutputs, |
16 |
| - SequenceStatus) |
| 15 | + SequenceGroupMetadata, SequenceGroupOutputs, |
| 16 | + SequenceOutputs, SequenceStatus) |
17 | 17 | from vllm.transformers_utils.tokenizer import (detokenize_incrementally,
|
18 | 18 | get_tokenizer)
|
19 | 19 | from vllm.utils import Counter
|
@@ -350,9 +350,15 @@ def _check_beam_search_early_stopping(
|
350 | 350 | eos_token_id=self.tokenizer.eos_token_id))
|
351 | 351 | return current_worst_score >= highest_attainable_score
|
352 | 352 |
|
353 |
| - def _process_sequence_group_samples( |
354 |
| - self, seq_group: SequenceGroup, |
355 |
| - samples: List[SequenceOutputs]) -> None: |
| 353 | + def _process_sequence_group_outputs(self, seq_group: SequenceGroup, |
| 354 | + outputs: SequenceGroupOutputs) -> None: |
| 355 | + # Process prompt logprobs |
| 356 | + prompt_logprobs = outputs.prompt_logprobs |
| 357 | + if prompt_logprobs is not None: |
| 358 | + seq_group.prompt_logprobs = prompt_logprobs |
| 359 | + |
| 360 | + # Process samples |
| 361 | + samples = outputs.samples |
356 | 362 | parent_seqs = seq_group.get_seqs(status=SequenceStatus.RUNNING)
|
357 | 363 | existing_finished_seqs = seq_group.get_finished_seqs()
|
358 | 364 | parent_child_dict = {
|
@@ -520,8 +526,8 @@ def _process_model_outputs(
|
520 | 526 | scheduler_outputs: SchedulerOutputs) -> List[RequestOutput]:
|
521 | 527 | # Update the scheduled sequence groups with the model outputs.
|
522 | 528 | scheduled_seq_groups = scheduler_outputs.scheduled_seq_groups
|
523 |
| - for seq_group, samples in zip(scheduled_seq_groups, output): |
524 |
| - self._process_sequence_group_samples(seq_group, samples) |
| 529 | + for seq_group, outputs in zip(scheduled_seq_groups, output): |
| 530 | + self._process_sequence_group_outputs(seq_group, outputs) |
525 | 531 |
|
526 | 532 | # Free the finished sequence groups.
|
527 | 533 | self.scheduler.free_finished_seq_groups()
|
|
0 commit comments