@@ -23,7 +23,6 @@
 from vllm.inputs import TextPrompt
 from vllm.inputs.preprocess import InputPreprocessor
 from vllm.logger import init_logger
-from vllm.outputs import RequestOutput
 from vllm.sampling_params import SamplingParams
 from vllm.tokenizers import TokenizerLike
 from vllm.usage.usage_lib import UsageContext
@@ -1424,7 +1423,7 @@ async def generation_single_request(task: dict[str, Any]):
         batch_request_ids, batch_request_outputs, _gen_ms_list, batch_metrics
     ):
         try:
-            r_outputs = [output_strip(output, omni_stage)]
+            r_outputs = [output]
             use_shm, payload = maybe_dump_to_shm(r_outputs, shm_threshold_bytes)
             if use_shm:
                 out_q.put(
@@ -1510,23 +1509,3 @@ def make_stage_stats(_agg_total_tokens: int, _agg_total_gen_time_ms: float):
     from vllm_omni.entrypoints.log_utils import StageStats

     return StageStats(total_token=_agg_total_tokens, total_gen_time=_agg_total_gen_time_ms)
-
-
-def output_strip(r_output: RequestOutput, omni_stage: OmniStage):
-    if omni_stage.final_output and omni_stage.final_output_type != "text":
-        return r_output
-
-    if getattr(r_output, "finished", False):
-        return r_output
-
-    mm_output = getattr(r_output, "multimodal_output", None)
-    if mm_output is not None:
-        r_output.multimodal_output = {}
-
-    outputs = getattr(r_output, "outputs", None)
-    if outputs is not None:
-        for out in outputs:
-            if getattr(out, "multimodal_output", None):
-                out.multimodal_output = {}
-
-    return r_output