Description
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:69] Dumping input data for V1 LLM engine (v0.1.dev8932+g9ecabf22f) with config: model='/home/ubuntu/project/Step-Audio2/Step-Audio-2-mini', speculative_config=None, tokenizer='/home/ubuntu/project/Step-Audio2/Step-Audio-2-mini', skip_tokenizer_init=False, tokenizer_mode=step_audio_2, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=16384, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=step-audio-2-mini, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, pooler_config=None, compilation_config={"level":3,"debug_dump_path":"","cache_dir":"/home/ubuntu/.cache/vllm/torch_compile_cache/e4e30a22fc","backend":"","custom_ops":[],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output","vllm.mamba_mixer2","vllm.mamba_mixer","vllm.short_conv"],"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false},"inductor_passes":{},"cudagraph_mode":1,"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[64,56,48,40,32,24,16,8,4,2,1],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"pass_config":{},"max_capture_size":64,"local_cache_dir":"/home/ubuntu/.cache/vllm/torch_compile_cache/e4e30a22fc/rank_0_0/backbone"}, audio_parser='step_audio_2_tts_ta4',
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] Dumping scheduler output for model execution: SchedulerOutput(scheduled_new_reqs=[NewRequestData(req_id=chatcmpl-9d3de8e572b24ae590bf7ec7f896cf98,prompt_token_ids_len=1055,mm_kwargs=[{'audio_lens': MultiModalFieldElem(modality='audio', key='audio_lens', data=tensor(6362), field=MultiModalBatchedField()), 'audio_mels': MultiModalFieldElem(modality='audio', key='audio_mels', data=tensor([[-0.9258, -0.9258, -0.9258, ..., -0.9258, -0.9258, -0.9258],
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] [-0.9258, -0.9258, -0.9258, ..., -0.9258, -0.9258, -0.9258],
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] [-0.9258, -0.9258, -0.9258, ..., -0.9258, -0.9258, -0.9258],
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] ...,
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] [-0.7773, -0.6797, -0.6758, ..., -0.9258, -0.9258, -0.9258],
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] [-0.9258, -0.9258, -0.9258, ..., -0.9258, -0.9258, -0.9258],
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] [-0.9258, -0.9258, -0.9258, ..., -0.9258, -0.9258, -0.9258]],
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] dtype=torch.bfloat16), field=MultiModalFlatField(slices=[[slice(0, 6362, None)]], dim=0))}],mm_hashes=['1c79c81b9c207317ab090035fc8e21ec0934972726f6afab24b4847d5317f877'],mm_positions=[PlaceholderRange(offset=253, length=797, is_embed=tensor([False, True, True, True, True, True, True, True, True, True,
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76]        ... (several hundred repeated True values elided for brevity) ...
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:76] True, True, True, True, True, True, False]))],sampling_params=SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.0, top_p=1.0, top_k=0, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=4096, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, guided_decoding=None, extra_args=None),block_ids=([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463],),num_computed_tokens=240,lora_request=None)], scheduled_cached_reqs=CachedRequestData(req_ids=[], resumed_from_preemption=[], new_token_ids=[], new_block_ids=[], num_computed_tokens=[]), num_scheduled_tokens={chatcmpl-9d3de8e572b24ae590bf7ec7f896cf98: 815}, total_num_scheduled_tokens=815, scheduled_spec_decode_tokens={}, scheduled_encoder_inputs={chatcmpl-9d3de8e572b24ae590bf7ec7f896cf98: [0]}, num_common_prefix_blocks=[66], finished_req_ids=[], free_encoder_mm_hashes=[], structured_output_request_ids={}, grammar_bitmask=null, kv_connector_metadata=null)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [dump_input.py:79] Dumping scheduler stats: SchedulerStats(num_running_reqs=1, num_waiting_reqs=0, step_counter=0, current_wave=0, kv_cache_usage=0.04400000000000004, prefix_cache_stats=PrefixCacheStats(reset=False, requests=1, queries=1055, hits=240), spec_decoding_stats=None, num_corrupted_reqs=0)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] EngineCore encountered a fatal error.
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] Traceback (most recent call last):
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 701, in run_engine_core
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] engine_core.run_busy_loop()
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 728, in run_busy_loop
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] self._process_engine_step()
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 753, in _process_engine_step
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] outputs, model_executed = self.step_fn()
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 289, in step
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] model_output = self.execute_model_with_error_logging(
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 275, in execute_model_with_error_logging
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] raise err
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 266, in execute_model_with_error_logging
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return model_fn(scheduler_output)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/executor/abstract.py", line 95, in execute_model
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] output = self.collective_rpc("execute_model",
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] answer = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/utils/__init__.py", line 3031, in run_method
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return func(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return func(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/worker/gpu_worker.py", line 362, in execute_model
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] output = self.model_runner.execute_model(scheduler_output,
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return func(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/worker/gpu_model_runner.py", line 1516, in execute_model
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] self._execute_mm_encoder(scheduler_output)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/v1/worker/gpu_model_runner.py", line 1166, in _execute_mm_encoder
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] curr_group_outputs = self.model.get_multimodal_embeddings(
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 701, in get_multimodal_embeddings
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] audio_embeddings = self._process_audio_input(audio_input)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 683, in _process_audio_input
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] audio_features, audio_lens = self.encoder(audio_mels, audio_lens)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return self._call_impl(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return forward_call(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 524, in forward
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] x = block(x, mask.unsqueeze(1))
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return self._call_impl(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return forward_call(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 443, in forward
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] x = x + self.attn(self.attn_ln(x), mask=mask)[0]
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return self._call_impl(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] return forward_call(*args, **kwargs)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 402, in forward
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] wv, qk = self.qkv_attention(q, k, v, mask)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 421, in qkv_attention
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] w = F.softmax(qk, dim=-1).to(q.dtype)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/functional.py", line 2140, in softmax
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] ret = input.softmax(dim)
(EngineCore_0 pid=955818) ERROR 11-01 17:53:05 [core.py:710] torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 774.00 MiB. GPU 0 has a total capacity of 47.38 GiB of which 766.75 MiB is free. Process 2129 has 390.70 MiB memory in use. Process 751182 has 868.00 MiB memory in use. Process 752462 has 20.05 GiB memory in use. Including non-PyTorch memory, this process has 18.49 GiB memory in use. Process 956964 has 6.10 GiB memory in use. Of the allocated memory 17.72 GiB is allocated by PyTorch, with 17.88 MiB allocated in private pools (e.g., CUDA Graphs), and 244.36 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
(EngineCore_0 pid=955818) Process EngineCore_0:
(EngineCore_0 pid=955818) Traceback (most recent call last):
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_0 pid=955818) self.run()
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/multiprocessing/process.py", line 108, in run
(EngineCore_0 pid=955818) self._target(*self._args, **self._kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 712, in run_engine_core
(EngineCore_0 pid=955818) raise e
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 701, in run_engine_core
(EngineCore_0 pid=955818) engine_core.run_busy_loop()
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 728, in run_busy_loop
(EngineCore_0 pid=955818) self._process_engine_step()
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 753, in _process_engine_step
(EngineCore_0 pid=955818) outputs, model_executed = self.step_fn()
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 289, in step
(EngineCore_0 pid=955818) model_output = self.execute_model_with_error_logging(
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 275, in execute_model_with_error_logging
(EngineCore_0 pid=955818) raise err
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core.py", line 266, in execute_model_with_error_logging
(EngineCore_0 pid=955818) return model_fn(scheduler_output)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/executor/abstract.py", line 95, in execute_model
(EngineCore_0 pid=955818) output = self.collective_rpc("execute_model",
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
(EngineCore_0 pid=955818) answer = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/utils/init.py", line 3031, in run_method
(EngineCore_0 pid=955818) return func(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(EngineCore_0 pid=955818) return func(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/worker/gpu_worker.py", line 362, in execute_model
(EngineCore_0 pid=955818) output = self.model_runner.execute_model(scheduler_output,
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
(EngineCore_0 pid=955818) return func(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/worker/gpu_model_runner.py", line 1516, in execute_model
(EngineCore_0 pid=955818) self._execute_mm_encoder(scheduler_output)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/v1/worker/gpu_model_runner.py", line 1166, in _execute_mm_encoder
(EngineCore_0 pid=955818) curr_group_outputs = self.model.get_multimodal_embeddings(
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 701, in get_multimodal_embeddings
(EngineCore_0 pid=955818) audio_embeddings = self._process_audio_input(audio_input)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 683, in _process_audio_input
(EngineCore_0 pid=955818) audio_features, audio_lens = self.encoder(audio_mels, audio_lens)
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(EngineCore_0 pid=955818) return self._call_impl(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(EngineCore_0 pid=955818) return forward_call(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 524, in forward
(EngineCore_0 pid=955818) x = block(x, mask.unsqueeze(1))
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(EngineCore_0 pid=955818) return self._call_impl(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(EngineCore_0 pid=955818) return forward_call(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 443, in forward
(EngineCore_0 pid=955818) x = x + self.attn(self.attn_ln(x), mask=mask)[0]
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1751, in _wrapped_call_impl
(EngineCore_0 pid=955818) return self._call_impl(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1762, in _call_impl
(EngineCore_0 pid=955818) return forward_call(*args, **kwargs)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 402, in forward
(EngineCore_0 pid=955818) wv, qk = self.qkv_attention(q, k, v, mask)
(EngineCore_0 pid=955818) File "/home/ubuntu/project/tests/vllm/vllm/model_executor/models/mm_step_audio.py", line 421, in qkv_attention
(EngineCore_0 pid=955818) w = F.softmax(qk, dim=-1).to(q.dtype)
(EngineCore_0 pid=955818) File "/home/ubuntu/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/nn/functional.py", line 2140, in softmax
(EngineCore_0 pid=955818) ret = input.softmax(dim)
(EngineCore_0 pid=955818) torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 774.00 MiB. GPU 0 has a total capacity of 47.38 GiB of which 766.75 MiB is free. Process 2129 has 390.70 MiB memory in use. Process 751182 has 868.00 MiB memory in use. Process 752462 has 20.05 GiB memory in use. Including non-PyTorch memory, this process has 18.49 GiB memory in use. Process 956964 has 6.10 GiB memory in use. Of the allocated memory 17.72 GiB is allocated by PyTorch, with 17.88 MiB allocated in private pools (e.g., CUDA Graphs), and 244.36 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
(APIServer pid=955616) ERROR 11-01 17:53:05 [async_llm.py:453] AsyncLLM output_handler failed.
(APIServer pid=955616) ERROR 11-01 17:53:05 [async_llm.py:453] Traceback (most recent call last):
(APIServer pid=955616) ERROR 11-01 17:53:05 [async_llm.py:453] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/async_llm.py", line 412, in output_handler
(APIServer pid=955616) ERROR 11-01 17:53:05 [async_llm.py:453] outputs = await engine_core.get_output_async()
(APIServer pid=955616) ERROR 11-01 17:53:05 [async_llm.py:453] File "/home/ubuntu/project/tests/vllm/vllm/v1/engine/core_client.py", line 843, in get_output_async
(APIServer pid=955616) ERROR 11-01 17:53:05 [async_llm.py:453] raise self._format_exception(outputs) from None
(APIServer pid=955616) ERROR 11-01 17:53:05 [async_llm.py:453] vllm.v1.engine.exceptions.EngineDeadError: EngineCore encountered an issue. See stack trace (above) for the root cause.
(APIServer pid=955616) INFO: 127.0.0.1:47224 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
[rank0]:[W1101 17:53:05.749313387 ProcessGroupNCCL.cpp:1479] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
(APIServer pid=955616) INFO: Shutting down
(APIServer pid=955616) INFO: Waiting for application shutdown.
(APIServer pid=955616) INFO: Application shutdown complete.
(APIServer pid=955616) INFO: Finished server process [955616]
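
The traceback pins the failure to the Step-Audio audio encoder, not the LLM decode path: qkv_attention in mm_step_audio.py runs eager attention, so F.softmax must materialize the full score matrix over all encoder positions at once. As a rough sanity check (assuming a Whisper-style encoder, i.e. a stride-2 conv halving the 6362 mel frames to ~3181 positions, 20 attention heads, and float32 scores; the exact encoder config is not shown in the log), that matrix is 20 x 3181^2 x 4 B ≈ 772 MiB, essentially the 774 MiB allocation that failed. Note also that GPU 0 is shared: the other processes listed in the OOM message hold roughly 27 GiB, so only 766.75 MiB was free when the encoder needed this transient buffer, and this activation peak sits outside the KV-cache budget that --gpu-memory-utilization 0.4 reserves.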
GPU: 4090 with 48 GB VRAM. vLLM launch parameters:
python -m vllm.entrypoints.openai.api_server \
    --model /home/ubuntu/project/Step-Audio2/Step-Audio-2-mini \
    --served-model-name step-audio-2-mini \
    --host 0.0.0.0 \
    --port 12002 \
    --gpu-memory-utilization 0.4 \
    --trust-remote-code \
    --max-model-len 16384 \
    --max-num-seqs 32 \
    --tensor-parallel-size 1 \
    --enable-auto-tool-choice \
    --tool-call-parser step_audio_2 \
    --tokenizer-mode step_audio_2 \
    --chat-template-content-format string \
    --audio-parser step_audio_2_tts_ta4
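
As a first experiment (a sketch, not a confirmed fix), the relaunch below only adds the allocator hint that the OOM message itself suggests; since the allocation failed largely because other processes occupy the card, freeing their ~27 GiB on GPU 0, lowering --max-num-seqs, or shortening the input audio may still be required:

# Same launch as above; the only change is the PYTORCH_CUDA_ALLOC_CONF
# hint quoted in the OOM message, which reduces fragmentation waste.
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
python -m vllm.entrypoints.openai.api_server \
    --model /home/ubuntu/project/Step-Audio2/Step-Audio-2-mini \
    --served-model-name step-audio-2-mini \
    --host 0.0.0.0 \
    --port 12002 \
    --gpu-memory-utilization 0.4 \
    --trust-remote-code \
    --max-model-len 16384 \
    --max-num-seqs 32 \
    --tensor-parallel-size 1 \
    --enable-auto-tool-choice \
    --tool-call-parser step_audio_2 \
    --tokenizer-mode step_audio_2 \
    --chat-template-content-format string \
    --audio-parser step_audio_2_tts_ta4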