@@ -117,6 +117,31 @@ def run_glmasr(question: str, audio_count: int) -> ModelRequestData:
117117 )
118118
119119
# FunAudioChat
def run_funaudiochat(question: str, audio_count: int) -> ModelRequestData:
    """Build engine args and a prompt for the FunAudioChat model.

    NOTE: FunAudioChat is not available on the HuggingFace Hub at the time of
    writing. Pass a local model path via `--model`; the caller overrides
    `engine_args.model` with that path before starting the engine.

    Args:
        question: The text question appended after the audio placeholders.
        audio_count: Number of audio clips the prompt must accommodate.

    Returns:
        ModelRequestData with the engine configuration and the formatted prompt.
    """
    # Placeholder identifier; replaced via the --model CLI override.
    model_name = "funaudiochat"

    engine_args = EngineArgs(
        model=model_name,
        max_model_len=4096,
        max_num_seqs=2,
        # Limit multimodal items per prompt to exactly what this run needs.
        limit_mm_per_prompt={"audio": audio_count},
        enforce_eager=True,
    )

    # One <|audio_bos|><|AUDIO|><|audio_eos|> placeholder region per clip,
    # each on its own line, followed by the question text.
    audio_in_prompt = "".join(
        "<|audio_bos|><|AUDIO|><|audio_eos|>\n" for _ in range(audio_count)
    )
    prompt = f"{audio_in_prompt}{question}"

    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
    )
143+
144+
120145# Granite Speech
121146def run_granite_speech (question : str , audio_count : int ) -> ModelRequestData :
122147 # NOTE - the setting in this example are somewhat different from what is
@@ -410,6 +435,7 @@ def run_whisper(question: str, audio_count: int) -> ModelRequestData:
410435 "audioflamingo3" : run_audioflamingo3 ,
411436 "gemma3n" : run_gemma3n ,
412437 "glmasr" : run_glmasr ,
438+ "funaudiochat" : run_funaudiochat ,
413439 "granite_speech" : run_granite_speech ,
414440 "midashenglm" : run_midashenglm ,
415441 "minicpmo" : run_minicpmo ,
@@ -435,6 +461,12 @@ def parse_args():
435461 choices = model_example_map .keys (),
436462 help = 'Huggingface "model_type".' ,
437463 )
464+ parser .add_argument (
465+ "--model" ,
466+ type = str ,
467+ default = None ,
468+ help = "Model ID or local path override. Required for funaudiochat." ,
469+ )
438470 parser .add_argument (
439471 "--num-prompts" , type = int , default = 1 , help = "Number of prompts to run."
440472 )
@@ -467,6 +499,9 @@ def main(args):
467499 if model not in model_example_map :
468500 raise ValueError (f"Model type { model } is not supported." )
469501
502+ if model == "funaudiochat" and not args .model :
503+ raise ValueError ("--model is required when --model-type=funaudiochat" )
504+
470505 if args .tensor_parallel_size is not None and args .tensor_parallel_size < 1 :
471506 raise ValueError (
472507 f"tensor_parallel_size must be a positive integer, "
@@ -477,6 +512,8 @@ def main(args):
477512 req_data = model_example_map [model ](
478513 question_per_audio_count [audio_count ], audio_count
479514 )
515+ if model == "funaudiochat" :
516+ req_data .engine_args .model = args .model
480517
481518 # Disable other modalities to save memory
482519 default_limits = {"image" : 0 , "video" : 0 , "audio" : 0 }
0 commit comments