@@ -25,64 +25,75 @@ filter:
2525 num_samples : 100 # number of samples to run (remove for all)
2626 length_filter : [0.0, 30.0] # optional - filters for only audio samples in this length (seconds) - only supported for general and callhome preprocessors
2727
28- judge_properties :
28+ judge_settings :
2929 judge_concurrency : 8 # judge call (optional)
30- judge_model : " gpt-4o-mini" # optional
31- judge_type : " openai" # mandatory (vllm or openai)
32- judge_api_version : " ${API_VERSION}" # optional(needed for openai)
33- judge_api_endpoint : " ${ENDPOINT_URL}" # mandatory
34- judge_api_key : " ${AUTH_TOKEN}" # mandatory
30+ judge_model : gpt-4o-mini # optional
31+ judge_type : openai # mandatory (vllm or openai)
32+ judge_api_version : ${API_VERSION} # optional (needed for openai)
33+ judge_api_endpoint : ${ENDPOINT_URL} # mandatory
34+ judge_api_key : ${AUTH_TOKEN} # mandatory
3535 judge_temperature : 0.1 # optional
36- judge_prompt_model_override : " gpt-4o-mini-enhanced" # optional
36+ judge_prompt_model_override : gpt-4o-mini-enhanced # optional
3737
3838logging :
3939 log_file : " audiobench.log" # Path to the main log file
4040
41-
4241models :
43- - name : " gpt-4o-mini-audio-preview-1" # mandatory - must be unique
44- inference_type : " openai" # mandatory - you can use vllm(vllm) , openai(openai), (chat completion) or audio transcription endpoint(transcription)
45- url : " ${ENDPOINT_URL}" # mandatory - endpoint url
42+ - name : gpt-4o-mini-audio-preview-1 # must be unique
43+ inference_type : openai # you can use vllm, openai, gemini or transcription
44+ url : ${ENDPOINT_URL} # endpoint url
4645 delay : 100
4746 retry_attempts : 8
4847 timeout : 30
49- model : " gpt-4o-mini-audio-preview" # mandatory - only needed for vllm
50- auth_token : " ${AUTH_TOKEN}"
51- api_version : " ${API_VERSION}"
48+ model : gpt-4o-mini-audio-preview
49+ auth_token : ${AUTH_TOKEN}
50+ api_version : ${API_VERSION}
5251 batch_size : 300 # Optional - batch eval size
5352 chunk_size : 30 # Optional - max audio length in seconds fed to model
5453
55- - name : " gpt-4o-mini-audio-preview-2" # mandatory - must be unique
56- inference_type : " openai" # mandatory - you can use vllm(vllm) , openai(openai), (chat completion) or audio transcription endpoint(transcription)
57- url : " ${ENDPOINT_URL}" # mandatory - endpoint url
54+ - name : gpt-4o-mini-audio-preview-2 # must be unique
55+ inference_type : openai # you can use vllm, openai, gemini or transcription
56+ url : ${ENDPOINT_URL} # endpoint url
5857 delay : 100
5958 retry_attempts : 8
6059 timeout : 30
61- model : " gpt-4o-mini-audio-preview" # mandatory - only needed for vllm
62- auth_token : " ${AUTH_TOKEN}"
63- api_version : " ${API_VERSION}"
64- batch_size : 100 # Optional - batch eval size
60+ model : gpt-4o-mini-audio-preview
61+ auth_token : ${AUTH_TOKEN}
62+ api_version : ${API_VERSION}
63+ batch_size : 300 # Optional - batch eval size
6564 chunk_size : 30 # Optional - max audio length in seconds fed to model
6665
67- - name : " qwen-2.5-omni"
68- inference_type : " vllm" # mandatory - you can use vllm(vllm), openai(openai), (chat completion) or audio transcription endpoint(transcription)
69- url : " ${ENDPOINT_URL}" # mandatory - endpoint url
66+ - name : gemini-2.5-flash # must be unique
67+ inference_type : gemini # you can use vllm, openai, gemini or transcription
68+ location : ${GOOGLE_CLOUD_LOCATION} # GCP Vertex AI configuration
69+ project_id : ${GOOGLE_CLOUD_PROJECT} # GCP Vertex AI configuration
70+ reasoning_effort : medium # Optional - Reasoning effort for supported reasoning models like gemini-2.5-flash, gpt-5,...
71+ delay : 100
72+ retry_attempts : 5
73+ timeout : 300
74+ model : google/gemini-2.5-flash
75+ batch_size : 100 # Optional - batch eval size
76+ chunk_size : 30240 # Optional - max audio length in seconds fed to model
77+
78+ - name : qwen-2.5-omni # must be unique
79+ inference_type : vllm # you can use vllm, openai, gemini or transcription
80+ url : ${ENDPOINT_URL} # endpoint url
7081 delay : 100
7182 retry_attempts : 8
7283 timeout : 30
73- model : " qwen-2.5-omni" # mandatory - only needed for vllm
74- auth_token : " ${AUTH_TOKEN}"
84+ model : qwen-2.5-omni
85+ auth_token : ${AUTH_TOKEN}
7586 batch_size : 200 # Optional - batch eval size
7687 chunk_size : 40 # Optional - max audio length in seconds fed to model
7788
78- - name : " whisper-large-3"
79- inference_type : " vllm " # mandatory - you can use vllm(vllm) , openai(openai), (chat completion) or audio transcription endpoint(transcription)
80- url : " ${ENDPOINT_URL}" # mandatory - endpoint url
89+ - name : whisper-large-3 # must be unique
90+ inference_type : transcription # you can use vllm, openai, gemini or transcription
91+ url : ${ENDPOINT_URL} # endpoint url
8192 delay : 100
8293 retry_attempts : 8
8394 timeout : 30
84- model : " whisper-large-3" # mandatory - only needed for vllm
85- auth_token : " ${AUTH_TOKEN}"
95+ model : whisper-large-3
96+ auth_token : ${AUTH_TOKEN}
8697 batch_size : 100 # Optional - batch eval size
8798 chunk_size : 30 # Optional - max audio length in seconds fed to model
8899
0 commit comments