Skip to content

Commit a77e996

Browse files
pcsid and nhhoang96 authored
added phonetics, speech_disorder, and speech_enhancement tasks - stil… (#22)
* added phonetics, speech_disorder, and speech_enhancement tasks - still in need of full model scoring. Fixed small inconsistency bug in config by changing judge_properties to judge_settings.
* Update the correct HF path for noise_detection task
* updated scores

---------

Co-authored-by: hoang <huuhoang.nguyen@servicenow.com>
1 parent e4d2203 commit a77e996

File tree

5 files changed

+70
-11
lines changed

5 files changed

+70
-11
lines changed

evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def main(cfg_path='config.yaml'):
4444
raise
4545

4646
# 4. Load models and initialize central request controller
47-
central_request_controller, model_configs = register_models_with_controller(run_config.get("models", []), run_config.get("judge_properties", {}))
47+
central_request_controller, model_configs = register_models_with_controller(run_config.get("models", []), run_config.get("judge_settings", {}))
4848

4949
# 5. Expand task-metric pairs
5050
task_payload = expand_task_metric_pairs(run_config, task_configs, task_ancestry)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
task_name: voxangeles_phoneme_counting
2+
dataset_path: DynamicSuperb/PhoneSegmentCounting_VoxAngeles
3+
modality: audio
4+
language: en
5+
split: test
6+
preprocessor: GeneralPreprocessor
7+
postprocessor: GeneralPostprocessor
8+
audio_column: audio
9+
target_column: label
10+
instruction_column: instruction
11+
user_prompt: "You are an expert at counting phones in the context of phonemes, and always attempt to answer. You will be given an audio sample, listen carefully."
12+
long_audio_processing_logic: truncate
13+
14+
generation_kwargs:
15+
temperature: 0.0001
16+
max_completion_tokens: 64
17+
18+
metrics:
19+
- metric: llm_judge_binary
20+
- metric: detailed_judge_prompt
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
task_name: stuttering_detection
2+
dataset_path: DynamicSuperb/StutteringDetection_SEP28k
3+
modality: audio
4+
language: en
5+
split: test
6+
preprocessor: GeneralPreprocessor
7+
postprocessor: GeneralPostprocessor
8+
audio_column: audio
9+
target_column: label
10+
instruction_column: instruction
11+
user_prompt: "You are an expert at stuttering detection. Look for clear, obvious stuttering. Stuttering is obviously repeating a word or part of a word. The audio clip will ALWAYS be given after the following instructions, always attempt to answer."
12+
long_audio_processing_logic: truncate
13+
14+
generation_kwargs:
15+
temperature: 0.0001
16+
max_completion_tokens: 64
17+
18+
metrics:
19+
- metric: llm_judge_binary
20+
- metric: detailed_judge_prompt
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
task_name: noise_detection
2+
dataset_path: DynamicSuperb/NoiseDetection_LJSpeech_MUSAN-Gaussian
3+
modality: audio
4+
language: en
5+
split: test
6+
preprocessor: GeneralPreprocessor
7+
postprocessor: GeneralPostprocessor
8+
audio_column: audio
9+
target_column: label
10+
instruction_column: instruction
11+
long_audio_processing_logic: truncate
12+
13+
generation_kwargs:
14+
temperature: 0.0001
15+
max_completion_tokens: 64
16+
17+
metrics:
18+
- metric: llm_judge_binary
19+
- metric: detailed_judge_prompt

utils/util.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,12 @@ def validate_config(config: dict, task_configs: dict[Path, list[dict]]) -> Dict:
8585
raise ValueError("'filters' must be a dictionary")
8686
_validate_filter_values(config['filters'])
8787

88-
# Validate judge_properties as a dictionary
89-
logger.info("---------Validating judge properties---------")
90-
if 'judge_properties' in config:
91-
if not isinstance(config['judge_properties'], dict):
92-
raise ValueError("'judge_properties' must be a dictionary")
93-
_validate_judge_properties(config['judge_properties'])
88+
# Validate judge_settings as a dictionary
89+
logger.info("---------Validating judge settings---------")
90+
if 'judge_settings' in config:
91+
if not isinstance(config['judge_settings'], dict):
92+
raise ValueError("'judge_settings' must be a dictionary")
93+
_validate_judge_settings(config['judge_settings'])
9494

9595
# Delegate validation for complex sections
9696
logger.info("---------Validating models---------")
@@ -175,11 +175,11 @@ def _validate_filter_values(filters: Dict) -> None:
175175
raise ValueError("'language' must be a string")
176176

177177

178-
def _validate_judge_properties(judge_props: Dict) -> None:
179-
"""Validate the values in the judge_properties dictionary.
180-
178+
def _validate_judge_settings(judge_props: Dict) -> None:
179+
"""Validate the values in the judge_settings dictionary.
180+
181181
Args:
182-
judge_props: Dictionary of judge properties to validate
182+
judge_props: Dictionary of judge settings to validate
183183
184184
Raises:
185185
ValueError: If any judge property is invalid

0 commit comments

Comments
 (0)