From 770044ffad08b9b63b63e2a05a03aaa55788b1b8 Mon Sep 17 00:00:00 2001
From: Louis Jordan <louisjoecodes@gmail.com>
Date: Tue, 28 Oct 2025 15:19:32 +0000
Subject: [PATCH] Fix ElevenLabs language code extraction for multilingual ASR
 benchmarks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, language_code was hardcoded to "eng" for all ElevenLabs
transcriptions, causing poor WER on multilingual benchmarks (e.g.,
French: 26.34% WER, Portuguese: 35.98% WER).

This fix:
- Extracts language code from dataset name (e.g., "fleurs_fr" → "fr")
- Passes that code as the language_code parameter instead of a hardcoded value
- Defaults to "en" (previously "eng") for dataset names without a language
  suffix (ami, librispeech, etc.)

Test results:
- French: 26.34% → 3.99% WER (85% relative reduction)
- Portuguese: 35.98% → 4.55% WER (87% relative reduction)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 api/run_eval.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/api/run_eval.py b/api/run_eval.py
index 92ded64..411d916 100644
--- a/api/run_eval.py
+++ b/api/run_eval.py
@@ -65,10 +65,21 @@ def fetch_audio_urls(dataset_path, dataset, split, batch_size=100, max_retries=2
                     raise Exception("Max retries exceeded while fetching data.")
 
 
+def extract_language_code(dataset_name: str) -> str:
+    """
+    Return the unvalidated text after the last "_" in dataset_name, else "en".
+    Examples: 'fleurs_fr' -> 'fr', 'mls_es' -> 'es', 'ami' -> 'en'
+    """
+    if "_" in dataset_name:
+        return dataset_name.split("_")[-1]
+    return "en"
+
+
 def transcribe_with_retry(
     model_name: str,
     audio_file_path: Optional[str],
     sample: dict,
+    dataset: str,
     max_retries=10,
     use_url=False,
 ):
@@ -204,7 +215,7 @@ def transcribe_with_retry(
                     transcription = client.speech_to_text.convert(
                         file=audio_data,
                         model_id=model_name.split("/")[1],
-                        language_code="eng",
+                        language_code=extract_language_code(dataset),
                         tag_audio_events=True,
                     )
                 else:
@@ -212,7 +223,7 @@ def transcribe_with_retry(
                         transcription = client.speech_to_text.convert(
                             file=audio_file,
                             model_id=model_name.split("/")[1],
-                            language_code="eng",
+                            language_code=extract_language_code(dataset),
                             tag_audio_events=True,
                         )
                 return transcription.text
@@ -330,7 +341,7 @@ def process_sample(sample):
             start = time.time()
             try:
                 transcription = transcribe_with_retry(
-                    model_name, None, sample, use_url=True
+                    model_name, None, sample, dataset, use_url=True
                 )
             except Exception as e:
                 print(f"Failed to transcribe after retries: {e}")
@@ -353,7 +364,7 @@ def process_sample(sample):
             start = time.time()
             try:
                 transcription = transcribe_with_retry(
-                    model_name, tmp_path, sample, use_url=False
+                    model_name, tmp_path, sample, dataset, use_url=False
                 )
             except Exception as e:
                 print(f"Failed to transcribe after retries: {e}")