Added fix for lhotse unique cut IDs and for the WER regression seen when batch_size is increased

KunalDhawan · KunalDhawan · commit 8c776b8e1e68 · 2025-03-16T17:56:20.000-07:00
diff --git a/nemo_asr/run_eval.py b/nemo_asr/run_eval.py
@@ -52,6 +52,14 @@ def download_audio_files(batch):
         durations = []
 
         for id, sample in zip(batch["id"], batch["audio"]):
+
+            # First step, added here to make the ID and wav filenames unique.
+            # Several datasets, like earnings22, have a hierarchical structure,
+            # e.g. earnings22/test/4432298/281.wav, earnings22/test/4450488/281.wav.
+            # lhotse uses the filename (281.wav) here as the unique ID to create and name cuts
+            # ref: https://github.com/lhotse-speech/lhotse/blob/master/lhotse/dataset/collation.py#L186
+            id = id.replace('/', '_').removesuffix('.wav')
+
             audio_path = os.path.join(CACHE_DIR, f"{id}.wav")
 
             if "array" in sample: