Skip to content

Commit 43e749d

Browse files
committed
Fix handling of earnings22
Signed-off-by: Piotr Żelasko <[email protected]>
1 parent aacbb0c commit 43e749d

File tree

2 files changed

+3
-0
lines changed

2 files changed

+3
-0
lines changed

nemo_asr/run_eval_salm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,14 @@
2727

2828
# Dataset adapter whose __getitem__ takes a collection of cuts and returns
# them together with their loaded audio.
# NOTE(review): presumably indexed with a whole batch of cuts produced by the
# DynamicCutSampler passed to the DataLoader below — confirm against the caller.
class ToAudio(torch.utils.data.Dataset):
2929
def __getitem__(self, cuts):
30+
# Added in this commit: any lhotse.MultiCut is converted with
# to_mono(mono_downmix=True); every other cut type is passed through as-is.
# The result is re-wrapped in a CutSet so load_audio() below is available.
cuts = lhotse.CutSet([c.to_mono(mono_downmix=True) if isinstance(c, lhotse.MultiCut) else c for c in cuts])
3031
# collate=True returns a pair, unpacked here as the audio batch and its lengths.
audios, audio_lens = cuts.load_audio(collate=True)
3132
# The (possibly mono-converted) cuts are returned next to the audio.
return {"cuts": cuts, "audios": audios, "audio_lens": audio_lens}
3233

3334

3435
def setup_dloader(audio_files, batch_size, num_workers):
3536
cuts = lhotse.CutSet([lhotse.Recording.from_file(p).to_cut() for p in audio_files])
37+
cuts = cuts.resample(16000)
3638
return torch.utils.data.DataLoader(
3739
dataset=ToAudio(),
3840
sampler=lhotse.dataset.DynamicCutSampler(cuts, max_cuts=batch_size),

nemo_asr/run_salm.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/bin/bash
22

33
# Prepend the parent directory ("..") to PYTHONPATH.
export PYTHONPATH="..":$PYTHONPATH
4+
# Added in this commit.
# NOTE(review): presumably turns off Hugging Face tokenizers' internal
# parallelism to avoid fork-related warnings/deadlocks when DataLoader workers
# are spawned — confirm against the tokenizers documentation.
export TOKENIZERS_PARALLELISM=false
45

56
MODEL_IDs=(
67
nvidia/canary-qwen-2.5b

0 commit comments

Comments
 (0)