Skip to content

Commit c41131c

Browse files
committed
added scripts for canary-1b-flash eval
Signed-off-by: Kunal Dhawan <[email protected]>
1 parent 0dc4559 commit c41131c

File tree

3 files changed

+99
-2
lines changed

3 files changed

+99
-2
lines changed

nemo_asr/run_canary_flash.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
#!/bin/bash
# Evaluate NVIDIA canary-1b-flash on the ESB test-only datasets and score the
# resulting transcripts with the shared normalizer utilities.
#
# Expects to be run from nemo_asr/ (run_eval.py in the CWD, ../normalizer present).
set -euo pipefail

# ${PYTHONPATH:-} keeps `set -u` happy when PYTHONPATH is not already set.
export PYTHONPATH="..":${PYTHONPATH:-}

MODEL_IDs=("nvidia/canary-1b-flash")
BATCH_SIZE=64
DEVICE_ID=0

# Run one dataset/split evaluation for the model currently in ${MODEL_ID}.
# Arguments: $1 - dataset name, $2 - split name.
run_dataset() {
  local dataset=$1
  local split=$2
  python run_eval.py \
    --model_id="${MODEL_ID}" \
    --dataset_path="hf-audio/esb-datasets-test-only-sorted" \
    --dataset="${dataset}" \
    --split="${split}" \
    --device="${DEVICE_ID}" \
    --batch_size="${BATCH_SIZE}" \
    --max_eval_samples=-1
}

for MODEL_ID in "${MODEL_IDs[@]}"; do
  # Datasets currently disabled — uncomment to include them in the run.
  # run_dataset "ami" "test"
  # run_dataset "earnings22" "test"
  # run_dataset "gigaspeech" "test"

  run_dataset "librispeech" "test.clean"
  run_dataset "librispeech" "test.other"

  # run_dataset "spgispeech" "test"
  # run_dataset "tedlium" "test"
  # run_dataset "voxpopuli" "test"

  # Evaluate results. The subshell confines the `cd`, so a scoring failure
  # cannot leave the script stranded in ../normalizer.
  RUNDIR=$(pwd)
  (
    cd ../normalizer
    python -c "import eval_utils; eval_utils.score_results('${RUNDIR}/results', '${MODEL_ID}')"
  )
done

nemo_asr/run_eval.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def download_audio_files(batch):
5050
audio_paths = []
5151
durations = []
5252

53+
# import ipdb; ipdb.set_trace()
54+
5355
for id, sample in zip(batch["id"], batch["audio"]):
5456
audio_path = os.path.join(CACHE_DIR, f"{id}.wav")
5557
if not os.path.exists(audio_path):
@@ -118,7 +120,7 @@ def download_audio_files(batch):
118120
# normalize transcriptions with English normalizer
119121
if isinstance(transcriptions, tuple) and len(transcriptions) == 2:
120122
transcriptions = transcriptions[0]
121-
predictions = [data_utils.normalizer(pred) for pred in transcriptions]
123+
predictions = [data_utils.normalizer(pred.text) for pred in transcriptions]
122124

123125
avg_time = total_time / len(all_data["audio_filepaths"])
124126

requirements/requirements_nemo.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
git+https://github.com/NVIDIA/NeMo.git@d0efff087613ea2584e215969f289fed17414d8b#egg=nemo_toolkit[all] # This commit hash is a recent version of main at the time of testing.
1+
git+https://github.com/NVIDIA/NeMo.git@cb755f5595880a56159cc9a6f4a050c20a449d0a#egg=nemo_toolkit[all] # This commit hash is a recent version of main at the time of testing.
22
tqdm
33
soundfile
44
librosa
55
IPython # Workaround for https://github.com/NVIDIA/NeMo/pull/9890#discussion_r1701028427
66
cuda-python>=12.4 # Used for fast TDT and RNN-T inference
7+
datasets <= 2.21.0

0 commit comments

Comments
 (0)