Skip to content

Commit a0b85c8

Browse files
authored
Merge pull request #22 from huggingface/canary_run_script
canary_run_script
2 parents cc912b4 + 21800f9 commit a0b85c8

File tree

1 file changed

+103
-0
lines changed

1 file changed

+103
-0
lines changed

nemo_asr/run_canary.sh

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#!/bin/bash
# Run the Open ASR Leaderboard evaluation for the NVIDIA Canary model(s)
# across every leaderboard test set, then score the accumulated results.
#
# Expects to be run from the nemo_asr/ directory (run_eval.py lives here,
# the scorer lives in ../normalizer).

export PYTHONPATH="..":$PYTHONPATH

MODEL_IDs=("nvidia/canary-1b")
BATCH_SIZE=64
DEVICE_ID=0

# dataset|split pairs evaluated for every model.
# librispeech is evaluated on both of its test splits.
EVAL_SETS=(
  "ami|test"
  "earnings22|test"
  "gigaspeech|test"
  "librispeech|test.clean"
  "librispeech|test.other"
  "spgispeech|test"
  "tedlium|test"
  "voxpopuli|test"
  "common_voice|test"
)

for MODEL_ID in "${MODEL_IDs[@]}"; do
  for pair in "${EVAL_SETS[@]}"; do
    dataset=${pair%%|*}   # text before the '|'
    split=${pair#*|}      # text after the '|'

    python run_eval.py \
      --model_id="${MODEL_ID}" \
      --dataset_path="open-asr-leaderboard/datasets-test-only" \
      --dataset="${dataset}" \
      --split="${split}" \
      --device="${DEVICE_ID}" \
      --batch_size="${BATCH_SIZE}" \
      --max_eval_samples=-1
  done

  # Evaluate results: score from ../normalizer. Run in a subshell so the
  # working directory is restored even if the scorer fails (the original
  # `cd .. && python .. && cd back` chain stranded the script in
  # ../normalizer on failure).
  RUNDIR=$(pwd)
  (
    cd ../normalizer && \
    python -c "import eval_utils; eval_utils.score_results('${RUNDIR}/results', '${MODEL_ID}')"
  )
done

0 commit comments

Comments
 (0)