Skip to content

Commit 0cfd08d

Browse files
author
Nithin Rao Koluguri
committed
move to sorted datasets
Signed-off-by: Nithin Rao Koluguri <nithinraok>
1 parent 04b4923 commit 0cfd08d

File tree

3 files changed

+29
-29
lines changed

3 files changed

+29
-29
lines changed

nemo_asr/run_canary.sh

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ do
1515

1616
python run_eval.py \
1717
--model_id=${MODEL_ID} \
18-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
18+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
1919
--dataset="ami" \
2020
--split="test" \
2121
--device=${DEVICE_ID} \
@@ -24,7 +24,7 @@ do
2424

2525
python run_eval.py \
2626
--model_id=${MODEL_ID} \
27-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
27+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
2828
--dataset="earnings22" \
2929
--split="test" \
3030
--device=${DEVICE_ID} \
@@ -33,7 +33,7 @@ do
3333

3434
python run_eval.py \
3535
--model_id=${MODEL_ID} \
36-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
36+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
3737
--dataset="gigaspeech" \
3838
--split="test" \
3939
--device=${DEVICE_ID} \
@@ -42,16 +42,16 @@ do
4242

4343
python run_eval.py \
4444
--model_id=${MODEL_ID} \
45-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
45+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
4646
--dataset="librispeech" \
4747
--split="test.clean" \
4848
--device=${DEVICE_ID} \
4949
--batch_size=${BATCH_SIZE} \
50-
--max_eval_samples=-1
50+
--max_eval_samples=-1
5151

5252
python run_eval.py \
5353
--model_id=${MODEL_ID} \
54-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
54+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
5555
--dataset="librispeech" \
5656
--split="test.other" \
5757
--device=${DEVICE_ID} \
@@ -60,7 +60,7 @@ do
6060

6161
python run_eval.py \
6262
--model_id=${MODEL_ID} \
63-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
63+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
6464
--dataset="spgispeech" \
6565
--split="test" \
6666
--device=${DEVICE_ID} \
@@ -69,7 +69,7 @@ do
6969

7070
python run_eval.py \
7171
--model_id=${MODEL_ID} \
72-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
72+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
7373
--dataset="tedlium" \
7474
--split="test" \
7575
--device=${DEVICE_ID} \
@@ -78,7 +78,7 @@ do
7878

7979
python run_eval.py \
8080
--model_id=${MODEL_ID} \
81-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
81+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
8282
--dataset="voxpopuli" \
8383
--split="test" \
8484
--device=${DEVICE_ID} \
@@ -87,7 +87,7 @@ do
8787

8888
python run_eval.py \
8989
--model_id=${MODEL_ID} \
90-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
90+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
9191
--dataset="common_voice" \
9292
--split="test" \
9393
--device=${DEVICE_ID} \

nemo_asr/run_fast_conformer_ctc.sh

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ do
1616

1717
python run_eval.py \
1818
--model_id=${MODEL_ID} \
19-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
19+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
2020
--dataset="ami" \
2121
--split="test" \
2222
--device=${DEVICE_ID} \
@@ -25,7 +25,7 @@ do
2525

2626
python run_eval.py \
2727
--model_id=${MODEL_ID} \
28-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
28+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
2929
--dataset="earnings22" \
3030
--split="test" \
3131
--device=${DEVICE_ID} \
@@ -34,7 +34,7 @@ do
3434

3535
python run_eval.py \
3636
--model_id=${MODEL_ID} \
37-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
37+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
3838
--dataset="gigaspeech" \
3939
--split="test" \
4040
--device=${DEVICE_ID} \
@@ -43,7 +43,7 @@ do
4343

4444
python run_eval.py \
4545
--model_id=${MODEL_ID} \
46-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
46+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
4747
--dataset="librispeech" \
4848
--split="test.clean" \
4949
--device=${DEVICE_ID} \
@@ -52,7 +52,7 @@ do
5252

5353
python run_eval.py \
5454
--model_id=${MODEL_ID} \
55-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
55+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
5656
--dataset="librispeech" \
5757
--split="test.other" \
5858
--device=${DEVICE_ID} \
@@ -61,7 +61,7 @@ do
6161

6262
python run_eval.py \
6363
--model_id=${MODEL_ID} \
64-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
64+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
6565
--dataset="spgispeech" \
6666
--split="test" \
6767
--device=${DEVICE_ID} \
@@ -70,7 +70,7 @@ do
7070

7171
python run_eval.py \
7272
--model_id=${MODEL_ID} \
73-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
73+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
7474
--dataset="tedlium" \
7575
--split="test" \
7676
--device=${DEVICE_ID} \
@@ -79,7 +79,7 @@ do
7979

8080
python run_eval.py \
8181
--model_id=${MODEL_ID} \
82-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
82+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
8383
--dataset="voxpopuli" \
8484
--split="test" \
8585
--device=${DEVICE_ID} \
@@ -88,7 +88,7 @@ do
8888

8989
python run_eval.py \
9090
--model_id=${MODEL_ID} \
91-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
91+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
9292
--dataset="common_voice" \
9393
--split="test" \
9494
--device=${DEVICE_ID} \

nemo_asr/run_fast_conformer_rnnt.sh

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ do
1616

1717
python run_eval.py \
1818
--model_id=${MODEL_ID} \
19-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
19+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
2020
--dataset="ami" \
2121
--split="test" \
2222
--device=${DEVICE_ID} \
@@ -25,7 +25,7 @@ do
2525

2626
python run_eval.py \
2727
--model_id=${MODEL_ID} \
28-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
28+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
2929
--dataset="earnings22" \
3030
--split="test" \
3131
--device=${DEVICE_ID} \
@@ -34,7 +34,7 @@ do
3434

3535
python run_eval.py \
3636
--model_id=${MODEL_ID} \
37-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
37+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
3838
--dataset="gigaspeech" \
3939
--split="test" \
4040
--device=${DEVICE_ID} \
@@ -43,16 +43,16 @@ do
4343

4444
python run_eval.py \
4545
--model_id=${MODEL_ID} \
46-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
46+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
4747
--dataset="librispeech" \
4848
--split="test.clean" \
4949
--device=${DEVICE_ID} \
5050
--batch_size=${BATCH_SIZE} \
51-
--max_eval_samples=-1
51+
--max_eval_samples=-1
5252

5353
python run_eval.py \
5454
--model_id=${MODEL_ID} \
55-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
55+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
5656
--dataset="librispeech" \
5757
--split="test.other" \
5858
--device=${DEVICE_ID} \
@@ -61,7 +61,7 @@ do
6161

6262
python run_eval.py \
6363
--model_id=${MODEL_ID} \
64-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
64+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
6565
--dataset="spgispeech" \
6666
--split="test" \
6767
--device=${DEVICE_ID} \
@@ -70,7 +70,7 @@ do
7070

7171
python run_eval.py \
7272
--model_id=${MODEL_ID} \
73-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
73+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
7474
--dataset="tedlium" \
7575
--split="test" \
7676
--device=${DEVICE_ID} \
@@ -79,7 +79,7 @@ do
7979

8080
python run_eval.py \
8181
--model_id=${MODEL_ID} \
82-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
82+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
8383
--dataset="voxpopuli" \
8484
--split="test" \
8585
--device=${DEVICE_ID} \
@@ -88,7 +88,7 @@ do
8888

8989
python run_eval.py \
9090
--model_id=${MODEL_ID} \
91-
--dataset_path="open-asr-leaderboard/datasets-test-only" \
91+
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
9292
--dataset="common_voice" \
9393
--split="test" \
9494
--device=${DEVICE_ID} \

0 commit comments

Comments
 (0)