4 files changed: +18 -102 lines changed
Original file line number Diff line number Diff line change
22
33export PYTHONPATH=" .." :$PYTHONPATH
44
5- MODEL_IDs=(" nvidia/canary-1b" )
5+ MODEL_IDs=(" nvidia/canary-1b-flash " ) # options: "nvidia/canary-1b" "nvidia/canary-1b-flash"
66BATCH_SIZE=64
77DEVICE_ID=0
88
@@ -11,7 +11,6 @@ num_models=${#MODEL_IDs[@]}
1111for (( i= 0 ; i< ${num_models} ; i++ )) ;
1212do
1313 MODEL_ID=${MODEL_IDs[$i]}
14-
1514
1615 python run_eval.py \
1716 --model_id=${MODEL_ID} \
Load Diff This file was deleted.
Original file line number Diff line number Diff line change 11import argparse
22
3+ import io
34import os
45import torch
56import evaluate
@@ -50,15 +51,26 @@ def download_audio_files(batch):
5051 audio_paths = []
5152 durations = []
5253
53- # import ipdb; ipdb.set_trace()
54-
5554 for id , sample in zip (batch ["id" ], batch ["audio" ]):
5655 audio_path = os .path .join (CACHE_DIR , f"{ id } .wav" )
56+
57+ if "array" in sample :
58+ audio_array = np .float32 (sample ["array" ])
59+ sample_rate = 16000
60+
61+ elif "bytes" in sample : # added to be compatible with latest datasets library (3.x.x) that produces byte stream
62+ with io .BytesIO (sample ["bytes" ]) as audio_file :
63+ audio_array , sample_rate = soundfile .read (audio_file , dtype = "float32" )
64+
65+ else :
66+ raise ValueError ("Sample must have either 'array' or 'bytes' key" )
67+
5768 if not os .path .exists (audio_path ):
5869 os .makedirs (os .path .dirname (audio_path ), exist_ok = True )
59- soundfile .write (audio_path , np .float32 (sample ["array" ]), 16_000 )
70+ soundfile .write (audio_path , audio_array , sample_rate )
71+
6072 audio_paths .append (audio_path )
61- durations .append (len (sample [ "array" ] ) / 16_000 )
73+ durations .append (len (audio_array ) / sample_rate )
6274
6375
6476 batch ["references" ] = batch ["norm_text" ]
Original file line number Diff line number Diff line change 33soundfile
44librosa
55IPython # Workaround for https://github.com/NVIDIA/NeMo/pull/9890#discussion_r1701028427
6- cuda-python>=12.4 # Used for fast TDT and RNN-T inference
7- datasets <= 2.21.0
6+ cuda-python>=12.4 # Used for fast TDT and RNN-T inference
You can’t perform that action at this time.
0 commit comments