2
2
# SPDX-License-Identifier: MIT
3
3
4
4
import argparse
5
+ import json
6
+ from pathlib import Path
5
7
6
8
import riva .client
7
9
from riva .client .argparse_utils import add_asr_config_argparse_parameters , add_connection_argparse_parameters
@@ -15,7 +17,12 @@ def parse_args() -> argparse.Namespace:
15
17
"`--play-audio` or `--output-device`." ,
16
18
formatter_class = argparse .ArgumentDefaultsHelpFormatter ,
17
19
)
18
- parser .add_argument ("--input-file" , help = "A path to a local file to stream." )
20
+ parser .add_argument (
21
+ "--input-file" ,
22
+ required = True ,
23
+ type = Path ,
24
+ help = 'A path to a local file to stream or a JSONL file containing a list of files. The JSONL file should contain one JSON entry per line, for example: {"audio_filepath": "audio.wav"}' ,
25
+ )
19
26
parser .add_argument ("--list-devices" , action = "store_true" , help = "List output devices indices" )
20
27
parser .add_argument (
21
28
"--interim-results" , default = False , action = 'store_true' , help = "Print intermediate transcripts" ,
@@ -63,6 +70,17 @@ def main() -> None:
63
70
if args .list_devices :
64
71
riva .client .audio_io .list_output_devices ()
65
72
return
73
+ input_files = []
74
+ if args .input_file .suffix == ".json" :
75
+ with open (args .input_file ) as f :
76
+ lines = f .read ().splitlines ()
77
+ for line in lines :
78
+ data = json .loads (line )
79
+ if "audio_filepath" in data :
80
+ input_files .append (data ["audio_filepath" ])
81
+ else :
82
+ input_files = [args .input_file ]
83
+
66
84
auth = riva .client .Auth (args .ssl_cert , args .use_ssl , args .server , args .metadata )
67
85
asr_service = riva .client .ASRService (auth )
68
86
config = riva .client .StreamingRecognitionConfig (
@@ -71,37 +89,40 @@ def main() -> None:
71
89
max_alternatives = args .max_alternatives ,
72
90
profanity_filter = args .profanity_filter ,
73
91
enable_automatic_punctuation = args .automatic_punctuation ,
74
- verbatim_transcripts = not args .no_verbatim_transcripts ,
92
+ verbatim_transcripts = args .verbatim_transcripts ,
75
93
enable_word_time_offsets = args .word_time_offsets ,
76
94
model = args .model_name ,
77
95
),
78
96
interim_results = args .interim_results ,
79
97
)
80
- riva .client .add_word_boosting_to_config (config , args .boosted_lm_words , args .boosted_lm_score )
98
+ riva .client .add_word_boosting_to_config (config , args .boosted_words_file , args .boosted_words_score )
81
99
sound_callback = None
82
- try :
83
- if args .play_audio or args .output_device is not None :
84
- wp = riva .client .get_wav_file_parameters (args .input_file )
85
- sound_callback = riva .client .audio_io .SoundCallBack (
86
- args .output_device , wp ['sampwidth' ], wp ['nchannels' ], wp ['framerate' ],
87
- )
88
- delay_callback = sound_callback
89
- else :
90
- delay_callback = riva .client .sleep_audio_length if args .simulate_realtime else None
91
- with riva .client .AudioChunkFileIterator (
92
- args .input_file , args .chunk_duration_ms , delay_callback ,
93
- ) as audio_chunk_iterator :
94
- riva .client .print_streaming (
95
- responses = asr_service .streaming_response_generator (
96
- audio_chunks = audio_chunk_iterator , streaming_config = config ,
97
- ),
98
- input_file = args .input_file ,
99
- show_intermediate = args .interim_results ,
100
- additional_info = "confidence" if args .print_confidence else "no" ,
101
- )
102
- finally :
103
- if sound_callback is not None and sound_callback .opened :
104
- sound_callback .close ()
100
+
101
+ for file in input_files :
102
+ try :
103
+ if args .play_audio or args .output_device is not None :
104
+ wp = riva .client .get_wav_file_parameters (file )
105
+ sound_callback = riva .client .audio_io .SoundCallBack (
106
+ args .output_device , wp ['sampwidth' ], wp ['nchannels' ], wp ['framerate' ],
107
+ )
108
+ delay_callback = sound_callback
109
+ else :
110
+ delay_callback = riva .client .sleep_audio_length if args .simulate_realtime else None
111
+
112
+ with riva .client .AudioChunkFileIterator (
113
+ file , args .chunk_duration_ms , delay_callback ,
114
+ ) as audio_chunk_iterator :
115
+ riva .client .print_streaming (
116
+ responses = asr_service .streaming_response_generator (
117
+ audio_chunks = audio_chunk_iterator , streaming_config = config ,
118
+ ),
119
+ input_file = file ,
120
+ show_intermediate = args .interim_results ,
121
+ additional_info = "confidence" if args .print_confidence else "no" ,
122
+ )
123
+ finally :
124
+ if sound_callback is not None and sound_callback .opened :
125
+ sound_callback .close ()
105
126
106
127
107
128
if __name__ == "__main__" :
0 commit comments