@@ -30,7 +30,7 @@ def get_wav_file_parameters(input_file: Union[str, os.PathLike]) -> Dict[str, Un
30
30
'duration' : nframes / rate ,
31
31
'nchannels' : wf .getnchannels (),
32
32
'sampwidth' : wf .getsampwidth (),
33
- 'data_offset' : wf .getfp ().size_read + wf .getfp ().offset
33
+ 'data_offset' : wf .getfp ().size_read + wf .getfp ().offset ,
34
34
}
35
35
except :
36
36
# Not a WAV file
@@ -46,11 +46,11 @@ class AudioChunkFileIterator:
46
46
def __init__ (
47
47
self ,
48
48
input_file : Union [str , os .PathLike ],
49
- chunk_n_frames : int ,
49
+ chunk_duration_ms : int ,
50
50
delay_callback : Optional [Callable [[bytes , float ], None ]] = None ,
51
51
) -> None :
52
52
self .input_file : Path = Path (input_file ).expanduser ()
53
- self .chunk_n_frames = chunk_n_frames
53
+ self .chunk_duration_ms = chunk_duration_ms
54
54
self .delay_callback = delay_callback
55
55
self .file_parameters = get_wav_file_parameters (self .input_file )
56
56
self .file_object : Optional [typing .BinaryIO ] = open (str (self .input_file ), 'rb' )
@@ -75,16 +75,21 @@ def __iter__(self):
75
75
76
76
def __next__(self) -> bytes:
    """Read and return the next chunk of raw bytes from the input file.

    When ``self.file_parameters`` is available, the chunk holds
    ``chunk_duration_ms`` worth of frames, computed from the ``framerate``,
    ``sampwidth`` and ``nchannels`` entries. Otherwise a fixed 8192-byte
    chunk is read.

    If ``self.delay_callback`` is set, it is called once per chunk with the
    chunk's bytes and its duration in seconds. On the first chunk the
    leading ``data_offset`` bytes are excluded from both the bytes and the
    duration handed to the callback; the returned chunk still contains them.

    Returns:
        The bytes read from the file, exactly as read.

    Raises:
        StopIteration: when the file is exhausted; ``self.close()`` is
            called first.
    """
    params = self.file_parameters
    if params:
        # Size in bytes of one frame: one sample for every channel.
        frame_bytes = params['sampwidth'] * params['nchannels']
        num_frames = int(self.chunk_duration_ms * params['framerate'] / 1000)
        data = self.file_object.read(num_frames * frame_bytes)
    else:
        # Fixed chunk size when file_parameters is not available
        data = self.file_object.read(8192)
    if not data:
        self.close()
        raise StopIteration
    if self.delay_callback is not None:
        if params:
            offset = params['data_offset'] if self.first_buffer else 0
            # Divide by the full frame size (sample width * channel count);
            # leaving the channel count out overstates the duration of
            # multi-channel audio by a factor of nchannels.
            duration = (len(data) - offset) / (frame_bytes * params['framerate'])
        else:
            # Without file parameters there is no offset to skip and no way
            # to derive a duration, so report the whole chunk with zero
            # delay instead of failing on a missing parameter lookup.
            offset = 0
            duration = 0.0
        self.delay_callback(data[offset:], duration)
        self.first_buffer = False
    return data
@@ -104,8 +109,7 @@ def add_word_boosting_to_config(
104
109
105
110
106
111
def add_audio_file_specs_to_config (
107
- config : Union [rasr .StreamingRecognitionConfig , rasr .RecognitionConfig ],
108
- audio_file : Union [str , os .PathLike ],
112
+ config : Union [rasr .StreamingRecognitionConfig , rasr .RecognitionConfig ], audio_file : Union [str , os .PathLike ],
109
113
) -> None :
110
114
inner_config : rasr .RecognitionConfig = config if isinstance (config , rasr .RecognitionConfig ) else config .config
111
115
wav_parameters = get_wav_file_parameters (audio_file )
@@ -114,10 +118,7 @@ def add_audio_file_specs_to_config(
114
118
inner_config .audio_channel_count = wav_parameters ['nchannels' ]
115
119
116
120
117
- def add_speaker_diarization_to_config (
118
- config : Union [rasr .RecognitionConfig ],
119
- diarization_enable : bool ,
120
- ) -> None :
121
+ def add_speaker_diarization_to_config (config : Union [rasr .RecognitionConfig ], diarization_enable : bool ,) -> None :
121
122
inner_config : rasr .RecognitionConfig = config if isinstance (config , rasr .RecognitionConfig ) else config .config
122
123
if diarization_enable :
123
124
diarization_config = rasr .SpeakerDiarizationConfig (enable_speaker_diarization = True )
@@ -129,6 +130,7 @@ def add_speaker_diarization_to_config(
129
130
130
131
def print_streaming (
131
132
responses : Iterable [rasr .StreamingRecognizeResponse ],
133
+ input_file : str = None ,
132
134
output_file : Optional [Union [Union [os .PathLike , str , TextIO ], List [Union [os .PathLike , str , TextIO ]]]] = None ,
133
135
additional_info : str = 'no' ,
134
136
word_time_offsets : bool = False ,
@@ -194,6 +196,10 @@ def print_streaming(
194
196
output_file [i ] = Path (elem ).expanduser ().open (file_mode )
195
197
start_time = time .time () # used in 'time` additional_info
196
198
num_chars_printed = 0 # used in 'no' additional_info
199
+ final_transcript = "" # for printing best final transcript
200
+ if input_file :
201
+ for f in output_file :
202
+ f .write (f"File: { input_file } \n " )
197
203
for response in responses :
198
204
if not response .results :
199
205
continue
@@ -204,6 +210,7 @@ def print_streaming(
204
210
transcript = result .alternatives [0 ].transcript
205
211
if additional_info == 'no' :
206
212
if result .is_final :
213
+ final_transcript += transcript
207
214
if show_intermediate :
208
215
overwrite_chars = ' ' * (num_chars_printed - len (transcript ))
209
216
for i , f in enumerate (output_file ):
@@ -221,6 +228,7 @@ def print_streaming(
221
228
partial_transcript += transcript
222
229
elif additional_info == 'time' :
223
230
if result .is_final :
231
+ final_transcript += transcript
224
232
for i , alternative in enumerate (result .alternatives ):
225
233
for f in output_file :
226
234
f .write (
@@ -239,6 +247,7 @@ def print_streaming(
239
247
partial_transcript += transcript
240
248
else : # additional_info == 'confidence'
241
249
if result .is_final :
250
+ final_transcript += transcript
242
251
for f in output_file :
243
252
f .write (f'## { transcript } \n ' )
244
253
f .write (f'Confidence: { result .alternatives [0 ].confidence :9.4f} \n ' )
@@ -259,6 +268,9 @@ def print_streaming(
259
268
else :
260
269
for f in output_file :
261
270
f .write ('----\n ' )
271
+ for f in output_file :
272
+ f .write (f"Final transcripts:\n " )
273
+ f .write (f"0 : { final_transcript } \n " )
262
274
finally :
263
275
for fo , elem in zip (file_opened , output_file ):
264
276
if fo :
@@ -284,6 +296,7 @@ def streaming_request_generator(
284
296
285
297
class ASRService :
286
298
"""Provides streaming and offline recognition services. Calls gRPC stubs with authentication metadata."""
299
+
287
300
def __init__ (self , auth : Auth ) -> None :
288
301
"""
289
302
Initializes an instance of the class.
0 commit comments