@@ -141,7 +141,7 @@ def parse_filepath(fp: str):
141141 dataset_id = ds_fp .replace ("DATASET_" , "" ).rstrip (".jsonl" )
142142 return model_id , dataset_id
143143
144- # Compute results per dataset
144+ # Compute WER results per dataset, and RTFx over all datasets
145145 results = {}
146146 wer_metric = evaluate .load ("wer" )
147147
@@ -160,13 +160,14 @@ def parse_filepath(fp: str):
160160 wer = round (100 * wer , 2 )
161161
162162 if compute_rtfx :
163- rtfx = sum (duration ) / sum (time )
164- rtfx = round (rtfx , 4 )
163+ audio_length = sum (duration )
164+ inference_time = sum (time )
165+ rtfx = round (sum (duration ) / sum (time ), 4 )
165166 else :
166- rtfx = None
167+ audio_length = inference_time = rtfx = None
167168
168169 result_key = f"{ model_id_of_file } | { dataset_id } "
169- results [result_key ] = {"wer" : wer , "rtfx" : rtfx }
170+ results [result_key ] = {"wer" : wer , "audio_length" : audio_length , "inference_time" : inference_time , " rtfx" : rtfx }
170171
171172 print ("*" * 80 )
172173 print ("Results per dataset:" )
@@ -175,20 +176,22 @@ def parse_filepath(fp: str):
175176 for k , v in results .items ():
176177 metrics = f"{ k } : WER = { v ['wer' ]:0.2f} %"
177178 if v ["rtfx" ] is not None :
178- metrics += f", RTFX = { v ['rtfx' ]:0.2f} "
179+ metrics += f", RTFx = { v ['rtfx' ]:0.2f} "
179180 print (metrics )
180181
181182 # composite WER should be computed over all datasets and with the same key
182183 composite_wer = defaultdict (float )
183- composite_rtfx = defaultdict (float )
184+ composite_audio_length = defaultdict (float )
185+ composite_inference_time = defaultdict (float )
184186 count_entries = defaultdict (int )
185187 for k , v in results .items ():
186188 key = k .split ("|" )[0 ].strip ()
187189 composite_wer [key ] += v ["wer" ]
188190 if v ["rtfx" ] is not None :
189- composite_rtfx [key ] += v ["rtfx" ]
191+ composite_audio_length [key ] += v ["audio_length" ]
192+ composite_inference_time [key ] += v ["inference_time" ]
190193 else :
191- composite_rtfx [key ] = None
194+ composite_audio_length [ key ] = composite_inference_time [key ] = None
192195 count_entries [key ] += 1
193196
194197 # normalize scores & print
@@ -199,9 +202,9 @@ def parse_filepath(fp: str):
199202 for k , v in composite_wer .items ():
200203 wer = v / count_entries [k ]
201204 print (f"{ k } : WER = { wer :0.2f} %" )
202- for k , v in composite_rtfx . items () :
203- if v is not None :
204- rtfx = v / count_entries [k ]
205- print (f"{ k } : RTFX = { rtfx :0.2f} " )
205+ for k in composite_audio_length :
206+ if composite_audio_length [ k ] is not None :
207+ rtfx = composite_audio_length [ k ] / composite_inference_time [k ]
208+ print (f"{ k } : RTFx = { rtfx :0.2f} " )
206209 print ("*" * 80 )
207210 return composite_wer , results
0 commit comments