Skip to content

Commit afbfabc

Browse files
author
sanchit-gandhi
committed
un-weight rtfx
1 parent df782b3 commit afbfabc

File tree

2 files changed

+18
-20
lines changed

2 files changed

+18
-20
lines changed

normalizer/eval_utils.py

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ def parse_filepath(fp: str):
141141
dataset_id = ds_fp.replace("DATASET_", "").rstrip(".jsonl")
142142
return model_id, dataset_id
143143

144-
# Compute results per dataset
144+
# Compute WER results per dataset, and RTFx over all datasets
145145
results = {}
146146
wer_metric = evaluate.load("wer")
147147

@@ -160,13 +160,14 @@ def parse_filepath(fp: str):
160160
wer = round(100 * wer, 2)
161161

162162
if compute_rtfx:
163-
rtfx = sum(duration) / sum(time)
164-
rtfx = round(rtfx, 4)
163+
audio_length = sum(duration)
164+
inference_time = sum(time)
165+
rtfx = round(sum(duration) / sum(time), 4)
165166
else:
166-
rtfx = None
167+
audio_length = inference_time = rtfx = None
167168

168169
result_key = f"{model_id_of_file} | {dataset_id}"
169-
results[result_key] = {"wer": wer, "rtfx": rtfx}
170+
results[result_key] = {"wer": wer, "audio_length": audio_length, "inference_time": inference_time, "rtfx": rtfx}
170171

171172
print("*" * 80)
172173
print("Results per dataset:")
@@ -175,20 +176,22 @@ def parse_filepath(fp: str):
175176
for k, v in results.items():
176177
metrics = f"{k}: WER = {v['wer']:0.2f} %"
177178
if v["rtfx"] is not None:
178-
metrics += f", RTFX = {v['rtfx']:0.2f}"
179+
metrics += f", RTFx = {v['rtfx']:0.2f}"
179180
print(metrics)
180181

181182
# composite WER should be computed over all datasets and with the same key
182183
composite_wer = defaultdict(float)
183-
composite_rtfx = defaultdict(float)
184+
composite_audio_length = defaultdict(float)
185+
composite_inference_time = defaultdict(float)
184186
count_entries = defaultdict(int)
185187
for k, v in results.items():
186188
key = k.split("|")[0].strip()
187189
composite_wer[key] += v["wer"]
188190
if v["rtfx"] is not None:
189-
composite_rtfx[key] += v["rtfx"]
191+
composite_audio_length[key] += v["audio_length"]
192+
composite_inference_time[key] += v["inference_time"]
190193
else:
191-
composite_rtfx[key] = None
194+
composite_audio_length[key] = composite_inference_time[key] = None
192195
count_entries[key] += 1
193196

194197
# normalize scores & print
@@ -199,9 +202,9 @@ def parse_filepath(fp: str):
199202
for k, v in composite_wer.items():
200203
wer = v / count_entries[k]
201204
print(f"{k}: WER = {wer:0.2f} %")
202-
for k, v in composite_rtfx.items():
203-
if v is not None:
204-
rtfx = v / count_entries[k]
205-
print(f"{k}: RTFX = {rtfx:0.2f}")
205+
for k in composite_audio_length:
206+
if composite_audio_length[k] is not None:
207+
rtfx = composite_audio_length[k] / composite_inference_time[k]
208+
print(f"{k}: RTFx = {rtfx:0.2f}")
206209
print("*" * 80)
207210
return composite_wer, results

transformers/run_eval.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -86,13 +86,8 @@ def benchmark(batch):
8686
references=all_results["references"], predictions=all_results["predictions"]
8787
)
8888
wer = round(100 * wer, 2)
89-
print("WER:", wer, "%")
90-
91-
transcription_time = sum(all_results["transcription_time"])
92-
audio_length = sum(all_results["audio_length"])
93-
rtfx = audio_length / transcription_time
94-
rtfx = round(rtfx, 2)
95-
print("RTFX:", rtfx)
89+
rtfx = round(sum(all_results["audio_length"]) / sum(all_results["transcription_time"]), 2)
90+
print("WER:", wer, "%", "RTFx:", rtfx)
9691

9792

9893
if __name__ == "__main__":

0 commit comments

Comments
 (0)