Add back filewise metric filtering

rfejgin · rfejgin · commit 20f19f9e562b · 2026-02-20T16:25:49.000-08:00
The filtering has been moved outside of evaluate() because the NeMo Skills
use case needs the full metrics for chunk-wise scoring and for aggregation
at the end.

Signed-off-by: Fejgin, Roy <rfejgin@nvidia.com>
diff --git a/examples/tts/magpietts_inference.py b/examples/tts/magpietts_inference.py
@@ -308,10 +308,21 @@ def run_inference_and_evaluation(
             with open(os.path.join(eval_dir, f"{dataset}_metrics_{repeat_idx}.json"), "w") as f:
                 json.dump(metrics, f, indent=4)
 
-            # Sort by CER descending for human-readable output (highest error first)
-            sorted_filewise = sorted(filewise_metrics, key=lambda x: x.get('cer', 0), reverse=True)
+            filewise_metrics_keys_to_save = [
+                'cer',
+                'wer',
+                'pred_context_ssim',
+                'pred_text',
+                'gt_text',
+                'gt_audio_filepath',
+                'pred_audio_filepath',
+                'context_audio_filepath',
+                'utmosv2',
+            ]
+            filtered_filewise = [{k: m[k] for k in filewise_metrics_keys_to_save if k in m} for m in filewise_metrics]
+            filtered_filewise.sort(key=lambda x: x.get('cer', 0), reverse=True)
             with open(os.path.join(eval_dir, f"{dataset}_filewise_metrics_{repeat_idx}.json"), "w") as f:
-                json.dump(sorted_filewise, f, indent=4)
+                json.dump(filtered_filewise, f, indent=4)
 
             # Append to per-run CSV
             append_metrics_to_csv(per_run_csv, full_checkpoint_name, dataset, metrics)
diff --git a/nemo/collections/tts/modules/magpietts_inference/evaluate_generated_audio.py b/nemo/collections/tts/modules/magpietts_inference/evaluate_generated_audio.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-Used in infer_and_evaluate.py to obtain metrics such as ASR_WER and UTMOSV2 scores.
+Used in inference and evaluation scripts to obtain metrics such as ASR_WER and UTMOSV2 scores.
 """
 import argparse
 import json