3 changes: 3 additions & 0 deletions src/modelplane/mlflow/loghelpers.py
@@ -15,3 +15,6 @@ def log_tags(run_id: str) -> None:
if not k.startswith("mlflow.") and k != RUN_TYPE_TAG_NAME
}
)
run_type = run.data.tags.get(RUN_TYPE_TAG_NAME, None)
if run_type is not None:
mlflow.set_tag(f"{run_type}_run_id", run_id)
2 changes: 2 additions & 0 deletions src/modelplane/runways/annotator.py
@@ -23,6 +23,7 @@

from modelplane.mlflow.loghelpers import log_tags
from modelplane.runways.utils import (
MODELGAUGE_RUN_TAG_NAME,
PROMPT_RESPONSE_ARTIFACT_NAME,
RUN_TYPE_ANNOTATOR,
RUN_TYPE_TAG_NAME,
@@ -113,6 +114,7 @@ def annotate(
pipeline_runner.run(
progress_callback=mlflow.log_metrics, debug=is_debug_mode()
)
mlflow.set_tag(MODELGAUGE_RUN_TAG_NAME, pipeline_runner.run_id)

# log the output to mlflow's artifact store
mlflow.log_artifact(
2 changes: 2 additions & 0 deletions src/modelplane/runways/responder.py
@@ -9,6 +9,7 @@
from modelgauge.sut_registry import SUTS

from modelplane.runways.utils import (
MODELGAUGE_RUN_TAG_NAME,
RUN_TYPE_RESPONDER,
RUN_TYPE_TAG_NAME,
get_experiment_id,
@@ -53,6 +54,7 @@ def respond(
pipeline_runner.run(
progress_callback=mlflow.log_metrics, debug=is_debug_mode()
)
mlflow.set_tag(MODELGAUGE_RUN_TAG_NAME, pipeline_runner.run_id)

# log the output to mlflow's artifact store
mlflow.log_artifact(
8 changes: 7 additions & 1 deletion src/modelplane/runways/scorer.py
@@ -1,6 +1,7 @@
"""Runway for measuring annotations against ground truth."""

import json
import math
import os
import tempfile
from pathlib import Path
@@ -63,7 +64,12 @@ def score(
for annotator in annotators:
score = score_annotator(annotator, annotations_df, ground_truth_df)
for metric in score:
mlflow.log_metric(f"{annotator}_{metric}", score[metric])
Comment from @superdosh (Contributor, Author), Jul 8, 2025:
I forgot to add a comment in the code, so adding here for posterity: the reason we need this now (and not before) is a bug introduced in mlflow 3 (coming from a newer version of graphql). We can track it here: mlflow/mlflow#16555

Basically, if we try to log a NaN or Inf, it succeeds on the mlflow side with no errors, but the GUI crashes.

if math.isnan(score[metric]):
mlflow.log_metric(f"{annotator}_{metric}_is_nan", 1.0)
elif math.isinf(score[metric]):
mlflow.log_metric(f"{annotator}_{metric}_is_inf", 1.0)
else:
mlflow.log_metric(f"{annotator}_{metric}", score[metric])

return run.info.run_id

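Following up on the comment above about mlflow/mlflow#16555: a small sketch, not part of this PR, of how the NaN/Inf guard could be factored into a reusable helper if the pattern is needed elsewhere; the name log_metric_safe is hypothetical.

```python
import math

import mlflow


def log_metric_safe(name: str, value: float) -> None:
    """Hypothetical helper (not in this PR): skip NaN/Inf values, which the
    mlflow 3 UI cannot render (mlflow/mlflow#16555), and log a sentinel
    metric recording what happened instead."""
    if math.isnan(value):
        mlflow.log_metric(f"{name}_is_nan", 1.0)
    elif math.isinf(value):
        mlflow.log_metric(f"{name}_is_inf", 1.0)
    else:
        mlflow.log_metric(name, value)
```

With such a helper, the loop body in score() would reduce to a single call: log_metric_safe(f"{annotator}_{metric}", score[metric]).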
1 change: 1 addition & 0 deletions src/modelplane/runways/utils.py
@@ -21,6 +21,7 @@
RUN_TYPE_RESPONDER = "get-sut-responses"
RUN_TYPE_ANNOTATOR = "annotate"
RUN_TYPE_SCORER = "score"
MODELGAUGE_RUN_TAG_NAME = "modelgauge_run_id"


def is_debug_mode() -> bool:
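Finally, a hedged sketch (not part of this PR) of how the new MODELGAUGE_RUN_TAG_NAME tag set by annotate() and respond() could be read back later; the run id is a placeholder.

```python
import mlflow

# Hypothetical, not in this PR: read back the modelgauge pipeline run id
# recorded as a tag on an annotate/respond mlflow run.
client = mlflow.MlflowClient()
run = client.get_run("1234abcd")  # placeholder mlflow run id
modelgauge_run_id = run.data.tags.get("modelgauge_run_id")
print(modelgauge_run_id)
```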