Merged
Changes from 3 commits
3 changes: 3 additions & 0 deletions src/modelplane/mlflow/loghelpers.py
@@ -15,3 +15,6 @@ def log_tags(run_id: str) -> None:
             if not k.startswith("mlflow.") and k != RUN_TYPE_TAG_NAME
         }
     )
+    run_type = run.data.tags.get(RUN_TYPE_TAG_NAME, None)
+    if run_type is not None:
+        mlflow.set_tag(f"{run_type}_run_id", run_id)
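For context on how the new `{run_type}_run_id` tag can be used: assuming `log_tags` copies tags from an upstream run into the currently active run, the tag lets you find downstream runs that point back at a given source run. A minimal sketch, where the experiment name and run id are placeholders and not part of this PR:

```python
import mlflow

# Hypothetical lookup: find runs that carry a pointer back to a given
# upstream "annotate" run (RUN_TYPE_ANNOTATOR == "annotate", so the
# propagated tag name is "annotate_run_id").
source_run_id = "abc123"  # placeholder id of the upstream annotator run
downstream = mlflow.search_runs(
    experiment_names=["my-experiment"],  # placeholder experiment name
    filter_string=f"tags.annotate_run_id = '{source_run_id}'",
)
print(downstream["run_id"].tolist())
```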
2 changes: 2 additions & 0 deletions src/modelplane/runways/annotator.py
@@ -23,6 +23,7 @@

 from modelplane.mlflow.loghelpers import log_tags
 from modelplane.runways.utils import (
+    MODELGAUGE_RUN_TAG_NAME,
     PROMPT_RESPONSE_ARTIFACT_NAME,
     RUN_TYPE_ANNOTATOR,
     RUN_TYPE_TAG_NAME,
@@ -113,6 +114,7 @@ def annotate(
         pipeline_runner.run(
             progress_callback=mlflow.log_metrics, debug=is_debug_mode()
         )
+        mlflow.set_tag(MODELGAUGE_RUN_TAG_NAME, pipeline_runner.run_id)
 
         # log the output to mlflow's artifact store
         mlflow.log_artifact(
2 changes: 2 additions & 0 deletions src/modelplane/runways/responder.py
@@ -9,6 +9,7 @@
 from modelgauge.sut_registry import SUTS
 
 from modelplane.runways.utils import (
+    MODELGAUGE_RUN_TAG_NAME,
     RUN_TYPE_RESPONDER,
     RUN_TYPE_TAG_NAME,
     get_experiment_id,
@@ -53,6 +54,7 @@ def respond(
         pipeline_runner.run(
             progress_callback=mlflow.log_metrics, debug=is_debug_mode()
         )
+        mlflow.set_tag(MODELGAUGE_RUN_TAG_NAME, pipeline_runner.run_id)
 
         # log the output to mlflow's artifact store
         mlflow.log_artifact(
8 changes: 7 additions & 1 deletion src/modelplane/runways/scorer.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 
 import mlflow
+import numpy as np
 import pandas as pd
 from sklearn import metrics
 
@@ -63,7 +64,12 @@ def score(
         for annotator in annotators:
             score = score_annotator(annotator, annotations_df, ground_truth_df)
             for metric in score:
-                mlflow.log_metric(f"{annotator}_{metric}", score[metric])
@superdosh (Contributor, Author) commented on Jul 8, 2025:
I forgot to add a comment in the code, so adding it here for posterity: the reason we need this now (and not before) is a bug introduced in mlflow 3 (from a newer version of graphql). We can track it here: mlflow/mlflow#16555

Basically, if we try to log a NaN or Inf, it works on the mlflow side with no errors, but the GUI crashes.

+                if np.isnan(score[metric]):
+                    mlflow.log_metric(f"{annotator}_{metric}_is_nan", 1.0)
+                elif np.isinf(score[metric]):
+                    mlflow.log_metric(f"{annotator}_{metric}_is_inf", 1.0)
+                else:
+                    mlflow.log_metric(f"{annotator}_{metric}", score[metric])

     return run.info.run_id

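The guard above keeps mlflow 3's UI from crashing on non-finite metric values. Outside the diff context, the same idea as a small self-contained helper; the `log_metric_safe` name and the sentinel-metric convention are illustrative only (not part of this PR), and it uses the stdlib `math` module instead of numpy:

```python
import math

import mlflow


def log_metric_safe(key: str, value: float) -> None:
    # Log a metric, replacing non-finite values with sentinel metrics so the
    # mlflow 3 UI does not crash on NaN/Inf values (mlflow/mlflow#16555).
    if math.isnan(value):
        mlflow.log_metric(f"{key}_is_nan", 1.0)
    elif math.isinf(value):
        mlflow.log_metric(f"{key}_is_inf", 1.0)
    else:
        mlflow.log_metric(key, value)


# Example: logs "accuracy_is_nan" = 1.0 instead of a NaN value.
log_metric_safe("accuracy", float("nan"))
```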
1 change: 1 addition & 0 deletions src/modelplane/runways/utils.py
@@ -21,6 +21,7 @@
 RUN_TYPE_RESPONDER = "get-sut-responses"
 RUN_TYPE_ANNOTATOR = "annotate"
 RUN_TYPE_SCORER = "score"
+MODELGAUGE_RUN_TAG_NAME = "modelgauge_run_id"
 
 
 def is_debug_mode() -> bool: