
Commit 044c86f

ITEP-69976: Remove MLFlow from OTX trainer (#490)
1 parent ca00dfa commit 044c86f

23 files changed: +2421 −2407 lines

deploy/charts/geti-tools/chart/charts/seaweed-fs/values.yaml

Lines changed: 1 addition & 0 deletions
@@ -289,6 +289,7 @@ services:
       - "Read:temporaryfiles"
       - "Write:mlflowexperiments"
       - "List:mlflowexperiments"
+      - "Read:mlflowexperiments"
       - "List:vpsreferencefeatures"
       - "Read:vpsreferencefeatures"
       - "Read:pretrainedweights"

interactive_ai/workflows/geti_domain/common/jobs_common/k8s_helpers/trainer_pod_definition.py

Lines changed: 13 additions & 17 deletions
@@ -40,22 +40,10 @@


 def _create_sidecar_env(
-    organization_id: str,
-    workspace_id: str,
-    project_id: str,
-    job_id: str,
+    identifier_json: str,
     namespace: str,
     role: str = "training_operator",
 ) -> list[V1EnvVar]:
-    identifier_json = json.dumps(
-        {
-            "organization_id": organization_id,
-            "workspace_id": workspace_id,
-            "project_id": project_id,
-            "job_id": job_id,
-        }
-    )
-
     # NOTE: vars below is inherited by the Flyte task who renders this sidecar
     var_s3_host = V1EnvVar(
         name="S3_HOST",
@@ -165,15 +153,21 @@ def create_flyte_container_task( # noqa: PLR0913
     sidecar_container_image = trainer_image_info.to_sidecar_image_full_name()
     logger.info(f"Create sidecar_container_image={sidecar_container_image}")

+    identifier_json = json.dumps(
+        {
+            "organization_id": str(session.organization_id),
+            "workspace_id": str(session.workspace_id),
+            "project_id": project_id,
+            "job_id": job_id,
+        }
+    )
+
     env_from = [
         V1EnvFromSource(config_map_ref=V1ConfigMapEnvSource(name=f"{namespace}-feature-flags")),
         V1EnvFromSource(config_map_ref=V1ConfigMapEnvSource(name=f"{namespace}-s3-bucket-names")),
     ]
     sidecar_env = _create_sidecar_env(
-        organization_id=str(session.organization_id),
-        workspace_id=str(session.workspace_id),
-        project_id=project_id,
-        job_id=job_id,
+        identifier_json=identifier_json,
         namespace=namespace,
     )

@@ -218,6 +212,8 @@ def create_flyte_container_task( # noqa: PLR0913
         name=PRIMARY_CONTAINER_NAME,
         image=primary_container_image,
         env=[
+            # Identifier JSON
+            V1EnvVar(name="IDENTIFIER_JSON", value=identifier_json),
             V1EnvVar(name="SHARD_FILES_DIR", value="/shard_files"),
             V1EnvVar(name="MLFLOW_TRACKING_URI", value="http://localhost:5000"),
             V1EnvVar(name="MLFLOW_EXPERIMENT_ID", value=project_id),

interactive_ai/workflows/geti_domain/common/jobs_common_extras/mlflow/adapters/geti_otx_interface.py

Lines changed: 10 additions & 34 deletions
@@ -3,7 +3,6 @@

 """This module defines a command to prepare MLFlow Experiment directory in the S3 bucket."""

-import io
 import json
 import logging
 import os
@@ -12,7 +11,6 @@
 from typing import Any

 import numpy as np
-import pyarrow as pa
 from geti_telemetry_tools import unified_tracing
 from geti_types import ProjectIdentifier
 from iai_core.adapters.binary_interpreters import RAWBinaryInterpreter
@@ -24,7 +22,6 @@
 from iai_core.entities.model import Model, ModelFormat, ModelOptimizationType, ModelStatus
 from iai_core.repos.model_repo import ModelRepo
 from iai_core.repos.project_repo import ProjectRepo
-from pandas import DataFrame

 # NOTE: workaround for CVS-156400 -> the following imports are needed for the workaround
 from jobs_common.tasks.utils.progress import report_progress
@@ -40,7 +37,6 @@
     MLFlowLifecycleStage,
     MLFlowRunStatus,
 )
-from jobs_common_extras.mlflow.adapters.metrics_mapper import PerformanceDeserializer
 from jobs_common_extras.mlflow.repos.binary_repo import MLFlowExperimentBinaryRepo

 logger = logging.getLogger(__name__)
@@ -445,42 +441,25 @@ def pull_metrics(self) -> Performance | None:
         :return: Performance object, or None if it cannot be loaded.
         """

-        # Metrics can be found either in outputs/models/performance.pickle or live_metrics/metrics.arrow
-        model_prefix = os.path.join(self.dst_path_prefix, "outputs", "models")
-        performance_filepath = os.path.join(model_prefix, "performance-json.bin")
+        # Metrics can be found in live_metrics/metrics.json
         live_metrics_prefix = os.path.join(self.dst_path_prefix, "live_metrics")
-        metrics_filepath = os.path.join(live_metrics_prefix, "metrics.arrow")
+        metrics_filepath = os.path.join(live_metrics_prefix, "metrics.json")

         performance: Performance | None = None
-        if self.binary_repo.exists(performance_filepath):
-            logger.info("Reading performance metrics from %s", performance_filepath)
+        if self.binary_repo.exists(metrics_filepath):
+            logger.info("Reading performance metrics from %s", metrics_filepath)
             try:
                 data = self.binary_repo.get_by_filename(
-                    filename=performance_filepath,
+                    filename=metrics_filepath,
                     binary_interpreter=RAWBinaryInterpreter(),
                 )
-                performance = PerformanceDeserializer.backward(json.loads(data.decode()))
-            except Exception:
-                logger.exception(f"Failed to extract performance metrics from {performance_filepath}")
-        elif self.binary_repo.exists(metrics_filepath):
-            logger.info("Reading performance metrics from %s", metrics_filepath)
-            try:
-                obj = self.binary_repo.storage_client.client.get_object(  # type: ignore
-                    bucket_name=self.binary_repo.storage_client.bucket_name,  # type: ignore
-                    object_name=os.path.join(
-                        self.binary_repo.storage_client.object_name_base,  # type: ignore[attr-defined]
-                        metrics_filepath,
-                    ),  # type: ignore
-                )
-                table = pa.ipc.RecordBatchFileReader(io.BytesIO(obj.data)).read_all()
-                data_frame = table.to_pandas()
-                performance = self._create_performance_from_arrow(data_frame)
+                metrics_json = json.loads(data)
+                performance = self._create_performance_from_json(metrics_json)
             except Exception:
                 logger.exception(f"Failed to extract performance metrics from {metrics_filepath}")
         else:
             logger.error(
-                "Cannot find any file to extract performance metrics; both `%s` and `%s` are missing.",
-                performance_filepath,
+                "Cannot find file to extract performance metrics; `%s` is missing.",
                 metrics_filepath,
             )

@@ -550,12 +529,9 @@ def _create_progress_json(self) -> dict[str, str | float]:
             "progress": 0.0,
         }

-    def _create_performance_from_arrow(self, data_frame: DataFrame) -> Performance:
-        grouped = data_frame.groupby("key")
-
+    def _create_performance_from_json(self, metrics_json: dict[str, list[float]]) -> Performance:
         dashboard_metrics = []
-        for name, group in grouped:
-            ys = group["value"].tolist()
+        for name, ys in metrics_json.items():
             xs = [float(x) for x in range(1, len(ys) + 1)]
             metric = CurveMetric(name=name, ys=ys, xs=xs)
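
As a hedged illustration of the new format (the metric names below are made up), live_metrics/metrics.json is a flat mapping from metric name to the per-step values, which _create_performance_from_json turns into one curve per key:

# Illustrative payload only; actual metric names depend on what the trainer logs.
import json

metrics_json = json.loads('{"train/loss": [0.9, 0.6, 0.4], "val/accuracy": [0.71, 0.78, 0.81]}')

for name, ys in metrics_json.items():
    xs = [float(x) for x in range(1, len(ys) + 1)]  # implicit 1..N x-axis, as in _create_performance_from_json
    print(name, xs, ys)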

interactive_ai/workflows/geti_domain/common/tests/unit/extras/mlflow/test_adapters.py

Lines changed: 2 additions & 24 deletions
@@ -23,7 +23,7 @@ class TestGetiOTXInterfaceAdapter:
     @pytest.fixture()
     def fxt_performance(self):
         return Performance(
-            score=ScoreMetric("dummy", 1.0),
+            score=ScoreMetric(name="Model accuracy", value=0.5),
             dashboard_metrics=[
                 LineMetricsGroup(
                     metrics=[CurveMetric(name="dummy", ys=[1, 2, 3], xs=[1, 2, 3])],
@@ -444,29 +444,7 @@ def test_pull_metrics(
         # Arrange
         mock_project_repo.return_value.get_by_id.return_value = fxt_project
         mock_repo.return_value.organization_id = fxt_organization_id
-        performance_dict = {
-            "dashboard_metrics": [
-                {
-                    "metrics": [
-                        {
-                            "name": "dummy",
-                            "type": "curve",
-                            "xs": [1.0, 2.0, 3.0],
-                            "ys": [1.0, 2.0, 3.0],
-                        }
-                    ],
-                    "visualization_info": {
-                        "name": "dummy",
-                        "palette": "DEFAULT",
-                        "type": "LINE",
-                        "x_axis_label": "x",
-                        "y_axis_label": "y",
-                    },
-                }
-            ],
-            "score": {"label_id": None, "name": "dummy", "type": "score", "value": 1.0},
-            "type": "Performance",
-        }
+        performance_dict = {"dummy": [1.0, 2.0, 3.0]}
         mock_repo.return_value.get_by_filename.return_value = json.dumps(performance_dict).encode()

         # Act

interactive_ai/workflows/otx_domain/trainer/otx_v2/pyproject.toml

Lines changed: 0 additions & 1 deletion
@@ -5,7 +5,6 @@ description = "OTX trainer"
 requires-python = ">=3.10, <3.11"

 dependencies = [
-    "mlflow==2.19.0",
     "minio~=7.1.0",
     "numpy==1.26.4",
     "requests==2.32.3",
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
+# Copyright (C) 2022-2025 Intel Corporation
+# LIMITED EDGE SOFTWARE DISTRIBUTION LICENSE
+import json
+import logging
+from argparse import Namespace
+from pathlib import Path
+from typing import Any
+
+from lightning.pytorch.loggers.logger import Logger
+from otx_io import upload_model_artifact
+
+logger = logging.getLogger(__name__)
+
+
+class OTXMetricsLogger(Logger):
+    def __init__(self, file_path: Path):
+        self.file_path = file_path
+        self.metrics: dict[str, list[float]] = {}
+        logger.info(f"Writing live metrics to {file_path}")
+
+    @property
+    def name(self) -> str | None:
+        return None
+
+    @property
+    def version(self) -> int | str | None:
+        return None
+
+    def log_metrics(self, metrics: dict[str, float], step: int | None = None) -> None:  # noqa: ARG002
+        for key, value in metrics.items():
+            self.metrics.setdefault(key, []).append(value)
+
+        with open(self.file_path, "w") as f:
+            json.dump(self.metrics, f)
+
+    def log_hyperparams(self, params: dict[str, Any] | Namespace, *args: Any, **kwargs: Any) -> None:
+        pass
+
+    def save(self) -> None:
+        print(self.metrics)
+
+    def finalize(self, status: str) -> None:  # noqa: ARG002
+        upload_model_artifact(src_filepath=self.file_path, dst_filepath=Path("live_metrics/metrics.json"))
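
For context, a minimal sketch of how this logger could be attached to a Lightning Trainer in the OTX trainer entrypoint; the module name, file path, and trainer wiring below are assumptions, not part of this diff:

# Hypothetical wiring; the actual OTX entrypoint may configure the trainer differently.
from pathlib import Path

from lightning.pytorch import Trainer

from otx_metrics_logger import OTXMetricsLogger  # assumed module name for the new file

metrics_logger = OTXMetricsLogger(file_path=Path("/tmp/metrics.json"))
trainer = Trainer(logger=metrics_logger, max_epochs=1)
# Each logged metric is appended to the local JSON file during training,
# and finalize() uploads it to live_metrics/metrics.json when training ends.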

interactive_ai/workflows/otx_domain/trainer/otx_v2/scripts/minio_util.py

Lines changed: 0 additions & 105 deletions
This file was deleted.
