1 change: 1 addition & 0 deletions src/main/python/systemds/scuro/dataloader/json_loader.py
@@ -55,5 +55,6 @@ def extract(self, file: str, index: Optional[Union[str, List[str]]] = None):
except:
text = json_file[self.field]

text = " ".join(text)
self.data.append(text)
self.metadata[idx] = self.modality_type.create_metadata(len(text), text)
@@ -174,9 +174,13 @@ def visit_node(node_id):
all_results.append(result)

if self.maximize_metric:
best_params, best_score = max(all_results, key=lambda x: x[1])
best_params, best_score = max(
all_results, key=lambda x: x[1].scores[self.scoring_metric]
)
else:
best_params, best_score = min(all_results, key=lambda x: x[1])
best_params, best_score = min(
all_results, key=lambda x: x[1].scores[self.scoring_metric]
)

tuning_time = time.time() - start_time

20 changes: 12 additions & 8 deletions src/main/python/systemds/scuro/drsearch/multimodal_optimizer.py
@@ -24,7 +24,7 @@
import threading
from dataclasses import dataclass
from typing import List, Dict, Any, Generator
from systemds.scuro.drsearch.task import Task
from systemds.scuro.drsearch.task import Task, PerformanceMeasure
from systemds.scuro.drsearch.representation_dag import (
RepresentationDag,
RepresentationDAGBuilder,
@@ -87,7 +87,8 @@ def _evaluate_dag_worker(dag_pickle, task_pickle, modalities_pickle, debug=False
val_score=scores[1],
runtime=total_time,
task_name=task_copy.model.name,
evaluation_time=eval_time,
task_time=eval_time,
representation_time=total_time - eval_time,
)
except Exception:
if debug:
@@ -390,8 +391,9 @@ def _evaluate_dag(self, dag: RepresentationDag, task: Task) -> "OptimizationResu
train_score=scores[0],
val_score=scores[1],
runtime=total_time,
representation_time=total_time - eval_time,
task_name=task_copy.model.name,
evaluation_time=eval_time,
task_time=eval_time,
)

except Exception as e:
@@ -475,8 +477,10 @@ def store_results(self, file_name=None):
@dataclass
class OptimizationResult:
dag: RepresentationDag
train_score: float
val_score: float
runtime: float
task_name: str
evaluation_time: float = 0.0
train_score: PerformanceMeasure = None
val_score: PerformanceMeasure = None
runtime: float = 0.0
task_time: float = 0.0
representation_time: float = 0.0
task_name: str = ""
tradeoff_score: float = 0.0
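
For orientation, a short sketch of how the reworked OptimizationResult fields fit together; the scores, timings, and task name below are made up for illustration, and a real result is produced by the optimizer itself.

# Illustrative only; values and task name are hypothetical.
from systemds.scuro.drsearch.multimodal_optimizer import OptimizationResult
from systemds.scuro.drsearch.task import PerformanceMeasure

val = PerformanceMeasure("val", "accuracy")
val.add_scores({"accuracy": 0.83})
result = OptimizationResult(
    dag=None,  # placeholder; normally a RepresentationDag
    val_score=val.compute_averages(),
    runtime=12.5,
    task_time=2.5,
    representation_time=10.0,
    task_name="cnn",
)
print(result.val_score.average_scores["accuracy"])    # 0.83
print(result.representation_time + result.task_time)  # equals result.runtime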
90 changes: 90 additions & 0 deletions src/main/python/systemds/scuro/drsearch/ranking.py
@@ -0,0 +1,90 @@
# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

from dataclasses import replace
from typing import Callable, Iterable, List, Optional


def rank_by_tradeoff(
entries: Iterable,
*,
weights=(0.7, 0.3),
performance_metric_name: str = "accuracy",
runtime_accessor: Optional[Callable[[object], float]] = None,
cache_scores: bool = True,
score_attr: str = "tradeoff_score",
) -> List:
entries = list(entries)
if not entries:
return []

performance_score_accessor = lambda entry: getattr(entry, "val_score")[
performance_metric_name
]
if runtime_accessor is None:

def runtime_accessor(entry):
if hasattr(entry, "runtime"):
return getattr(entry, "runtime")
rep = getattr(entry, "representation_time", 0.0)
task = getattr(entry, "task_time", 0.0)
return rep + task

performance = [float(performance_score_accessor(e)) for e in entries]
runtimes = [float(runtime_accessor(e)) for e in entries]

perf_min, perf_max = min(performance), max(performance)
run_min, run_max = min(runtimes), max(runtimes)

def safe_normalize(values, vmin, vmax):
if vmax - vmin == 0.0:
return [1.0] * len(values)
return [(v - vmin) / (vmax - vmin) for v in values]

norm_perf = safe_normalize(performance, perf_min, perf_max)
norm_run = safe_normalize(runtimes, run_min, run_max)
norm_run = [1.0 - r for r in norm_run]

acc_w, run_w = weights
total_w = (acc_w or 0.0) + (run_w or 0.0)
if total_w == 0.0:
acc_w = 1.0
run_w = 0.0
else:
acc_w /= total_w
run_w /= total_w

scores = [acc_w * a + run_w * r for a, r in zip(norm_perf, norm_run)]

if cache_scores:
for entry, score in zip(entries, scores):
if hasattr(entry, score_attr):
try:
new_entry = replace(entry, **{score_attr: score})
entries[entries.index(entry)] = new_entry
except TypeError:
setattr(entry, score_attr, score)
else:
setattr(entry, score_attr, score)

return sorted(
entries, key=lambda entry: getattr(entry, score_attr, 0.0), reverse=True
)
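
A minimal usage sketch of rank_by_tradeoff, assuming entries whose val_score can be indexed by metric name (here a plain dict); the _Entry dataclass and the candidate values are illustrative stand-ins, not part of this patch.

from dataclasses import dataclass
from systemds.scuro.drsearch.ranking import rank_by_tradeoff

@dataclass
class _Entry:  # illustrative stand-in for an optimizer result
    name: str
    val_score: dict  # metric name -> validation score
    runtime: float
    tradeoff_score: float = 0.0

candidates = [
    _Entry("bert+mfcc", {"accuracy": 0.81}, runtime=120.0),
    _Entry("tfidf+mfcc", {"accuracy": 0.78}, runtime=15.0),
]
ranked = rank_by_tradeoff(candidates, weights=(0.7, 0.3), performance_metric_name="accuracy")
for entry in ranked:  # best performance/runtime trade-off first
    print(entry.name, round(entry.tradeoff_score, 3))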
58 changes: 49 additions & 9 deletions src/main/python/systemds/scuro/drsearch/task.py
@@ -28,6 +28,37 @@
from sklearn.model_selection import KFold


class PerformanceMeasure:
def __init__(self, name, metrics, higher_is_better=True):
self.average_scores = None
self.name = name
self.metrics = metrics
self.higher_is_better = higher_is_better
self.scores = {}

if isinstance(metrics, list):
for metric in metrics:
self.scores[metric] = []
else:
self.scores[metrics] = []

def add_scores(self, scores):
if isinstance(self.metrics, list):
for metric in self.metrics:
self.scores[metric].append(scores[metric])
else:
self.scores[self.metrics].append(scores[self.metrics])

def compute_averages(self):
self.average_scores = {}
if isinstance(self.metrics, list):
for metric in self.metrics:
self.average_scores[metric] = np.mean(self.scores[metric])
else:
self.average_scores[self.metrics] = np.mean(self.scores[self.metrics])
return self


class Task:
def __init__(
self,
@@ -38,6 +69,7 @@ def __init__(
val_indices: List,
kfold=5,
measure_performance=True,
performance_measures="accuracy",
):
"""
Parent class for the prediction task that is performed on top of the aligned representation
@@ -59,8 +91,9 @@ def __init__(
self.inference_time = []
self.training_time = []
self.expected_dim = 1
self.train_scores = []
self.val_scores = []
self.performance_measures = performance_measures
self.train_scores = PerformanceMeasure("train", performance_measures)
self.val_scores = PerformanceMeasure("val", performance_measures)

def create_model(self):
"""
@@ -74,8 +107,12 @@ def create_model(self):
def get_train_test_split(self, data):
X_train = [data[i] for i in self.train_indices]
y_train = [self.labels[i] for i in self.train_indices]
X_test = [data[i] for i in self.val_indices]
y_test = [self.labels[i] for i in self.val_indices]
if self.val_indices is None:
X_test = None
y_test = None
else:
X_test = [data[i] for i in self.val_indices]
y_test = [self.labels[i] for i in self.val_indices]

return X_train, y_train, X_test, y_test

@@ -101,25 +138,28 @@ def run(self, data):
self._run_fold(model, train_X, train_y, test_X, test_y)
fold += 1

return [np.mean(self.train_scores), np.mean(self.val_scores)]
return [
self.train_scores.compute_averages(),
self.val_scores.compute_averages(),
]

def _reset_params(self):
self.inference_time = []
self.training_time = []
self.train_scores = []
self.val_scores = []
self.train_scores = PerformanceMeasure("train", self.performance_measures)
self.val_scores = PerformanceMeasure("val", self.performance_measures)

def _run_fold(self, model, train_X, train_y, test_X, test_y):
train_start = time.time()
train_score = model.fit(train_X, train_y, test_X, test_y)
train_end = time.time()
self.training_time.append(train_end - train_start)
self.train_scores.append(train_score)
self.train_scores.add_scores(train_score[0])
test_start = time.time()
test_score = model.test(np.array(test_X), test_y)
test_end = time.time()
self.inference_time.append(test_end - test_start)
self.val_scores.append(test_score)
self.val_scores.add_scores(test_score[0])

def create_representation_and_run(
self,
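
To show how the new PerformanceMeasure accumulates per-fold scores, a short sketch assuming a single "accuracy" metric; the fold values are made up for illustration.

from systemds.scuro.drsearch.task import PerformanceMeasure

val = PerformanceMeasure("val", "accuracy")
for fold_result in ({"accuracy": 0.80}, {"accuracy": 0.84}, {"accuracy": 0.82}):  # made-up fold scores
    val.add_scores(fold_result)
val.compute_averages()
print(val.scores["accuracy"])          # [0.8, 0.84, 0.82]
print(val.average_scores["accuracy"])  # ~0.82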