Better cost-over-time hover text

Sarah Krebs · Sarah Krebs · commit e3196c978df4 · 2024-06-20T15:26:39.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,9 +1,12 @@
 # Version 1.2.1
 
 ## Quality of Life
-- Runs now get displayed with their parent directory for better distinguishability
+- Runs now get displayed with their parent directory for better distinguishability.
 - Increase plot font sizes.
 - Add a simple loading bar functionality for longer runs.
+- Show a run's hover text for the actual budget of a trial in Cost over Time with Combined budget (#154).
+- Adapt trajectory calculation for Group to be able to display hover text for the actual budget of a trial in Cost over Time.
+- Use highest budget as default budget for Cost over Time instead of Combined.
 
 ## General
 - Seed is now required in the Recorder.
diff --git a/deepcave/plugins/objective/cost_over_time.py b/deepcave/plugins/objective/cost_over_time.py
@@ -225,7 +225,9 @@ def load_inputs(self) -> Dict[str, Any]:
             },
             "budget_id": {
                 "options": self.budget_options,
-                "value": self.budget_options[-1]["value"],
+                "value": self.budget_options[0]["value"]
+                if len(self.budget_options) == 1
+                else self.budget_options[-2]["value"],
             },
             "xaxis": {
                 "options": [
@@ -344,8 +346,7 @@ def load_outputs(runs, inputs, outputs) -> go.Figure:  # type: ignore
                 continue
 
             objective = run.get_objective(inputs["objective_id"])
-            budget = run.get_budget(inputs["budget_id"])
-            config_ids = outputs[run.id]["config_ids"]
+            ids = outputs[run.id]["ids"]
             x = outputs[run.id]["times"]
             if inputs["xaxis"] == "trials":
                 x = outputs[run.id]["ids"]
@@ -360,9 +361,11 @@ def load_outputs(runs, inputs, outputs) -> go.Figure:  # type: ignore
             hoverinfo = "skip"
             symbol = None
             mode = "lines"
-            if len(config_ids) > 0:
+            if len(run.history) > 0:
                 hovertext = [
-                    get_hovertext_from_config(run, config_id, budget) for config_id in config_ids
+                    get_hovertext_from_config(run, trial.config_id, trial.budget)
+                    for id, trial in enumerate(run.history)
+                    if id in ids
                 ]
                 hoverinfo = "text"
                 symbol = "circle"
diff --git a/deepcave/runs/group.py b/deepcave/runs/group.py
@@ -9,13 +9,14 @@
     - Group: Can group and manage a group of runs.
 """
 
-from typing import Any, Dict, Iterator, List, Optional, Tuple
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 
 from copy import deepcopy
 
 import numpy as np
 
 from deepcave.runs import AbstractRun, NotMergeableError, check_equality
+from deepcave.runs.objective import Objective
 from deepcave.utils.hash import string_to_hash
 
 
@@ -286,74 +287,91 @@ def get_model(self, config_id: int) -> Optional[Any]:
         run_id, config_id = self._original_config_mapping[config_id]
         return self.runs[run_id].get_model(config_id)
 
-    # Types dont match superclass
-    def get_trajectory(self, *args, **kwargs):  # type: ignore
+    def get_trajectory(
+        self,
+        objective: Objective,
+        budget: Optional[Union[int, float]] = None,
+        seed: Optional[int] = None,
+    ) -> Tuple[List[float], List[float], List[float], List[int], List[int]]:
         """
-        Calculate the trajectory of the given objective and budget.
-
-        This includes the times, the mean costs, and the standard deviation of the costs.
+        Calculate the trajectory of the given objective, budget, and seed.
 
         Parameters
         ----------
-        *args
-            Should be the objective to calculate the trajectory from.
-        **kwargs
-            Should be the budget to calculate the trajectory for.
+        objective : Objective
+            Objective to calculate the trajectory for.
+        budget : Optional[Union[int, float]], optional
+            Budget to calculate the trajectory for. If no budget is given, then the highest budget
+            is chosen. By default None.
+        seed : Optional[int], optional
+            Seed to calculate the trajectory for. If no seed is given, then all seeds are
+            considered. By default None.
 
         Returns
         -------
-        times : List[float]
-            Times of the trajectory.
-        costs_mean : List[float]
-            Costs of the trajectory.
-        costs_std : List[float]
-            Standard deviation of the costs of the trajectory.
-        ids : List[int]
-            The "global" ids of the selected trial.
-        config_ids : List[int]
-            The configuration ids of the selected trials.
+        Tuple[List[float], List[float], List[float], List[int], List[int]]
+            times : List[float]
+                Times of the trajectory.
+            costs_mean : List[float]
+                Costs of the trajectory.
+            costs_std : List[float]
+                Standard deviation of the costs of the trajectory. This is particularly useful for
+                grouped runs.
+            ids : List[int]
+                The "global" ids of the selected trials.
+            config_ids : List[int]
+                Config ids of the selected trials.
         """
-        # Cache costs
-        run_costs = []
-        run_times = []
-
-        # All x values on which y values are needed
-        all_times = []
-
-        for _, run in enumerate(self.runs):
-            times, costs_mean, _, _, _ = run.get_trajectory(*args, **kwargs)
-
-            # Cache s.t. calculate it is not calculated multiple times
-            run_costs.append(costs_mean)
-            run_times.append(times)
-
-            # Add all times
-            # Standard deviation needs to be calculated on all times
-            for time in times:
-                if time not in all_times:
-                    all_times.append(time)
-
-        all_times.sort()
-
-        # Now look for corresponding y values
-        all_costs = []
-
-        for time in all_times:
-            y = []
-
-            # Iterate over all runs
-            for costs, times in zip(run_costs, run_times):
-                # Find closest x value
-                idx = min(range(len(times)), key=lambda i: abs(times[i] - time))
-                y.append(costs[idx])
-
-            all_costs.append(y)
-
-        # Make numpy arrays
-        all_costs_array = np.array(all_costs)
-
-        times = all_times
-        costs_mean = np.mean(all_costs_array, axis=1)
-        costs_std = np.std(all_costs_array, axis=1)
-
-        return times, list(costs_mean), list(costs_std), [], []
+        if budget is None:
+            budget = self.get_highest_budget()
+
+        costs_mean = []
+        costs_std = []
+        ids = []
+        config_ids = []
+        times = []
+
+        order = []
+
+        # Sort self.history by end-time
+        for id, trial in enumerate(self.history):
+            order.append((id, trial.end_time))
+        order.sort(key=lambda tup: tup[1])
+
+        # Important: Objective can be minimized or maximized
+        if objective.optimize == "lower":
+            current_cost = np.inf
+        else:
+            current_cost = -np.inf
+
+        # Iterate over the history ordered by end-time and calculate the current incumbent
+        for i, (id, _) in enumerate(order):
+            trial = self.history[id]
+
+            # Get the incumbent over all trials up to this point
+            try:
+                _, cost = self.get_incumbent(
+                    objectives=objective,
+                    budget=budget,
+                    seed=seed,
+                    selected_ids=[selected_id for selected_id, _ in order[: i + 1]],
+                )
+            except RuntimeError:
+                continue
+
+            # Now it's important to check whether the cost was minimized or maximized
+            if objective.optimize == "lower":
+                improvement = cost < current_cost
+            else:
+                improvement = cost > current_cost
+
+            if improvement:
+                current_cost = cost
+
+                costs_mean.append(cost)
+                costs_std.append(0.0)
+                times.append(trial.end_time)
+                ids.append(id)
+                config_ids.append(trial.config_id)
+
+        return times, costs_mean, costs_std, ids, config_ids