From f6696638b6b209f94275432a44930992f01421ac Mon Sep 17 00:00:00 2001
From: Daphne12345
Date: Mon, 19 May 2025 12:36:53 +0200
Subject: [PATCH 01/12] Fallback in case the database connection is not good

---
 carps/loggers/database_logger.py | 34 ++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/carps/loggers/database_logger.py b/carps/loggers/database_logger.py
index b6cddd748..54b031123 100644
--- a/carps/loggers/database_logger.py
+++ b/carps/loggers/database_logger.py
@@ -8,6 +8,7 @@
 from carps.loggers.abstract_logger import AbstractLogger
 from carps.utils.loggingutils import CustomEncoder, get_logger, setup_logging
+import time

 if TYPE_CHECKING:
     from py_experimenter.result_processor import ResultProcessor
@@ -98,12 +99,20 @@ def log_trial(
         table_name: str, default "trials"
             The name of the table to log the trial to.
         """
-        info = convert_trial_info(trial_info, trial_value)
-        info["n_trials"] = n_trials
-        info["n_function_calls"] = n_function_calls if n_function_calls else n_trials
-
-        if self.result_processor:
-            self.result_processor.process_logs({table_name: info})
+        for i in range(5):
+            try:
+                info = convert_trial_info(trial_info, trial_value)
+                info["n_trials"] = n_trials
+                info["n_function_calls"] = n_function_calls if n_function_calls else n_trials
+
+                if self.result_processor:
+                    logger.info(f"Logging trial to {table_name}: {info}")
+                    self.result_processor.process_logs({table_name: info})
+                break
+            except Exception as e:
+                if i == 4:
+                    raise e
+                time.sleep(10)

     def log_incumbent(self, n_trials: int | float, incumbent: Incumbent, n_function_calls: int | None = None) -> None:
         """Log the incumbent.
@@ -142,5 +151,14 @@ def log_arbitrary(self, data: dict, entity: str) -> None:
         entity : str
             The entity to log the data to. This is the table name in the database.
         """
-        if self.result_processor:
-            self.result_processor.process_logs({entity: data})
+
+        for i in range(5):
+            try:
+                if self.result_processor:
+                    self.result_processor.process_logs({entity: data})
+                break
+            except Exception as e:
+                if i == 4:
+                    raise e
+                time.sleep(10)
+

From 91a60745e06ffe49a50aeb3aab2eb366f3ced12b Mon Sep 17 00:00:00 2001
From: Daphne12345
Date: Mon, 19 May 2025 15:03:11 +0200
Subject: [PATCH 02/12] calculate hypervolume

---
 carps/analysis/calc_hypervolume.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/carps/analysis/calc_hypervolume.py b/carps/analysis/calc_hypervolume.py
index 22ad04126..f5784b6c1 100644
--- a/carps/analysis/calc_hypervolume.py
+++ b/carps/analysis/calc_hypervolume.py
@@ -11,7 +11,7 @@
 import pandas as pd
 from pymoo.indicators.hv import HV

-from carps.analysis.gather_data import convert_mixed_types_to_str
+# from carps.analysis.gather_data import convert_mixed_types_to_str

 run_id = ["task_type", "benchmark_id", "task_id", "optimizer_id", "seed"]

@@ -57,9 +57,17 @@ def add_reference_point(x: pd.DataFrame) -> pd.DataFrame:

     Returns:
         pd.DataFrame: Dataframe with the reference point.
""" - costs = x["trial_value__cost_inc"].apply(lambda x: np.array([np.array(c) for c in x])).to_list() - costs = np.concatenate(costs) + # Flatten and stack all cost vectors + costs = np.vstack([np.array(c) for c in x["trial_value__cost_raw"]]) + + # Sanity check for consistent dimensionality + if len(set(cost.shape[0] for cost in costs)) != 1: + raise ValueError("Inconsistent number of objectives in cost vectors.") + + # Reference point is max across all objectives reference_point = np.max(costs, axis=0) + + # Set reference point per row x["reference_point"] = [reference_point] * len(x) return x @@ -73,7 +81,7 @@ def calc_hv(x: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Dataframe with the hypervolume. """ - F = np.concatenate(np.array([np.array(p) for p in x["trial_value__cost_inc"].to_numpy()])) + F = np.vstack([np.array(p) for p in x["trial_value__cost_raw"]]) ind = HV(ref_point=x["reference_point"].iloc[0], pf=None, nds=False) x["hypervolume"] = ind(F) From 8bc78991646ad5aabc18e707a288e17073810fe4 Mon Sep 17 00:00:00 2001 From: Daphne12345 Date: Mon, 19 May 2025 15:06:50 +0200 Subject: [PATCH 03/12] update gather data hypervolume --- carps/analysis/gather_data.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/carps/analysis/gather_data.py b/carps/analysis/gather_data.py index 193a2ba71..838dc1679 100644 --- a/carps/analysis/gather_data.py +++ b/carps/analysis/gather_data.py @@ -22,6 +22,7 @@ from carps.utils.loggingutils import get_logger, setup_logging from carps.utils.task import Task from carps.utils.trials import TrialInfo +from carps.analysis.calc_hypervolume import calc_hv, add_reference_point, run_id if TYPE_CHECKING: from carps.objective_functions.objective_function import ObjectiveFunction @@ -389,11 +390,11 @@ def maybe_postadd_task(logs: pd.DataFrame, overwrite: bool = False) -> pd.DataFr task_cfg = load_task_cfg(task_id=gid, task_index=task_index) task_cfg_yaml = OmegaConf.to_yaml(task_cfg) - if "${seed}" in task_cfg_yaml: - # Add seed to config to make it resolvable - assert gdf["seed"].nunique() == 1 # noqa: PD101 - seed = gdf["seed"].iloc[0] - task_cfg.seed = int(seed) + # if "${seed}" in task_cfg_yaml: + # # Add seed to config to make it resolvable + # assert gdf["seed"].nunique() == 1 # noqa: PD101 + # seed = gdf["seed"].iloc[0] + # task_cfg.seed = int(seed) task_cfg = OmegaConf.to_container(task_cfg, resolve=False) task_columns = [c for c in gdf.columns if c.startswith("task.")] if overwrite: @@ -440,10 +441,13 @@ def maybe_convert_cost_dtype(x: int | float | str | list) -> float | list[float] Returns: float | list[float]: Cost(s). """ + if isinstance(x, int | float): return float(x) if isinstance(x, str): - return eval(x) # noqa: S307 + x = eval(x) # noqa: S307 + if isinstance(x, dict): + x = eval(x["cost"]) assert isinstance(x, list) return x @@ -463,7 +467,7 @@ def maybe_convert_cost_to_so(x: float | list | np.ndarray) -> float: float: Single-objective cost or aggregated cost. 
""" if isinstance(x, list | np.ndarray): - return np.sum(x) + return np.sum(x) # TODO change to HV here if isinstance(x, dict): assert len(x.values()) == 1 # Most likely comes from database @@ -472,7 +476,7 @@ def maybe_convert_cost_to_so(x: float | list | np.ndarray) -> float: if isinstance(value, str): value = ast.literal_eval(value) if isinstance(value, list): - return np.sum(value) + return np.sum(value) # TODO Change to HV here if isinstance(value, float | int): return value if isinstance(x, float): @@ -566,7 +570,11 @@ def process_logs(logs: pd.DataFrame, keep_task_columns: list[str] | None = None) logger.debug("Handle MO costs...") logs["trial_value__cost_raw"] = logs["trial_value__cost"].apply(maybe_convert_cost_dtype) - logs["trial_value__cost"] = logs["trial_value__cost_raw"].apply(maybe_convert_cost_to_so) + # trial_value__cost_raw for add_reference_point and to calc_hv + logs = logs.groupby(by=["task_type", "task_id"]).apply(add_reference_point).reset_index(drop=True) + logs = logs.groupby(by=[*run_id, "n_trials"]).apply(calc_hv).reset_index(drop=True) + logs["trial_value__cost"] = logs["hypervolume"] #logs["trial_value__cost_raw"].apply(maybe_convert_cost_to_so) + print(logs.head()) logger.debug("Determine incumbent cost...") logs["trial_value__cost_inc"] = logs.groupby(by=grouper_keys)["trial_value__cost"].transform("cummin") @@ -613,6 +621,7 @@ def normalize_logs(logs: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Normalized logs """ + grouper_keys = ["task_id", "optimizer_id", "seed"] logger.info("Start normalization...") logger.info("Normalize n_trials...") logs["n_trials_norm"] = logs.groupby("task_id")["n_trials"].transform(normalize) @@ -623,7 +632,7 @@ def normalize_logs(logs: pd.DataFrame) -> pd.DataFrame: hv = logs.loc[ids_mo, "hypervolume"] logs.loc[ids_mo, "trial_value__cost"] = -hv # higher is better logs["trial_value__cost"] = logs["trial_value__cost"].astype("float64") - logs["trial_value__cost_inc"] = logs["trial_value__cost"].transform("cummin") + logs["trial_value__cost_inc"] = logs.groupby(by=grouper_keys)["trial_value__cost"].transform("cummin") logs["trial_value__cost_norm"] = logs.groupby("task_id")["trial_value__cost"].transform(normalize) logger.info("Calc normalized incumbent cost...") From eb0d40ce3d6139488a11bee3eedd880bf6ddd3bb Mon Sep 17 00:00:00 2001 From: Daphne12345 Date: Thu, 22 May 2025 16:12:26 +0200 Subject: [PATCH 04/12] smaple matrix in autorank --- carps/analysis/run_autorank.py | 1 + 1 file changed, 1 insertion(+) diff --git a/carps/analysis/run_autorank.py b/carps/analysis/run_autorank.py index 45c3f401a..0471c348b 100644 --- a/carps/analysis/run_autorank.py +++ b/carps/analysis/run_autorank.py @@ -383,6 +383,7 @@ def cd_evaluation( alpha=alpha, alpha_normality=alpha_normality, num_samples=len(rank_data), + sample_matrix=None, posterior_matrix=None, decision_matrix=None, rope=None, From f77b6b88f8cb65364351e2302bd462820ccb9891 Mon Sep 17 00:00:00 2001 From: Daphne12345 Date: Thu, 22 May 2025 16:15:28 +0200 Subject: [PATCH 05/12] make codecarbon optional --- carps/experimenter/database/download_results.py | 8 ++++++-- carps/experimenter/scrape_results_to_db.py | 1 + carps/run_from_db.py | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/carps/experimenter/database/download_results.py b/carps/experimenter/database/download_results.py index 60a285182..54c25f2fe 100644 --- a/carps/experimenter/database/download_results.py +++ b/carps/experimenter/database/download_results.py @@ -18,6 +18,7 @@ def main( 
pyexperimenter_configuration_file_path: str | None = None, database_credential_file_path: str | Path | None = None, outdir: str | Path | None = None, + codecarbon: bool = False ) -> None: """Download results from the database and save them to outdir. @@ -49,6 +50,7 @@ def main( database_credential_file_path=database_credential_file_path, log_file="logs/reset_experiments.log", use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel, + use_codecarbon=codecarbon ) experiment_config_table = experimenter.get_table() @@ -64,12 +66,14 @@ def main( logger.info(f"\tFrom them, found {n_errored} errored runs of type {task_type}. ❌") trajectory_table = experimenter.get_logtable("trajectory") trials_table = experimenter.get_logtable("trials") - codecarbon_table = experimenter.get_codecarbon_table() + if codecarbon: + codecarbon_table = experimenter.get_codecarbon_table() experiment_config_table.to_parquet(outdir / "experiment_config.parquet", index=False) trajectory_table.to_parquet(outdir / "trajectory.parquet", index=False) trials_table.to_parquet(outdir / "trials.parquet", index=False) - codecarbon_table.to_parquet(outdir / "codecarbon.parquet", index=False) + if codecarbon: + codecarbon_table.to_parquet(outdir / "codecarbon.parquet", index=False) logger.info( "Downloaded results from the database. " f"Saved to '{outdir}'. " diff --git a/carps/experimenter/scrape_results_to_db.py b/carps/experimenter/scrape_results_to_db.py index 902963e63..0cce9ef28 100644 --- a/carps/experimenter/scrape_results_to_db.py +++ b/carps/experimenter/scrape_results_to_db.py @@ -35,6 +35,7 @@ database_credential_file_path=database_credential_file_path, log_level=logging.INFO, use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel, + use_codecarbon=False ) diff --git a/carps/run_from_db.py b/carps/run_from_db.py index ddca3774c..d70daf285 100644 --- a/carps/run_from_db.py +++ b/carps/run_from_db.py @@ -95,6 +95,7 @@ def main( database_credential_file_path=database_credential_file_path, log_file=f"logs/{slurm_job_id}.log", use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel, + use_codecarbon=False ) experimenter.execute(py_experimenter_evaluate, max_experiments=1) From 222184b777c79c107e48c68065ce4cf9407c6ff5 Mon Sep 17 00:00:00 2001 From: Daphne12345 Date: Thu, 22 May 2025 16:19:54 +0200 Subject: [PATCH 06/12] Possibility to save configs to pickle and upload them together to the DB --- carps/experimenter/create_cluster_configs.py | 44 ++++++---- carps/experimenter/write_to_db.py | 85 ++++++++++++++++++++ 2 files changed, 112 insertions(+), 17 deletions(-) create mode 100644 carps/experimenter/write_to_db.py diff --git a/carps/experimenter/create_cluster_configs.py b/carps/experimenter/create_cluster_configs.py index 07f064ac6..77d6f3b15 100644 --- a/carps/experimenter/create_cluster_configs.py +++ b/carps/experimenter/create_cluster_configs.py @@ -13,6 +13,7 @@ from py_experimenter.experimenter import PyExperimenter from carps.utils.loggingutils import CustomEncoder +import pickle as pckl logger = logging.getLogger("create experiments") @@ -72,7 +73,7 @@ def get_experiment_definition(cfg: OmegaConf) -> dict: cfg_dict = OmegaConf.to_container(cfg=cfg, resolve=True) cfg_str = json.dumps(cfg_dict, cls=CustomEncoder) - cfg_hash = create_config_hash(cfg) + cfg_hash = create_config_hash_from_full_cfg(cfg) return { "config": cfg_str, @@ -100,6 +101,7 @@ def fill_database(cfg: 
DictConfig, experimenter: PyExperimenter) -> None:
         DatabaseConnectionError: If there is an error with the database connection.
     """
     experiment_definition = get_experiment_definition(cfg)
+
     column_names = list(experimenter.db_connector.database_configuration.keyfields.keys())
     exists = False
@@ -124,7 +126,7 @@ def fill_database(cfg: DictConfig, experimenter: PyExperimenter) -> None:
     # experimenter.close_ssh()


+# Module-level switches for the pickle fallback. hydra.main() does not accept
+# custom keyword arguments, so these cannot be passed through the decorator.
+SAVE_AS_PCKL = True
+FOLDER_PATH = Path("configs_pckl")
+
+
 @hydra.main(config_path="../configs", config_name="base.yaml", version_base=None)  # type: ignore[misc]
 def main(cfg: DictConfig) -> None:
     """Store experiment config in database.

     Args:
         cfg : DictConfig
             Global configuration.
     """
-    fill_database(cfg, experimenter)
+    if SAVE_AS_PCKL:
+        experiment_definition = get_experiment_definition(cfg)
+        # Compare against the hashes of already dumped configs (the file stems),
+        # not against the Path objects themselves.
+        existing_hashes = {f.stem for f in FOLDER_PATH.glob("*.pkl")}
+
+        if experiment_definition["config_hash"] not in existing_hashes:
+            FOLDER_PATH.mkdir(parents=True, exist_ok=True)
+            with open(FOLDER_PATH / f"{experiment_definition['config_hash']}.pkl", "wb") as f:
+                pckl.dump(experiment_definition, f)
+    else:
+        experiment_configuration_file_path = Path(__file__).parent / "py_experimenter.yaml"
+
+        database_credential_file_path = Path(__file__).parent / "credentials.yaml"
+        if database_credential_file_path is not None and not database_credential_file_path.exists():
+            database_credential_file_path = None  # type: ignore[assignment]
+
+        experimenter = PyExperimenter(
+            experiment_configuration_file_path=experiment_configuration_file_path,
+            name="carps",
+            database_credential_file_path=database_credential_file_path,
+            log_level=logging.INFO,
+            use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel,
+            use_codecarbon=False,
+        )
+        fill_database(cfg, experimenter)


 if __name__ == "__main__":
     # TODO make experiment_configuration_file_path and database_credential_file_path a commandline arg
-    experiment_configuration_file_path = Path(__file__).parent / "py_experimenter.yaml"
-
-    database_credential_file_path = Path(__file__).parent / "credentials.yaml"
-    if database_credential_file_path is not None and not database_credential_file_path.exists():
-        database_credential_file_path = None  # type: ignore[assignment]
-
-    experimenter = PyExperimenter(
-        experiment_configuration_file_path=experiment_configuration_file_path,
-        name="carps",
-        database_credential_file_path=database_credential_file_path,
-        log_level=logging.INFO,
-        use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel,
-    )
-
     main()
diff --git a/carps/experimenter/write_to_db.py b/carps/experimenter/write_to_db.py
new file mode 100644
index 000000000..c4896c8c6
--- /dev/null
+++ b/carps/experimenter/write_to_db.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+from omegaconf import OmegaConf
+from py_experimenter.experimenter import PyExperimenter
+from pathlib import Path
+import logging
+from multiprocessing import Pool
+from tqdm import tqdm
+import numpy as np
+from concurrent.futures import ThreadPoolExecutor
+import pickle as pckl
+from hydra.core.utils import setup_globals
+
+
+setup_globals()
+
+
+experiment_identifiers = ["optimizer_id", "task_id", "seed", "benchmark_id", "n_trials", "time_budget"]
+
+
+def check_existance_by_keys(experiment_definition: dict, existing_rows: list, identifier_keys: list[str]) -> bool:
+    """Check existence of an experiment in the database by the identifier keys.
+ + Args: + experiment_definition (dict): Experiment definition. + existing_rows (list): List of existing rows in the database. + identifier_keys (list[str]): List of keys to check for existance. + + Returns: + bool: True if the experiment exists, False otherwise. + """ + return any(all(experiment_definition[k] == e[k] for k in identifier_keys) for e in existing_rows) + + + +folder_path = Path("configs_pckl") +pkl_files = list(folder_path.glob("*.pkl")) +print('length of pkl_files', len(pkl_files)) + +def load_pickle(file_path): + with open(file_path, 'rb') as f: + return pckl.load(f) + +with ThreadPoolExecutor() as executor: + exp_defs = list(executor.map(load_pickle, pkl_files)) + + +# CONNECT TO DATABASE and get existing experiments +experiment_configuration_file_path = "carps/experimenter/py_experimenter copy.yaml" +database_credential_file_path = "carps/experimenter/credentials.yaml" + +experimenter = PyExperimenter( + experiment_configuration_file_path=experiment_configuration_file_path, + name="carps", + database_credential_file_path=database_credential_file_path, + log_level=logging.INFO, + use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel, + use_codecarbon=False +) + + +column_names = list(experimenter.db_connector.database_configuration.keyfields.keys()) +existing_rows = experimenter.db_connector._get_existing_rows(column_names) + +# Check if experiments exists +print("Checking if experiments already exist...") +rows_exist = [ + check_existance_by_keys(exp_def, existing_rows, experiment_identifiers) + for exp_def in tqdm(exp_defs, total=len(exp_defs)) +] + + +print(f"This number of experiments already exists: {np.sum(rows_exist)}") + +experiments_to_add = [exp_def for exp_def, exists in zip(exp_defs, rows_exist, strict=True) if not exists] +print( + f"number of existing rows {len(existing_rows)}, previous length: " + f"{len(exp_defs)}, length now {len(experiments_to_add)}" +) + + +BATCH_SIZE = 5000 +for i in range(0, len(experiments_to_add), BATCH_SIZE): + batch = experiments_to_add[i:i+BATCH_SIZE] + experimenter.fill_table_with_rows(batch) + From f770a9ebea35648bbad7abbe0946d1efc371bb6a Mon Sep 17 00:00:00 2001 From: Daphne12345 Date: Thu, 22 May 2025 16:20:24 +0200 Subject: [PATCH 07/12] larger optimizer_id --- carps/experimenter/py_experimenter.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/carps/experimenter/py_experimenter.yaml b/carps/experimenter/py_experimenter.yaml index 8509494c0..774bba8b2 100644 --- a/carps/experimenter/py_experimenter.yaml +++ b/carps/experimenter/py_experimenter.yaml @@ -2,7 +2,7 @@ PY_EXPERIMENTER: n_jobs: 1 Database: - use_ssh_tunnel: true + use_ssh_tunnel: false provider: mysql database: smacbenchmarking table: @@ -21,7 +21,7 @@ PY_EXPERIMENTER: task_type: type: VARCHAR(50) optimizer_id: - type: VARCHAR(50) + type: VARCHAR(200) optimizer_container_id: type: VARCHAR(50) seed: @@ -54,6 +54,7 @@ PY_EXPERIMENTER: trial_value__additional_info: JSON trajectory: n_trials: INT + n_function_calls: INT trial_info__config: JSON trial_info__instance: INT trial_info__seed: INT From 86b3a421d026d116d859e2c40179faa57dfd0c6c Mon Sep 17 00:00:00 2001 From: Daphne12345 Date: Thu, 22 May 2025 16:22:16 +0200 Subject: [PATCH 08/12] MO process_logs --- carps/experimenter/database/process_logs.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/carps/experimenter/database/process_logs.py b/carps/experimenter/database/process_logs.py index 
651361aed..b66be3fba 100644
--- a/carps/experimenter/database/process_logs.py
+++ b/carps/experimenter/database/process_logs.py
@@ -63,6 +63,7 @@ def add_metadata(
         "config",
         "config_hash",
         "name",
+        "n_trials",
     ]
     metadata_columns = [c for c in experiment_config_table.columns if c not in ignore_columns]
@@ -100,9 +101,9 @@ def process_single_run_from_database(
     if logs_from_one_run["experiment_id"].nunique() != 1:  # noqa: PD101
         raise ValueError("Multiple values for `experiment_id` found in the logs. Something is suspicious.")
     experiment_id = logs_from_one_run["experiment_id"].iloc[0]
-    logs_from_one_run = process_logs(logs_from_one_run)
-    if only_incumbents:
-        logs_from_one_run = filter_non_incumbent_entries(logs=logs_from_one_run)
+    # logs_from_one_run = process_logs(logs_from_one_run)
+    # if only_incumbents:
+    #     logs_from_one_run = filter_non_incumbent_entries(logs=logs_from_one_run)
     return add_metadata(
         logs_from_one_run=logs_from_one_run,
         experiment_id=experiment_id,
@@ -144,7 +145,7 @@ def process_logs_from_database(
     experiment_config_table_filename: str = "experiment_config.parquet",
     output_filename: str = "processed_logs.parquet",
     results_dir: str = "experimenter/results",
-    only_incumbents: bool = True,  # noqa: FBT001, FBT002
+    only_incumbents: bool = False,  # noqa: FBT001, FBT002
 ) -> pd.DataFrame:
     """Process logs from the database with multiprocessing for speed-up.
@@ -192,9 +193,9 @@ def process_logs_from_database(
     # Combine the results into a single DataFrame
     processed_logs = pd.concat(result, ignore_index=True).reset_index(drop=True)
-    processed_logs.to_parquet(output_filename, index=False)
+    processed_logs = process_logs(processed_logs)
+    processed_logs.to_parquet(output_filename, index=False, engine="fastparquet")
     logger.info(f"Processed logs saved to {output_filename} 💌.")
     return processed_logs


 if __name__ == "__main__":

From 400f7b786a668cafce76f2beec16dc355a0cac19 Mon Sep 17 00:00:00 2001
From: Daphne12345
Date: Thu, 22 May 2025 16:22:26 +0200
Subject: [PATCH 09/12] MO process logs

---
 carps/experimenter/database/process_logs.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/carps/experimenter/database/process_logs.py b/carps/experimenter/database/process_logs.py
index b66be3fba..3136ec0d2 100644
--- a/carps/experimenter/database/process_logs.py
+++ b/carps/experimenter/database/process_logs.py
@@ -179,12 +179,6 @@ def process_logs_from_database(
         only_incumbents=only_incumbents,
     )

-    # Set up multiprocessing pool to process the logs
-    # with Pool() as pool:
-    #     # Wrap pool.imap_unordered with tqdm to show the progress bar
-    #     result = list(tqdm(
-    #         pool.imap_unordered(
-    #             process_experiment_partial, experiment_ids), total=len(experiment_ids), desc="Processing experiments"))
     logger.info(f"Start processing {len(experiment_ids)} experiments... This might take a while...")
     result = [
         process_experiment_partial(experiment_id)

From bbfd62a646c37e63507f0fab2b8db45ccad5cdf6 Mon Sep 17 00:00:00 2001
From: Daphne Theodorakopoulos <56087728+daphne12345@users.noreply.github.com>
Date: Thu, 22 May 2025 16:33:33 +0200
Subject: [PATCH 10/12] Update write_to_db.py

---
 carps/experimenter/write_to_db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/carps/experimenter/write_to_db.py b/carps/experimenter/write_to_db.py
index c4896c8c6..964b841cc 100644
--- a/carps/experimenter/write_to_db.py
+++ b/carps/experimenter/write_to_db.py
@@ -45,7 +45,7 @@ def load_pickle(file_path):


 # CONNECT TO DATABASE and get existing experiments
-experiment_configuration_file_path = "carps/experimenter/py_experimenter copy.yaml"
+experiment_configuration_file_path = "carps/experimenter/py_experimenter.yaml"
 database_credential_file_path = "carps/experimenter/credentials.yaml"

 experimenter = PyExperimenter(

From a390ab7ed3db0450a34028b7b56fa89826c51951 Mon Sep 17 00:00:00 2001
From: Daphne12345
Date: Thu, 5 Jun 2025 16:43:25 +0200
Subject: [PATCH 11/12] calculate hypervolume based on the current Pareto
 front, normalize objectives beforehand

---
 carps/analysis/calc_hypervolume.py | 47 ++++++++++++++++++++++++++++--
 carps/analysis/gather_data.py      |  7 +++--
 2 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/carps/analysis/calc_hypervolume.py b/carps/analysis/calc_hypervolume.py
index f5784b6c1..ad9fc956f 100644
--- a/carps/analysis/calc_hypervolume.py
+++ b/carps/analysis/calc_hypervolume.py
@@ -45,6 +45,38 @@ def gather_trajectory(x: pd.DataFrame) -> pd.DataFrame:
         data.append(D)
     return pd.DataFrame(data)

+def get_pareto_front(costs: np.ndarray) -> np.ndarray:
+    """Return all Pareto-optimal rows from the given array. Assumes minimization."""
+    is_efficient = np.ones(len(costs), dtype=bool)
+    for i, c in enumerate(costs):
+        if is_efficient[i]:
+            # Keep points that are strictly better in at least one objective, or identical to c.
+            is_efficient[is_efficient] = np.any(costs[is_efficient] < c, axis=1) | np.all(costs[is_efficient] == c, axis=1)
+            is_efficient[i] = True
+    return costs[is_efficient]
+
+
+def add_running_pareto_front(group: pd.DataFrame) -> pd.DataFrame:
+    """Add the Pareto front of all costs up to the current trial to the group.
+
+    Args:
+        group (pd.DataFrame): Trials of a single run, with "n_trials" and
+            "trial_value__cost_normalized" columns.
+
+    Returns:
+        pd.DataFrame: The group with an added "pareto_front" column.
+    """
+    group = group.sort_values("n_trials").reset_index(drop=True)
+    costs = np.stack(group["trial_value__cost_normalized"].to_numpy())
+    pareto_fronts = []
+
+    for i in range(len(group)):
+        current_costs = costs[:i+1]
+        front = get_pareto_front(current_costs)
+        pareto_fronts.append(tuple(map(tuple, front)))
+
+    group["pareto_front"] = pareto_fronts
+    return group
+
+
 def add_reference_point(x: pd.DataFrame) -> pd.DataFrame:
     """Add reference point to the dataframe.
@@ -65,12 +97,21 @@ def add_reference_point(x: pd.DataFrame) -> pd.DataFrame:
         raise ValueError("Inconsistent number of objectives in cost vectors.")

     # Reference point is max across all objectives
-    reference_point = np.max(costs, axis=0)
+    reference_point = np.max(costs, axis=0) + 1e-4

     # Set reference point per row
     x["reference_point"] = [reference_point] * len(x)
     return x


+def normalize_objectives(x: pd.DataFrame) -> pd.DataFrame:
+    """Min-max normalize each objective to [0, 1] within the group."""
+    costs = np.vstack(x["trial_value__cost_raw"])
+    min_vals, max_vals = costs.min(0), costs.max(0)
+    denom = np.where(max_vals - min_vals == 0, 1, max_vals - min_vals)
+    normalized = (costs - min_vals) / denom
+    x["trial_value__cost_normalized"] = list(normalized)
+    return x
+
+
 def calc_hv(x: pd.DataFrame) -> pd.DataFrame:
     """Calculate hypervolume per trajectory step.
@@ -81,9 +122,9 @@ def calc_hv(x: pd.DataFrame) -> pd.DataFrame: Returns: pd.DataFrame: Dataframe with the hypervolume. """ - F = np.vstack([np.array(p) for p in x["trial_value__cost_raw"]]) + F = np.vstack([np.array(p) for p in x["pareto_front"]]) - ind = HV(ref_point=x["reference_point"].iloc[0], pf=None, nds=False) + ind = HV(ref_point=[1.000001]*F.shape[1], pf=None, nds=False) x["hypervolume"] = ind(F) return x diff --git a/carps/analysis/gather_data.py b/carps/analysis/gather_data.py index 838dc1679..01f779cff 100644 --- a/carps/analysis/gather_data.py +++ b/carps/analysis/gather_data.py @@ -22,7 +22,7 @@ from carps.utils.loggingutils import get_logger, setup_logging from carps.utils.task import Task from carps.utils.trials import TrialInfo -from carps.analysis.calc_hypervolume import calc_hv, add_reference_point, run_id +from carps.analysis.calc_hypervolume import calc_hv, add_reference_point, run_id, add_running_pareto_front, normalize_objectives if TYPE_CHECKING: from carps.objective_functions.objective_function import ObjectiveFunction @@ -571,7 +571,8 @@ def process_logs(logs: pd.DataFrame, keep_task_columns: list[str] | None = None) logger.debug("Handle MO costs...") logs["trial_value__cost_raw"] = logs["trial_value__cost"].apply(maybe_convert_cost_dtype) # trial_value__cost_raw for add_reference_point and to calc_hv - logs = logs.groupby(by=["task_type", "task_id"]).apply(add_reference_point).reset_index(drop=True) + logs = logs.groupby(by=["task_type", "task_id"]).apply(normalize_objectives).reset_index(drop=True) + logs = logs.groupby(by=[*run_id]).apply(add_running_pareto_front).reset_index(drop=True) logs = logs.groupby(by=[*run_id, "n_trials"]).apply(calc_hv).reset_index(drop=True) logs["trial_value__cost"] = logs["hypervolume"] #logs["trial_value__cost_raw"].apply(maybe_convert_cost_to_so) print(logs.head()) @@ -805,7 +806,7 @@ def rename_legacy(logs: pd.DataFrame) -> pd.DataFrame: # NOTE(eddiebergman): Use `n_processes=None` as default, which uses `os.cpu_count()` in `Pool` def filelogs_to_df( - rundir: str | list[str], log_fn: str = "trial_logs.jsonl", n_processes: int | None = None + rundir: str | list[str] = "results/", log_fn: str = "trial_logs.jsonl", n_processes: int | None = None ) -> tuple[pd.DataFrame, pd.DataFrame]: """Load logs from file and preprocess. 
From fb2b7831e732b643cde030b9e6a9c765be032205 Mon Sep 17 00:00:00 2001 From: Daphne12345 Date: Tue, 1 Jul 2025 10:35:48 +0200 Subject: [PATCH 12/12] mo with pareto front --- carps/analysis/calc_hypervolume.py | 2 +- carps/analysis/gather_data.py | 4 ++-- carps/analysis/run_autorank.py | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/carps/analysis/calc_hypervolume.py b/carps/analysis/calc_hypervolume.py index 4082fea2f..a0912d308 100644 --- a/carps/analysis/calc_hypervolume.py +++ b/carps/analysis/calc_hypervolume.py @@ -215,7 +215,7 @@ def add_hypervolume_to_df(logs: pd.DataFrame, on_key: str = "trial_value__cost") """ tqdm.pandas(desc="Calc hypervolume...") ids_mo = get_ids_mo(logs) - add_reference_point_partial = partial(add_reference_point, on_key=on_key) + add_reference_point_partial = partial(add_reference_point) mo_cols = ["hypervolume", "reference_point"] for mo_col in mo_cols: if mo_col not in logs.columns: diff --git a/carps/analysis/gather_data.py b/carps/analysis/gather_data.py index 382045193..f5510fe67 100644 --- a/carps/analysis/gather_data.py +++ b/carps/analysis/gather_data.py @@ -612,9 +612,9 @@ def normalize_logs(logs: pd.DataFrame) -> pd.DataFrame: logs["trial_value__cost_raw"] = logs["trial_value__cost"].apply(maybe_convert_cost_dtype) else: logs["trial_value__cost_raw"] = logs["trial_value__cost_raw"].apply(maybe_convert_cost_dtype) - logs = add_hypervolume_to_df(logs, on_key="trial_value__cost_raw") + # logs = add_hypervolume_to_df(logs, on_key="trial_value__cost_raw") # IDs have changed, so we need to recalculate - ids_mo = get_ids_mo(logs) + # ids_mo = get_ids_mo(logs) hv = logs.loc[ids_mo, "hypervolume"] logs.loc[ids_mo, "trial_value__cost"] = -hv # higher is better logs["trial_value__cost"] = logs["trial_value__cost"].astype("float64") diff --git a/carps/analysis/run_autorank.py b/carps/analysis/run_autorank.py index 13ace7d9e..5dcc84b57 100644 --- a/carps/analysis/run_autorank.py +++ b/carps/analysis/run_autorank.py @@ -444,8 +444,7 @@ def cd_evaluation( rope_mode=None, effect_size=res.effect_size, force_mode=None, - sample_matrix=None, - plot_order=None, + # plot_order=None, ) is_significant = True if result.pvalue >= result.alpha: