Skip to content
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions carps/analysis/calc_hypervolume.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pandas as pd
from pymoo.indicators.hv import HV

from carps.analysis.gather_data import convert_mixed_types_to_str
# from carps.analysis.gather_data import convert_mixed_types_to_str

run_id = ["task_type", "benchmark_id", "task_id", "optimizer_id", "seed"]

Expand Down Expand Up @@ -57,9 +57,17 @@ def add_reference_point(x: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.DataFrame: Dataframe with the reference point.
"""
costs = x["trial_value__cost_inc"].apply(lambda x: np.array([np.array(c) for c in x])).to_list()
costs = np.concatenate(costs)
# Flatten and stack all cost vectors
costs = np.vstack([np.array(c) for c in x["trial_value__cost_raw"]])

# Sanity check for consistent dimensionality
if len(set(cost.shape[0] for cost in costs)) != 1:
raise ValueError("Inconsistent number of objectives in cost vectors.")

# Reference point is max across all objectives
reference_point = np.max(costs, axis=0)

# Set reference point per row
x["reference_point"] = [reference_point] * len(x)
return x

Expand All @@ -73,7 +81,7 @@ def calc_hv(x: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.DataFrame: Dataframe with the hypervolume.
"""
F = np.concatenate(np.array([np.array(p) for p in x["trial_value__cost_inc"].to_numpy()]))
F = np.vstack([np.array(p) for p in x["trial_value__cost_raw"]])

ind = HV(ref_point=x["reference_point"].iloc[0], pf=None, nds=False)
x["hypervolume"] = ind(F)
Expand Down
29 changes: 19 additions & 10 deletions carps/analysis/gather_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from carps.utils.loggingutils import get_logger, setup_logging
from carps.utils.task import Task
from carps.utils.trials import TrialInfo
from carps.analysis.calc_hypervolume import calc_hv, add_reference_point, run_id

if TYPE_CHECKING:
from carps.objective_functions.objective_function import ObjectiveFunction
Expand Down Expand Up @@ -389,11 +390,11 @@ def maybe_postadd_task(logs: pd.DataFrame, overwrite: bool = False) -> pd.DataFr
task_cfg = load_task_cfg(task_id=gid, task_index=task_index)

task_cfg_yaml = OmegaConf.to_yaml(task_cfg)
if "${seed}" in task_cfg_yaml:
# Add seed to config to make it resolvable
assert gdf["seed"].nunique() == 1 # noqa: PD101
seed = gdf["seed"].iloc[0]
task_cfg.seed = int(seed)
# if "${seed}" in task_cfg_yaml:
# # Add seed to config to make it resolvable
# assert gdf["seed"].nunique() == 1 # noqa: PD101
# seed = gdf["seed"].iloc[0]
# task_cfg.seed = int(seed)
task_cfg = OmegaConf.to_container(task_cfg, resolve=False)
task_columns = [c for c in gdf.columns if c.startswith("task.")]
if overwrite:
Expand Down Expand Up @@ -440,10 +441,13 @@ def maybe_convert_cost_dtype(x: int | float | str | list) -> float | list[float]
Returns:
float | list[float]: Cost(s).
"""

if isinstance(x, int | float):
return float(x)
if isinstance(x, str):
return eval(x) # noqa: S307
x = eval(x) # noqa: S307
if isinstance(x, dict):
x = eval(x["cost"])
assert isinstance(x, list)
return x

Expand All @@ -463,7 +467,7 @@ def maybe_convert_cost_to_so(x: float | list | np.ndarray) -> float:
float: Single-objective cost or aggregated cost.
"""
if isinstance(x, list | np.ndarray):
return np.sum(x)
return np.sum(x) # TODO change to HV here
if isinstance(x, dict):
assert len(x.values()) == 1
# Most likely comes from database
Expand All @@ -472,7 +476,7 @@ def maybe_convert_cost_to_so(x: float | list | np.ndarray) -> float:
if isinstance(value, str):
value = ast.literal_eval(value)
if isinstance(value, list):
return np.sum(value)
return np.sum(value) # TODO Change to HV here
if isinstance(value, float | int):
return value
if isinstance(x, float):
Expand Down Expand Up @@ -566,7 +570,11 @@ def process_logs(logs: pd.DataFrame, keep_task_columns: list[str] | None = None)

logger.debug("Handle MO costs...")
logs["trial_value__cost_raw"] = logs["trial_value__cost"].apply(maybe_convert_cost_dtype)
logs["trial_value__cost"] = logs["trial_value__cost_raw"].apply(maybe_convert_cost_to_so)
# trial_value__cost_raw for add_reference_point and to calc_hv
logs = logs.groupby(by=["task_type", "task_id"]).apply(add_reference_point).reset_index(drop=True)
logs = logs.groupby(by=[*run_id, "n_trials"]).apply(calc_hv).reset_index(drop=True)
logs["trial_value__cost"] = logs["hypervolume"] #logs["trial_value__cost_raw"].apply(maybe_convert_cost_to_so)
print(logs.head())
logger.debug("Determine incumbent cost...")
logs["trial_value__cost_inc"] = logs.groupby(by=grouper_keys)["trial_value__cost"].transform("cummin")

Expand Down Expand Up @@ -613,6 +621,7 @@ def normalize_logs(logs: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.DataFrame: Normalized logs
"""
grouper_keys = ["task_id", "optimizer_id", "seed"]
logger.info("Start normalization...")
logger.info("Normalize n_trials...")
logs["n_trials_norm"] = logs.groupby("task_id")["n_trials"].transform(normalize)
Expand All @@ -623,7 +632,7 @@ def normalize_logs(logs: pd.DataFrame) -> pd.DataFrame:
hv = logs.loc[ids_mo, "hypervolume"]
logs.loc[ids_mo, "trial_value__cost"] = -hv # higher is better
logs["trial_value__cost"] = logs["trial_value__cost"].astype("float64")
logs["trial_value__cost_inc"] = logs["trial_value__cost"].transform("cummin")
logs["trial_value__cost_inc"] = logs.groupby(by=grouper_keys)["trial_value__cost"].transform("cummin")
logs["trial_value__cost_norm"] = logs.groupby("task_id")["trial_value__cost"].transform(normalize)
logger.info("Calc normalized incumbent cost...")

Expand Down
1 change: 1 addition & 0 deletions carps/analysis/run_autorank.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ def cd_evaluation(
alpha=alpha,
alpha_normality=alpha_normality,
num_samples=len(rank_data),
sample_matrix=None,
posterior_matrix=None,
decision_matrix=None,
rope=None,
Expand Down
44 changes: 27 additions & 17 deletions carps/experimenter/create_cluster_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from py_experimenter.experimenter import PyExperimenter

from carps.utils.loggingutils import CustomEncoder
import pickle as pckl

logger = logging.getLogger("create experiments")

Expand Down Expand Up @@ -72,7 +73,7 @@ def get_experiment_definition(cfg: OmegaConf) -> dict:
cfg_dict = OmegaConf.to_container(cfg=cfg, resolve=True)

cfg_str = json.dumps(cfg_dict, cls=CustomEncoder)
cfg_hash = create_config_hash(cfg)
cfg_hash = create_config_hash_from_full_cfg(cfg)

return {
"config": cfg_str,
Expand Down Expand Up @@ -100,6 +101,7 @@ def fill_database(cfg: DictConfig, experimenter: PyExperimenter) -> None:
DatabaseConnectionError: If there is an error with the database connection.
"""
experiment_definition = get_experiment_definition(cfg)


column_names = list(experimenter.db_connector.database_configuration.keyfields.keys())
exists = False
Expand All @@ -124,7 +126,7 @@ def fill_database(cfg: DictConfig, experimenter: PyExperimenter) -> None:
# experimenter.close_ssh()


@hydra.main(config_path="../configs", config_name="base.yaml", version_base=None) # type: ignore[misc]
@hydra.main(config_path="../configs", config_name="base.yaml", version_base=None, save_as_pckl=True, folder_path="configs_pckl") # type: ignore[misc]
def main(cfg: DictConfig) -> None:
"""Store experiment config in database.

Expand All @@ -134,23 +136,31 @@ def main(cfg: DictConfig) -> None:
Global configuration.

"""
fill_database(cfg, experimenter)
if save_as_pckl:
experiment_definition = get_experiment_definition(cfg)
files = list(Path(folder_path).glob("*.pkl"))

if experiment_definition['config_hash'] not in files:
with open(f"{folder_path}{experiment_definition['config_hash']}.pkl", "wb") as f:
pckl.dump(experiment_definition, f)
else:
experiment_configuration_file_path = Path(__file__).parent / "py_experimenter.yaml"

database_credential_file_path = Path(__file__).parent / "credentials.yaml"
if database_credential_file_path is not None and not database_credential_file_path.exists():
database_credential_file_path = None # type: ignore[assignment]

experimenter = PyExperimenter(
experiment_configuration_file_path=experiment_configuration_file_path,
name="carps",
database_credential_file_path=database_credential_file_path,
log_level=logging.INFO,
use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel,
use_codecarbon=False
)
fill_database(cfg, experimenter)


if __name__ == "__main__":
# TODO make experiment_configuration_file_path and database_credential_file_path a commandline arg
experiment_configuration_file_path = Path(__file__).parent / "py_experimenter.yaml"

database_credential_file_path = Path(__file__).parent / "credentials.yaml"
if database_credential_file_path is not None and not database_credential_file_path.exists():
database_credential_file_path = None # type: ignore[assignment]

experimenter = PyExperimenter(
experiment_configuration_file_path=experiment_configuration_file_path,
name="carps",
database_credential_file_path=database_credential_file_path,
log_level=logging.INFO,
use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel,
)

main()
8 changes: 6 additions & 2 deletions carps/experimenter/database/download_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def main(
pyexperimenter_configuration_file_path: str | None = None,
database_credential_file_path: str | Path | None = None,
outdir: str | Path | None = None,
codecarbon: bool = False
) -> None:
"""Download results from the database and save them to outdir.

Expand Down Expand Up @@ -49,6 +50,7 @@ def main(
database_credential_file_path=database_credential_file_path,
log_file="logs/reset_experiments.log",
use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel,
use_codecarbon=codecarbon
)

experiment_config_table = experimenter.get_table()
Expand All @@ -64,12 +66,14 @@ def main(
logger.info(f"\tFrom them, found {n_errored} errored runs of type {task_type}. ❌")
trajectory_table = experimenter.get_logtable("trajectory")
trials_table = experimenter.get_logtable("trials")
codecarbon_table = experimenter.get_codecarbon_table()
if codecarbon:
codecarbon_table = experimenter.get_codecarbon_table()

experiment_config_table.to_parquet(outdir / "experiment_config.parquet", index=False)
trajectory_table.to_parquet(outdir / "trajectory.parquet", index=False)
trials_table.to_parquet(outdir / "trials.parquet", index=False)
codecarbon_table.to_parquet(outdir / "codecarbon.parquet", index=False)
if codecarbon:
codecarbon_table.to_parquet(outdir / "codecarbon.parquet", index=False)
logger.info(
"Downloaded results from the database. "
f"Saved to '{outdir}'. "
Expand Down
19 changes: 7 additions & 12 deletions carps/experimenter/database/process_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def add_metadata(
"config",
"config_hash",
"name",
"n_trials"
]
metadata_columns = [c for c in experiment_config_table.columns if c not in ignore_columns]

Expand Down Expand Up @@ -100,9 +101,9 @@ def process_single_run_from_database(
if logs_from_one_run["experiment_id"].nunique() != 1: # noqa: PD101
raise ValueError("Multiple values for `experiment_id` found in the logs. Something is suspicious.")
experiment_id = logs_from_one_run["experiment_id"].iloc[0]
logs_from_one_run = process_logs(logs_from_one_run)
if only_incumbents:
logs_from_one_run = filter_non_incumbent_entries(logs=logs_from_one_run)
# logs_from_one_run = process_logs(logs_from_one_run)
# if only_incumbents:
# logs_from_one_run = filter_non_incumbent_entries(logs=logs_from_one_run)
return add_metadata(
logs_from_one_run=logs_from_one_run,
experiment_id=experiment_id,
Expand Down Expand Up @@ -144,7 +145,7 @@ def process_logs_from_database(
experiment_config_table_filename: str = "experiment_config.parquet",
output_filename: str = "processed_logs.parquet",
results_dir: str = "experimenter/results",
only_incumbents: bool = True, # noqa: FBT001, FBT002
only_incumbents: bool = False, # noqa: FBT001, FBT002
) -> pd.DataFrame:
"""Process logs from the database with multiprocessing for speed-up.

Expand Down Expand Up @@ -178,12 +179,6 @@ def process_logs_from_database(
only_incumbents=only_incumbents,
)

# Set up multiprocessing pool to process the logs
# with Pool() as pool:
# # Wrap pool.imap_unordered with tqdm to show the progress bar
# result = list(tqdm(
# pool.imap_unordered(
# process_experiment_partial, experiment_ids), total=len(experiment_ids), desc="Processing experiments"))
logger.info(f"Start processing {len(experiment_ids)} experiments... This might take a while...")
result = [
process_experiment_partial(experiment_id)
Expand All @@ -192,9 +187,9 @@ def process_logs_from_database(

# Combine the results into a single DataFrame
processed_logs = pd.concat(result, ignore_index=True).reset_index(drop=True)
processed_logs.to_parquet(output_filename, index=False)
processed_logs = process_logs(processed_logs)
processed_logs.to_parquet(output_filename, index=False, engine="fastparquet")
logger.info(f"Processed logs saved to {output_filename} 💌.")
return processed_logs


if __name__ == "__main__":
Expand Down
5 changes: 3 additions & 2 deletions carps/experimenter/py_experimenter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ PY_EXPERIMENTER:
n_jobs: 1

Database:
use_ssh_tunnel: true
use_ssh_tunnel: false
provider: mysql
database: smacbenchmarking
table:
Expand All @@ -21,7 +21,7 @@ PY_EXPERIMENTER:
task_type:
type: VARCHAR(50)
optimizer_id:
type: VARCHAR(50)
type: VARCHAR(200)
optimizer_container_id:
type: VARCHAR(50)
seed:
Expand Down Expand Up @@ -54,6 +54,7 @@ PY_EXPERIMENTER:
trial_value__additional_info: JSON
trajectory:
n_trials: INT
n_function_calls: INT
trial_info__config: JSON
trial_info__instance: INT
trial_info__seed: INT
Expand Down
1 change: 1 addition & 0 deletions carps/experimenter/scrape_results_to_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
database_credential_file_path=database_credential_file_path,
log_level=logging.INFO,
use_ssh_tunnel=OmegaConf.load(experiment_configuration_file_path).PY_EXPERIMENTER.Database.use_ssh_tunnel,
use_codecarbon=False
)


Expand Down
Loading
Loading