From 23ee19cea57c33ff524699a3c7a3c6c798e6c7cb Mon Sep 17 00:00:00 2001
From: lhennig0103 <144096938+lhennig0103@users.noreply.github.com>
Date: Thu, 13 Nov 2025 17:06:54 +0100
Subject: [PATCH 1/3] Add RunHistoryPowerTransformEncoder class

Implement RunHistoryPowerTransformEncoder to apply PowerTransformer on response values.
---
 .../encoder/power_transform_encoder.py        | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 smac/runhistory/encoder/power_transform_encoder.py

diff --git a/smac/runhistory/encoder/power_transform_encoder.py b/smac/runhistory/encoder/power_transform_encoder.py
new file mode 100644
index 000000000..5b3152d8b
--- /dev/null
+++ b/smac/runhistory/encoder/power_transform_encoder.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+import numpy as np
+from sklearn.preprocessing import PowerTransformer
+
+from smac.runhistory.encoder.encoder import RunHistoryEncoder
+from smac.utils.logging import get_logger
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+logger = get_logger(__name__)
+
+
+class RunHistoryPowerTransformEncoder(RunHistoryEncoder):
+    """Encoder that applies a Yeo-Johnson power transform to the response values."""
+
+    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
+        """Apply a standardized Yeo-Johnson power transform to the response values.
+
+        A fresh ``PowerTransformer`` is fitted on every call, so the result only
+        depends on the values passed in.
+
+        Parameters
+        ----------
+        values : np.ndarray
+            Response values. They are reshaped into a single column.
+
+        Returns
+        -------
+        np.ndarray
+            Transformed values of shape ``(n, 1)``. An empty input is returned
+            reshaped to ``(0, 1)`` without fitting a transformer.
+        """
+        if values.size == 0:
+            logger.debug("Received empty array for transformation.")
+            return values.reshape(-1, 1)
+
+        column = values.reshape(-1, 1)
+        transformer = PowerTransformer(method="yeo-johnson", standardize=True)
+        return transformer.fit_transform(column)

From a30ca7255f3ee893aad66f66b674c15208ae401b Mon Sep 17 00:00:00 2001
From: lhennig0103 <144096938+lhennig0103@users.noreply.github.com>
Date: Thu, 13 Nov 2025 17:07:30 +0100
Subject: [PATCH 2/3] Add RunHistoryPercentileEncoder class

Implement RunHistoryPercentileEncoder for response value transformation.
---
 smac/runhistory/encoder/percentile_encoder.py | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 smac/runhistory/encoder/percentile_encoder.py

diff --git a/smac/runhistory/encoder/percentile_encoder.py b/smac/runhistory/encoder/percentile_encoder.py
new file mode 100644
index 000000000..71f3ca280
--- /dev/null
+++ b/smac/runhistory/encoder/percentile_encoder.py
@@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import numpy as np
+import scipy.stats
+
+from smac import constants
+from smac.runhistory.encoder.encoder import RunHistoryEncoder
+from smac.utils.logging import get_logger
+
+__copyright__ = "Copyright 2022, automl.org"
+__license__ = "3-clause BSD"
+
+logger = get_logger(__name__)
+
+
+class RunHistoryPercentileEncoder(RunHistoryEncoder):
+    """Encoder mapping response values to normal scores of their rank quantiles."""
+
+    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
+        """Map the response values to normal scores of their empirical quantiles.
+
+        The values are clipped from below, log-transformed, converted to
+        rank-based quantiles (ties share their average rank) and finally passed
+        through the inverse standard normal CDF.
+
+        Parameters
+        ----------
+        values : np.ndarray
+            Response values. Arrays with more than one dimension are averaged
+            along axis 1 first.
+
+        Returns
+        -------
+        np.ndarray
+            Transformed values as a single column of shape ``(n, 1)``.
+        """
+        logger.debug("Encoder input values.shape = %s", values.shape)
+
+        if values.ndim > 1:
+            # A single column only needs flattening; warn only when several
+            # values per row are really averaged.
+            if values.shape[1] > 1:
+                logger.warning("Received values with shape %s, aggregating along axis=1.", values.shape)
+            values = np.mean(values, axis=1)
+
+        # Replace non-positive values with the minimal cost so the log is defined.
+        if np.any(values <= 0):
+            logger.warning(
+                "Got cost <= 0. Replacing by %f since we use log cost.",
+                constants.MINIMAL_COST_FOR_LOG,
+            )
+            values = np.clip(values, constants.MINIMAL_COST_FOR_LOG, None)
+
+        log_values = np.log(values)
+
+        # rankdata(x) / n equals percentileofscore(x, v, kind="rank") / 100 for
+        # every element v of x, but needs one sort instead of one scan per element.
+        eps = 1e-6  # keep quantiles strictly within (0, 1) so ppf stays finite
+        quants = np.clip(scipy.stats.rankdata(log_values) / log_values.size, eps, 1 - eps)
+
+        output = scipy.stats.norm.ppf(quants).reshape((-1, 1))
+        logger.debug("Encoder output shape = %s", output.shape)
+
+        return output

From 88d4facd26299f9a9116fb8e6885a3f61aff1851 Mon Sep 17 00:00:00 2001
From: lhennig0103 <144096938+lhennig0103@users.noreply.github.com>
Date: Thu, 13 Nov 2025 17:07:57 +0100
Subject: [PATCH 3/3] Implement Gaussian Copula Encoder for RunHistory

---
 .../encoder/gaussian_copula_encoder.py        | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 smac/runhistory/encoder/gaussian_copula_encoder.py

diff --git a/smac/runhistory/encoder/gaussian_copula_encoder.py b/smac/runhistory/encoder/gaussian_copula_encoder.py
new file mode 100644
index 000000000..dde723864
--- /dev/null
+++ b/smac/runhistory/encoder/gaussian_copula_encoder.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+import numpy as np
+import scipy.stats
+
+from smac import constants
+from smac.runhistory.encoder.encoder import RunHistoryEncoder
+from smac.utils.logging import get_logger
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+
+logger = get_logger(__name__)
+
+
+class RunHistoryGaussianCopulaEncoder(RunHistoryEncoder):
+    """Encoder mapping response values to normal scores of clipped rank quantiles."""
+
+    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
+        """Transform the response values with a rank-based Gaussian copula.
+
+        The values are ranked, the ranks are scaled to ``[0, 1]``, clipped to
+        ``[cutoff, 1 - cutoff]`` and passed through the inverse standard normal
+        CDF. The input array is not modified.
+
+        Parameters
+        ----------
+        values : np.ndarray
+            Response values; they are flattened before ranking.
+
+        Returns
+        -------
+        np.ndarray
+            Transformed values as a single column of shape ``(n, 1)``.
+        """
+        # Ensure minimal value is sufficiently large
+        min_log_cost = max(constants.MINIMAL_COST_FOR_LOG, 1e-10)
+
+        flat = values.flatten()  # always a copy, so the caller's array stays untouched
+        if np.any(flat <= 0):
+            logger.warning(
+                "Got cost of smaller/equal to 0. Replace by %f since we use log cost.",
+                min_log_cost,
+            )
+            flat = np.clip(flat, min_log_cost, None)
+
+        # Use at least two samples so that neither (n - 1) nor log(n) is zero.
+        n = max(flat.size, 2)
+        quants = (scipy.stats.rankdata(flat) - 1) / (n - 1)
+
+        # Clip the quantiles away from 0 and 1 so that ppf never returns +/- inf.
+        cutoff = min(0.1, 1 / (4 * np.power(n, 0.25) * np.sqrt(np.pi * np.log(n))))
+        quants = np.clip(quants, a_min=cutoff, a_max=1 - cutoff)
+
+        return scipy.stats.norm.ppf(quants).reshape((-1, 1))