From 23ee19cea57c33ff524699a3c7a3c6c798e6c7cb Mon Sep 17 00:00:00 2001
From: lhennig0103 <144096938+lhennig0103@users.noreply.github.com>
Date: Thu, 13 Nov 2025 17:06:54 +0100
Subject: [PATCH 1/3] Add RunHistoryPowerTransformEncoder class

Implement RunHistoryPowerTransformEncoder to apply PowerTransformer on response values.
---
 .../encoder/power_transform_encoder.py        | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 smac/runhistory/encoder/power_transform_encoder.py

diff --git a/smac/runhistory/encoder/power_transform_encoder.py b/smac/runhistory/encoder/power_transform_encoder.py
new file mode 100644
index 000000000..5b3152d8b
--- /dev/null
+++ b/smac/runhistory/encoder/power_transform_encoder.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+import numpy as np
+from sklearn.preprocessing import PowerTransformer
+
+from smac.runhistory.encoder.encoder import RunHistoryEncoder
+from smac.utils.logging import get_logger
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+logger = get_logger(__name__)
+
+
+class RunHistoryPowerTransformEncoder(RunHistoryEncoder):
+    """Run history encoder that power-transforms the response values.
+
+    A fresh Yeo-Johnson ``PowerTransformer`` with standardization enabled is
+    fitted on every call, so the output depends only on the values passed in.
+    """
+
+    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
+        """Fit a Yeo-Johnson power transform on ``values`` and apply it.
+
+        Parameters
+        ----------
+        values : np.ndarray
+            Response values; reshaped into a single column before fitting.
+
+        Returns
+        -------
+        np.ndarray
+            Column vector of shape ``(values.size, 1)``. An empty input is
+            returned reshaped, without fitting a transformer.
+        """
+        column = values.reshape(-1, 1)
+
+        # Nothing to fit on: hand back the (0, 1)-shaped input untouched.
+        if column.size == 0:
+            logger.debug("Received empty array for transformation.")
+            return column
+
+        yeo_johnson = PowerTransformer(method="yeo-johnson", standardize=True)
+        return yeo_johnson.fit_transform(column)

From a30ca7255f3ee893aad66f66b674c15208ae401b Mon Sep 17 00:00:00 2001
From: lhennig0103 <144096938+lhennig0103@users.noreply.github.com>
Date: Thu, 13 Nov 2025 17:07:30 +0100
Subject: [PATCH 2/3] Add RunHistoryPercentileEncoder class

Implement RunHistoryPercentileEncoder for response value transformation.
---
 smac/runhistory/encoder/percentile_encoder.py | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 smac/runhistory/encoder/percentile_encoder.py

diff --git a/smac/runhistory/encoder/percentile_encoder.py b/smac/runhistory/encoder/percentile_encoder.py
new file mode 100644
index 000000000..71f3ca280
--- /dev/null
+++ b/smac/runhistory/encoder/percentile_encoder.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+import numpy as np
+import scipy.stats
+
+from smac import constants
+from smac.runhistory.encoder.encoder import RunHistoryEncoder
+from smac.utils.logging import get_logger
+
+__copyright__ = "Copyright 2022, automl.org"
+__license__ = "3-clause BSD"
+
+logger = get_logger(__name__)
+
+
+class RunHistoryPercentileEncoder(RunHistoryEncoder):
+    """Run history encoder that maps costs to normal scores of their ranks."""
+
+    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
+        """Transform the response values into rank-based normal scores.
+
+        The costs are clipped to ``constants.MINIMAL_COST_FOR_LOG`` if any of
+        them is non-positive, log transformed, converted to their percentile
+        rank within the given sample (ties share their average rank) and
+        finally passed through the inverse CDF of the standard normal
+        distribution.
+
+        Parameters
+        ----------
+        values : np.ndarray
+            Response values. Input with more than one dimension is averaged
+            along axis 1 first.
+
+        Returns
+        -------
+        np.ndarray
+            Transformed values as a column vector of shape ``(n, 1)``.
+        """
+        logger.debug("Encoder input values.shape = %s", values.shape)
+
+        if values.ndim > 1:
+            # Only warn when several values per row are really collapsed; a
+            # single column is merely flattened by the mean.
+            if values.shape[1] > 1:
+                logger.warning(
+                    "Received values with shape %s, aggregating along axis=1.",
+                    values.shape,
+                )
+            values = np.mean(values, axis=1)
+
+        # Nothing to rank; also avoids dividing by a sample size of zero.
+        if values.size == 0:
+            return np.empty((0, 1), dtype=float)
+
+        # The logarithm below needs strictly positive costs.
+        # NOTE(review): this ties all non-positive costs - confirm intended.
+        if np.any(values <= 0):
+            logger.warning(
+                "Got cost <= 0. Replacing by %f since we use log cost.",
+                constants.MINIMAL_COST_FOR_LOG,
+            )
+            values = np.clip(values, constants.MINIMAL_COST_FOR_LOG, None)
+
+        log_values = np.log(values)
+
+        # The average rank divided by n equals
+        # scipy.stats.percentileofscore(log_values, v) / 100 for every sample
+        # member v, but costs O(n log n) in total instead of O(n^2).
+        eps = 1e-6  # keep strictly within (0, 1) so that ppf stays finite
+        ranks = scipy.stats.rankdata(log_values)
+        quants = np.clip(ranks / log_values.size, eps, 1 - eps)
+
+        # Inverse Gaussian CDF transformation
+        output = scipy.stats.norm.ppf(quants).reshape((-1, 1))
+        logger.debug("Encoder output shape = %s", output.shape)
+
+        return output

From 88d4facd26299f9a9116fb8e6885a3f61aff1851 Mon Sep 17 00:00:00 2001
From: lhennig0103 <144096938+lhennig0103@users.noreply.github.com>
Date: Thu, 13 Nov 2025 17:07:57 +0100
Subject: [PATCH 3/3] Implement Gaussian Copula Encoder for RunHistory

---
 .../encoder/gaussian_copula_encoder.py        | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 smac/runhistory/encoder/gaussian_copula_encoder.py

diff --git a/smac/runhistory/encoder/gaussian_copula_encoder.py b/smac/runhistory/encoder/gaussian_copula_encoder.py
new file mode 100644
index 000000000..dde723864
--- /dev/null
+++ b/smac/runhistory/encoder/gaussian_copula_encoder.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import numpy as np
+import scipy.stats
+
+from smac import constants
+from smac.runhistory.encoder.encoder import RunHistoryEncoder
+from smac.utils.logging import get_logger
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+logger = get_logger(__name__)
+
+
+class RunHistoryGaussianCopulaEncoder(RunHistoryEncoder):
+    """Run history encoder applying a rank-based Gaussian copula transform."""
+
+    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
+        """Map the response values to normal scores of their clipped ranks.
+
+        The values are ranked, the ranks are scaled to ``[0, 1]``, clipped
+        away from both ends by a sample-size dependent cutoff and passed
+        through the inverse CDF of the standard normal distribution.
+
+        Parameters
+        ----------
+        values : np.ndarray
+            Response values. The array is flattened for ranking and is never
+            modified in place.
+
+        Returns
+        -------
+        np.ndarray
+            Transformed values as a column vector of shape ``(values.size, 1)``.
+        """
+        # Ensure minimal value is sufficiently large
+        min_log_cost = max(constants.MINIMAL_COST_FOR_LOG, 1e-10)
+
+        if np.any(values <= 0):
+            logger.warning(
+                "Got cost of smaller/equal to 0. Replace by %f since we use"
+                " log cost.",
+                min_log_cost,
+            )
+            # Build a new array rather than writing into the caller's data.
+            # NOTE(review): this ties all costs below the minimum although no
+            # log is taken in this encoder - confirm the clamp is intended.
+            values = np.where(values < min_log_cost, min_log_cost, values)
+
+        flat = values.flatten()
+
+        # Treat fewer than two values as two so that neither n - 1 nor log(n)
+        # becomes zero.
+        n = max(flat.size, 2)
+        quants = (scipy.stats.rankdata(flat) - 1) / (n - 1)
+
+        # Ensure cutoff does not exceed reasonable values
+        cutoff = min(0.1, 1 / (4 * np.power(n, 0.25) * np.sqrt(np.pi * np.log(n))))
+        quants = np.clip(quants, a_min=cutoff, a_max=1 - cutoff)
+
+        # Inverse Gaussian CDF, evaluated on the whole array at once
+        return scipy.stats.norm.ppf(quants).reshape((-1, 1))