Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions smac/runhistory/encoder/gaussian_copula_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import annotations

import numpy as np

from smac import constants
from smac.runhistory.encoder.encoder import RunHistoryEncoder
from smac.utils.logging import get_logger
import scipy

__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"


logger = get_logger(__name__)


class RunHistoryGaussianCopulaEncoder(RunHistoryEncoder):
    """Run history encoder that maps costs to Gaussian-copula scores.

    Costs are replaced by their rank-based empirical quantiles, the quantiles
    are clipped away from 0 and 1, and the result is passed through the
    inverse CDF of the standard normal distribution.
    """

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform response values with a rank-based Gaussian copula.

        No logarithm is taken: the output depends only on the ranks of the
        (clamped) input values.

        Parameters
        ----------
        values : np.ndarray
            Response values (costs). The array is flattened before ranking
            and is never modified in place.

        Returns
        -------
        np.ndarray
            Array of shape ``(values.size, 1)`` holding the standard-normal
            scores of the clipped rank quantiles.
        """
        # Lower bound applied to the costs when any of them is non-positive.
        min_log_cost = max(constants.MINIMAL_COST_FOR_LOG, 1e-10)

        if np.any(values <= 0):
            logger.warning(
                "Got cost of smaller/equal to 0. Replace by %f since we use"
                " log cost.",
                min_log_cost,
            )
            # np.maximum allocates a new array, so the caller's data is left
            # untouched (the previous masked assignment wrote into the input
            # and truncated the bound to 0 for integer arrays).
            # NOTE(review): clamping makes all costs below the bound tie in
            # the ranking; confirm this is intended for a rank transform.
            values = np.maximum(values, min_log_cost)

        flat = np.asarray(values).ravel()

        # Use at least two "samples" so that (n - 1) and log(n) are non-zero.
        n = max(flat.size, 2)

        # Ranks start at 1, so this maps them onto [0, 1].
        quants = (scipy.stats.rankdata(flat) - 1) / (n - 1)

        # Keep the quantiles away from 0 and 1, where the normal inverse CDF
        # diverges; the cutoff shrinks with n and is capped at 0.1.
        cutoff = min(0.1, 1 / (4 * np.power(n, 0.25) * np.sqrt(np.pi * np.log(n))))
        quants = np.clip(quants, cutoff, 1 - cutoff)

        # norm.ppf is vectorised, so no per-element Python loop is needed.
        return scipy.stats.norm.ppf(quants).reshape((-1, 1))

53 changes: 53 additions & 0 deletions smac/runhistory/encoder/percentile_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from __future__ import annotations
import numpy as np
import scipy.stats
from smac import constants
from smac.runhistory.encoder.encoder import RunHistoryEncoder
from smac.utils.logging import get_logger

__copyright__ = "Copyright 2022, automl.org"
__license__ = "3-clause BSD"

logger = get_logger(__name__)


class RunHistoryPercentileEncoder(RunHistoryEncoder):
    """Run history encoder that maps costs to normal scores of their percentiles."""

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Transform response values into standard-normal percentile scores.

        The values are (if necessary) averaged along axis 1, clamped to a
        positive minimum when any of them is non-positive, log-transformed,
        converted to rank-based quantiles and finally passed through the
        inverse CDF of the standard normal distribution.

        Parameters
        ----------
        values : np.ndarray
            Response values (costs). The array is not modified in place.

        Returns
        -------
        np.ndarray
            Array of shape ``(n, 1)`` with the transformed values, where
            ``n`` is the number of values left after aggregation.
        """
        # Diagnostics go through the logger instead of stdout.
        logger.debug("Encoder input values.shape = %s", values.shape)

        # Safeguard: collapse everything along axis 1 to one value per row.
        if values.ndim > 1:
            # A single column is merely flattened by the mean, so only warn
            # when several values are actually being averaged.
            if values.shape[1] > 1:
                logger.warning(
                    "Received values with shape %s, aggregating along axis=1.",
                    values.shape,
                )
            values = np.mean(values, axis=1)

        # Replace too-small values so that the logarithm below is defined.
        if np.any(values <= 0):
            logger.warning(
                "Got cost <= 0. Replacing by %f since we use log cost.",
                constants.MINIMAL_COST_FOR_LOG,
            )
            values = np.clip(values, constants.MINIMAL_COST_FOR_LOG, None)

        log_values = np.log(values)

        # Rank-based quantiles: the average rank divided by the sample size
        # equals percentileofscore(..., kind="rank") / 100 for every element,
        # but is computed with one sort instead of one scan per element.
        n = log_values.size
        ranks = scipy.stats.rankdata(log_values, method="average")

        eps = 1e-6  # keep strictly within (0, 1) so that ppf stays finite
        quants = np.clip(ranks / max(n, 1), eps, 1 - eps)

        # Inverse Gaussian CDF transformation.
        output = scipy.stats.norm.ppf(quants).reshape((-1, 1))

        logger.debug("Encoder output shape = %s", output.shape)

        return output
25 changes: 25 additions & 0 deletions smac/runhistory/encoder/power_transform_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from __future__ import annotations

import numpy as np
from sklearn.preprocessing import PowerTransformer

from smac.runhistory.encoder.encoder import RunHistoryEncoder
from smac.utils.logging import get_logger

__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"

logger = get_logger(__name__)


class RunHistoryPowerTransformEncoder(RunHistoryEncoder):
    """Run history encoder built on scikit-learn's ``PowerTransformer``."""

    def transform_response_values(self, values: np.ndarray) -> np.ndarray:
        """Fit a standardizing Yeo-Johnson power transform and apply it.

        Parameters
        ----------
        values : np.ndarray
            Response values; they are reshaped into a single column.

        Returns
        -------
        np.ndarray
            The transformed column, or the (empty) reshaped input when there
            is nothing to transform.
        """
        column = values.reshape(-1, 1)

        # Nothing to fit on: hand back the empty column unchanged.
        if not column.size:
            logger.debug("Received empty array for transformation.")
            return column

        # A fresh transformer is fitted on every call.
        yeo_johnson = PowerTransformer(method="yeo-johnson", standardize=True)
        return yeo_johnson.fit_transform(column)
Loading