Skip to content

Commit 4f11321

Browse files
committed
add XGBoost Image Classifier test function
1 parent 88bfc1f commit 4f11321

File tree

4 files changed

+190
-0
lines changed

4 files changed

+190
-0
lines changed

src/surfaces/test_functions/machine_learning/image/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@
1515
except ImportError:
1616
_HAS_TENSORFLOW = False
1717

18+
# XGBoost classifier (requires xgboost)
19+
try:
20+
from .classification import XGBoostImageClassifierFunction
21+
22+
_HAS_XGBOOST = True
23+
except ImportError:
24+
_HAS_XGBOOST = False
25+
1826
__all__ = [
1927
"SVMImageClassifierFunction",
2028
"RandomForestImageClassifierFunction",
@@ -27,3 +35,6 @@
2735
"DeepCNNClassifierFunction",
2836
]
2937
)
38+
39+
if _HAS_XGBOOST:
40+
__all__.append("XGBoostImageClassifierFunction")

src/surfaces/test_functions/machine_learning/image/classification/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@
1515
except ImportError:
1616
_HAS_TENSORFLOW = False
1717

18+
# XGBoost classifier (requires xgboost)
19+
try:
20+
from .test_functions import XGBoostImageClassifierFunction
21+
22+
_HAS_XGBOOST = True
23+
except ImportError:
24+
_HAS_XGBOOST = False
25+
1826
__all__ = [
1927
"SVMImageClassifierFunction",
2028
"RandomForestImageClassifierFunction",
@@ -27,3 +35,6 @@
2735
"DeepCNNClassifierFunction",
2836
]
2937
)
38+
39+
if _HAS_XGBOOST:
40+
__all__.append("XGBoostImageClassifierFunction")

src/surfaces/test_functions/machine_learning/image/classification/test_functions/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@
1414
except ImportError:
1515
_HAS_TENSORFLOW = False
1616

17+
# XGBoost classifier (requires xgboost)
18+
try:
19+
from .xgboost_image_classifier import XGBoostImageClassifierFunction
20+
21+
_HAS_XGBOOST = True
22+
except ImportError:
23+
_HAS_XGBOOST = False
24+
1725
__all__ = [
1826
"SVMImageClassifierFunction",
1927
"RandomForestImageClassifierFunction",
@@ -26,3 +34,6 @@
2634
"DeepCNNClassifierFunction",
2735
]
2836
)
37+
38+
if _HAS_XGBOOST:
39+
__all__.append("XGBoostImageClassifierFunction")
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Author: Simon Blanke
2+
3+
# License: MIT License
4+
5+
"""XGBoost Image Classifier test function."""
6+
7+
import numpy as np
8+
9+
from .._base_image_classification import BaseImageClassification
10+
from ..datasets import DATASETS
11+
12+
13+
def _check_xgboost():
    """Verify that the optional ``xgboost`` dependency can be imported.

    Returns
    -------
    bool
        ``True`` when ``xgboost`` imports successfully.

    Raises
    ------
    ImportError
        If ``xgboost`` cannot be imported. The message names the extra to
        install, and the original import failure is chained as the cause.
    """
    try:
        import xgboost  # noqa: F401
    except ImportError as err:
        # Chain explicitly so the traceback reports the underlying import
        # failure as the direct cause rather than as incidental context.
        raise ImportError(
            "XGBoost image classifier requires xgboost. "
            "Install with: pip install surfaces[xgboost]"
        ) from err
    return True
24+
25+
26+
class XGBoostImageClassifierFunction(BaseImageClassification):
    """XGBoost Image Classifier test function.

    Scores an ``xgboost.XGBClassifier`` by mean cross-validated accuracy on
    standardized, PCA-reduced image features.

    Parameters
    ----------
    dataset : str, default="mnist"
        Dataset to use. Must be a key of ``DATASETS`` (listed in
        ``available_datasets``).
    cv : int, default=3
        Number of cross-validation folds. Must be one of ``available_cv``.
    n_components : int, default=50
        Number of PCA components to retain.
    objective : str, default="maximize"
        Either "minimize" or "maximize".
    sleep : float, default=0
        Artificial delay in seconds.
    memory, collect_data, callbacks, catch_errors
        Forwarded unchanged to ``BaseImageClassification.__init__``.
    use_surrogate : bool, default=False
        If True, use pre-trained surrogate for fast evaluation.

    Raises
    ------
    ImportError
        If ``xgboost`` is not installed.
    ValueError
        If ``dataset`` is not a key of ``DATASETS`` or ``cv`` is not in
        ``available_cv``.

    Examples
    --------
    >>> from surfaces.test_functions.machine_learning.image import (
    ...     XGBoostImageClassifierFunction
    ... )
    >>> func = XGBoostImageClassifierFunction(dataset="mnist")
    >>> result = func({"n_estimators": 100, "max_depth": 6, "learning_rate": 0.1})
    """

    name = "XGBoost Image Classifier Function"
    _name_ = "xgboost_image_classifier"
    __name__ = "XGBoostImageClassifierFunction"

    available_datasets = list(DATASETS.keys())
    available_cv = [2, 3, 5]

    # Search space parameters
    para_names = ["n_estimators", "max_depth", "learning_rate"]
    n_estimators_default = [50, 100, 150, 200, 250]
    max_depth_default = [3, 4, 5, 6, 7, 8, 10]
    learning_rate_default = [0.01, 0.05, 0.1, 0.2, 0.3]

    def __init__(
        self,
        dataset: str = "mnist",
        cv: int = 3,
        n_components: int = 50,
        objective: str = "maximize",
        sleep: float = 0,
        memory: bool = False,
        collect_data: bool = True,
        callbacks=None,
        catch_errors=None,
        use_surrogate: bool = False,
    ):
        # Fail fast with an actionable message when xgboost is missing.
        _check_xgboost()

        if dataset not in DATASETS:
            raise ValueError(
                f"Unknown dataset '{dataset}'. Available: {self.available_datasets}"
            )

        if cv not in self.available_cv:
            raise ValueError(f"Invalid cv={cv}. Available: {self.available_cv}")

        # These attributes are assigned before the base initializer runs,
        # matching the original statement order.
        self.dataset = dataset
        self.cv = cv
        self.n_components = n_components
        self._dataset_loader = DATASETS[dataset]

        super().__init__(
            objective=objective,
            sleep=sleep,
            memory=memory,
            collect_data=collect_data,
            callbacks=callbacks,
            catch_errors=catch_errors,
            use_surrogate=use_surrogate,
        )

    @property
    def search_space(self):
        """Search space containing hyperparameters.

        Returns
        -------
        dict
            Maps each hyperparameter name to a list of candidate values.
            The lists are fresh copies, so mutating the result does not
            alter the class-level defaults shared by all instances.
        """
        return {
            "n_estimators": list(self.n_estimators_default),
            "max_depth": list(self.max_depth_default),
            "learning_rate": list(self.learning_rate_default),
        }

    def _create_objective_function(self):
        """Create objective function with fixed dataset and cv.

        Loads the dataset, standardizes it, reduces it with PCA once, and
        stores a closure in ``self.pure_objective_function`` that returns
        the mean cross-validated accuracy for a given parameter dict.
        """
        from sklearn.decomposition import PCA
        from sklearn.model_selection import cross_val_score
        from sklearn.preprocessing import StandardScaler
        from xgboost import XGBClassifier

        X_raw, y = self._dataset_loader()

        # Apply PCA for dimensionality reduction.
        # NOTE(review): the scaler and PCA are fitted on the full dataset
        # before cross-validation, so every fold's held-out split influences
        # the preprocessing. Presumably a deliberate speed trade-off;
        # confirm before comparing scores against leakage-free baselines.
        scaler = StandardScaler()
        pca = PCA(n_components=self.n_components, random_state=42)
        X_scaled = scaler.fit_transform(X_raw)
        X = pca.fit_transform(X_scaled)

        cv = self.cv
        n_classes = len(np.unique(y))

        # Settings that do not depend on the sampled hyperparameters are
        # built once here instead of on every evaluation.
        is_multiclass = n_classes > 2
        fixed_kwargs = {
            "objective": "multi:softmax" if is_multiclass else "binary:logistic",
            "num_class": n_classes if is_multiclass else None,
            "random_state": 42,
            "n_jobs": -1,
            "verbosity": 0,
        }

        def xgboost_image_classifier(params):
            model = XGBClassifier(
                n_estimators=params["n_estimators"],
                max_depth=params["max_depth"],
                learning_rate=params["learning_rate"],
                **fixed_kwargs,
            )
            scores = cross_val_score(model, X, y, cv=cv, scoring="accuracy")
            return scores.mean()

        self.pure_objective_function = xgboost_image_classifier

    def _get_surrogate_params(self, params):
        """Add fixed parameters for surrogate prediction.

        Returns a new dict containing ``params`` plus this instance's
        ``dataset``, ``cv`` and ``n_components``; ``params`` itself is not
        modified.
        """
        return {
            **params,
            "dataset": self.dataset,
            "cv": self.cv,
            "n_components": self.n_components,
        }

0 commit comments

Comments
 (0)