# Author: Simon Blanke

# License: MIT License

"""XGBoost Image Classifier test function."""

import numpy as np

from .._base_image_classification import BaseImageClassification
from ..datasets import DATASETS


def _check_xgboost():
    """Check if xgboost is available."""
    try:
        import xgboost  # noqa: F401

        return True
    except ImportError:
        raise ImportError(
            "XGBoost image classifier requires xgboost. "
            "Install with: pip install surfaces[xgboost]"
        )


class XGBoostImageClassifierFunction(BaseImageClassification):
    """XGBoost Image Classifier test function.

    Uses XGBoost on PCA-reduced image features for classification.
    XGBoost is a gradient boosting library optimized for speed and performance.

    Parameters
    ----------
    dataset : str, default="mnist"
        Dataset to use. One of: "mnist", "fashion_mnist".
    cv : int, default=3
        Number of cross-validation folds.
    n_components : int, default=50
        Number of PCA components to retain.
    use_surrogate : bool, default=False
        If True, use pre-trained surrogate for fast evaluation.
    objective : str, default="maximize"
        Either "minimize" or "maximize".
    sleep : float, default=0
        Artificial delay in seconds.

    Examples
    --------
    >>> from surfaces.test_functions.machine_learning.image import (
    ...     XGBoostImageClassifierFunction
    ... )
    >>> func = XGBoostImageClassifierFunction(dataset="mnist")
    >>> result = func({"n_estimators": 100, "max_depth": 6, "learning_rate": 0.1})
    """

    name = "XGBoost Image Classifier Function"
    _name_ = "xgboost_image_classifier"
    __name__ = "XGBoostImageClassifierFunction"

    available_datasets = list(DATASETS.keys())
    available_cv = [2, 3, 5]

    # Search space parameters
    para_names = ["n_estimators", "max_depth", "learning_rate"]
    n_estimators_default = [50, 100, 150, 200, 250]
    max_depth_default = [3, 4, 5, 6, 7, 8, 10]
    learning_rate_default = [0.01, 0.05, 0.1, 0.2, 0.3]

    def __init__(
        self,
        dataset: str = "mnist",
        cv: int = 3,
        n_components: int = 50,
        objective: str = "maximize",
        sleep: float = 0,
        memory: bool = False,
        collect_data: bool = True,
        callbacks=None,
        catch_errors=None,
        use_surrogate: bool = False,
    ):
        _check_xgboost()

        if dataset not in DATASETS:
            raise ValueError(
                f"Unknown dataset '{dataset}'. " f"Available: {self.available_datasets}"
            )

        if cv not in self.available_cv:
            raise ValueError(f"Invalid cv={cv}. Available: {self.available_cv}")

        self.dataset = dataset
        self.cv = cv
        self.n_components = n_components
        self._dataset_loader = DATASETS[dataset]

        super().__init__(
            objective=objective,
            sleep=sleep,
            memory=memory,
            collect_data=collect_data,
            callbacks=callbacks,
            catch_errors=catch_errors,
            use_surrogate=use_surrogate,
        )

    @property
    def search_space(self):
        """Search space containing hyperparameters."""
        return {
            "n_estimators": self.n_estimators_default,
            "max_depth": self.max_depth_default,
            "learning_rate": self.learning_rate_default,
        }
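
    # Evaluation pipeline built below: standardize the raw pixel features,
    # project them onto `n_components` principal components, then score an
    # XGBClassifier by its mean cross-validated accuracy.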
    def _create_objective_function(self):
        """Create objective function with fixed dataset and cv."""
        from sklearn.decomposition import PCA
        from sklearn.model_selection import cross_val_score
        from sklearn.preprocessing import StandardScaler
        from xgboost import XGBClassifier

        X_raw, y = self._dataset_loader()

        # Apply PCA for dimensionality reduction
        scaler = StandardScaler()
        pca = PCA(n_components=self.n_components, random_state=42)
        X_scaled = scaler.fit_transform(X_raw)
        X = pca.fit_transform(X_scaled)

        cv = self.cv
        n_classes = len(np.unique(y))

        def xgboost_image_classifier(params):
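            # Choose the booster objective from the label cardinality:
            # multi-class softmax (with num_class set) for more than two
            # classes, otherwise binary logistic regression.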
            model = XGBClassifier(
                n_estimators=params["n_estimators"],
                max_depth=params["max_depth"],
                learning_rate=params["learning_rate"],
                objective="multi:softmax" if n_classes > 2 else "binary:logistic",
                num_class=n_classes if n_classes > 2 else None,
                random_state=42,
                n_jobs=-1,
                verbosity=0,
            )
            scores = cross_val_score(model, X, y, cv=cv, scoring="accuracy")
            return scores.mean()

        self.pure_objective_function = xgboost_image_classifier

    def _get_surrogate_params(self, params):
        """Add fixed parameters for surrogate prediction."""
        return {
            **params,
            "dataset": self.dataset,
            "cv": self.cv,
            "n_components": self.n_components,
        }
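

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the module's public API):
# a minimal grid loop over a few hyperparameter candidates. It assumes the
# package is installed with the xgboost extra and that the module is run via
# `python -m ...` so the relative imports above resolve. The function is
# called with a parameter dict, as in the docstring example, and larger
# returned scores are treated as better (objective="maximize" by default).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    func = XGBoostImageClassifierFunction(dataset="mnist", cv=3, n_components=50)

    best_score, best_params = float("-inf"), None
    for n_estimators in [50, 100]:
        for max_depth in [3, 6]:
            params = {
                "n_estimators": n_estimators,
                "max_depth": max_depth,
                "learning_rate": 0.1,
            }
            score = func(params)
            if score > best_score:
                best_score, best_params = score, params

    print(f"Best accuracy: {best_score:.4f} with params: {best_params}")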