Commit c64eeb5

Copilot and thinkall authored

Document that final_estimator parameters in ensemble are not auto-tuned (#1499)

* Initial plan
* Document final_estimator parameter behavior in ensemble configuration
* Address code review feedback: fix syntax in examples and use float comparison
* Run pre-commit to fix formatting issues

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>
Co-authored-by: Li Jiang <bnujli@gmail.com>

1 parent bf35f98 · commit c64eeb5

3 files changed: +82 −3 lines changed

flaml/automl/automl.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -176,6 +176,11 @@ def custom_metric(
             and 'final_estimator' to specify the passthrough and
             final_estimator in the stacker. The dict can also contain
             'n_jobs' as the key to specify the number of jobs for the stacker.
+            Note: The hyperparameters of a custom 'final_estimator' are NOT
+            automatically tuned. If you provide an estimator instance (e.g.,
+            CatBoostClassifier()), it will use the parameters you specified
+            or their defaults. If 'final_estimator' is not provided, the best
+            model found during the search will be used as the final estimator.
         eval_method: A string of resampling strategy, one of
             ['auto', 'cv', 'holdout'].
         split_ratio: A float of the valiation data percentage for holdout.
@@ -1827,6 +1832,11 @@ def custom_metric(
             and 'final_estimator' to specify the passthrough and
             final_estimator in the stacker. The dict can also contain
             'n_jobs' as the key to specify the number of jobs for the stacker.
+            Note: The hyperparameters of a custom 'final_estimator' are NOT
+            automatically tuned. If you provide an estimator instance (e.g.,
+            CatBoostClassifier()), it will use the parameters you specified
+            or their defaults. If 'final_estimator' is not provided, the best
+            model found during the search will be used as the final estimator.
         eval_method: A string of resampling strategy, one of
             ['auto', 'cv', 'holdout'].
         split_ratio: A float of the valiation data percentage for holdout.
@@ -3147,6 +3157,10 @@ def _search(self):
                 # the total degree of parallelization = parallelization degree per estimator * parallelization degree of ensemble
             )
             if isinstance(self._ensemble, dict):
+                # Note: If a custom final_estimator is provided, it is used as-is without
+                # hyperparameter tuning. The user is responsible for setting appropriate
+                # parameters or using defaults. If not provided, the best model found
+                # during the search (self._trained_estimator) is used.
                 final_estimator = self._ensemble.get("final_estimator", self._trained_estimator)
                 passthrough = self._ensemble.get("passthrough", True)
                 ensemble_n_jobs = self._ensemble.get("n_jobs", ensemble_n_jobs)
```
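For reference, a minimal sketch of how the keys in the ensemble dict resolve, per the `_search` hunk above; `best_model` and `default_n_jobs` are hypothetical stand-ins for `self._trained_estimator` and the surrounding n_jobs logic:

```python
from sklearn.linear_model import LogisticRegression

# Hypothetical stand-ins for the flaml internals named in the diff above.
best_model = LogisticRegression()  # stands in for self._trained_estimator
default_n_jobs = 1

ensemble = {"final_estimator": LogisticRegression(C=0.5), "passthrough": False}

# A custom final_estimator is taken as-is; its hyperparameters are never tuned.
final_estimator = ensemble.get("final_estimator", best_model)
passthrough = ensemble.get("passthrough", True)  # defaults to True
ensemble_n_jobs = ensemble.get("n_jobs", default_n_jobs)
```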

test/automl/test_multiclass.py

Lines changed: 43 additions & 0 deletions

```diff
@@ -181,6 +181,49 @@ def test_ensemble(self):
         }
         automl.fit(X_train=X_train, y_train=y_train, **settings)
 
+    def test_ensemble_final_estimator_params_not_tuned(self):
+        """Test that final_estimator parameters in ensemble are not automatically tuned.
+
+        This test verifies that when a custom final_estimator is provided with specific
+        parameters, those parameters are used as-is without any hyperparameter tuning.
+        """
+        from sklearn.linear_model import LogisticRegression
+
+        automl = AutoML()
+        X_train, y_train = load_wine(return_X_y=True)
+
+        # Create a LogisticRegression with specific non-default parameters
+        custom_params = {
+            "C": 0.5,  # Non-default value
+            "max_iter": 50,  # Non-default value
+            "random_state": 42,
+        }
+        final_est = LogisticRegression(**custom_params)
+
+        settings = {
+            "time_budget": 5,
+            "estimator_list": ["rf", "lgbm"],
+            "task": "classification",
+            "ensemble": {
+                "final_estimator": final_est,
+                "passthrough": False,
+            },
+            "n_jobs": 1,
+        }
+        automl.fit(X_train=X_train, y_train=y_train, **settings)
+
+        # Verify that the final estimator in the stacker uses the exact parameters we specified
+        if hasattr(automl.model, "final_estimator_"):
+            # The model is a StackingClassifier
+            fitted_final_estimator = automl.model.final_estimator_
+            assert (
+                abs(fitted_final_estimator.C - custom_params["C"]) < 1e-9
+            ), f"Expected C={custom_params['C']}, but got {fitted_final_estimator.C}"
+            assert (
+                fitted_final_estimator.max_iter == custom_params["max_iter"]
+            ), f"Expected max_iter={custom_params['max_iter']}, but got {fitted_final_estimator.max_iter}"
+            print("✓ Final estimator parameters were preserved (not tuned)")
+
     def test_dataframe(self):
         self.test_classification(True)
 
```
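The tolerance-based comparison on `C` (rather than `==`) reflects the "use float comparison" review feedback noted in the commit message; a minimal illustration of why exact float equality is brittle:

```python
# Exact equality on floats breaks as soon as values pass through arithmetic;
# an absolute tolerance keeps the assertion robust.
a = 0.1 + 0.2
assert a != 0.3                 # 0.30000000000000004 under IEEE 754 doubles
assert abs(a - 0.3) < 1e-9      # tolerance-based check passes
```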

website/docs/Use-Cases/Task-Oriented-AutoML.md

Lines changed: 25 additions & 3 deletions

````diff
@@ -469,18 +469,40 @@ To use stacked ensemble after the model search, set `ensemble=True` or a dict. W
 - "final_estimator": an instance of the final estimator in the stacker.
 - "passthrough": True (default) or False, whether to pass the original features to the stacker.
 
+**Important Note:** The hyperparameters of a custom `final_estimator` are **NOT automatically tuned**. If you provide an estimator instance (e.g., `CatBoostClassifier()`), it will use the parameters you specified or their defaults. To use specific hyperparameters, you must set them when creating the estimator instance. If `final_estimator` is not provided, the best model found during the search will be used as the final estimator (recommended for best performance).
+
 For example,
 
 ```python
 automl.fit(
-    X_train, y_train, task="classification",
-    "ensemble": {
-        "final_estimator": LogisticRegression(),
+    X_train,
+    y_train,
+    task="classification",
+    ensemble={
+        "final_estimator": LogisticRegression(),  # Uses default LogisticRegression parameters
         "passthrough": False,
     },
 )
 ```
 
+Or with custom parameters:
+
+```python
+from catboost import CatBoostClassifier
+
+automl.fit(
+    X_train,
+    y_train,
+    task="classification",
+    ensemble={
+        "final_estimator": CatBoostClassifier(
+            iterations=100, depth=6, learning_rate=0.1
+        ),
+        "passthrough": True,
+    },
+)
+```
+
 ### Resampling strategy
 
 By default, flaml decides the resampling automatically according to the data size and the time budget. If you would like to enforce a certain resampling strategy, you can set `eval_method` to be "holdout" or "cv" for holdout or cross-validation.
````
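To confirm that the stacker kept these parameters, you can inspect the fitted final estimator after `automl.fit(...)` returns; a minimal sketch mirroring the new test, assuming `automl` was fitted as in the CatBoost example above (`final_estimator_` is the fitted-attribute name scikit-learn's StackingClassifier exposes):

```python
# After the CatBoost example above, the fitted stacker should report exactly
# the parameters that were passed in (they are never tuned by flaml).
if hasattr(automl.model, "final_estimator_"):
    params = automl.model.final_estimator_.get_params()
    print(params["iterations"], params["depth"], params["learning_rate"])  # 100 6 0.1
```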
