Commit e8145e0

Merge pull request freqtrade#11805 from viotemp1/optuna_addons

fix hyperopt repeated parameters between batches

2 parents: 6188694 + ae90738

File tree: 2 files changed, +53 -44 lines

- freqtrade/optimize/hyperopt/hyperopt.py
- freqtrade/optimize/hyperopt/hyperopt_optimizer.py
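The commit addresses a real property of Optuna's ask interface: samplers may propose parameter sets that were already evaluated, both across batches and within a single batch. A minimal standalone sketch of that behaviour (the `buy_rsi` space is hypothetical, not part of this commit):

```python
import optuna

# Deliberately tiny search space: repeats become almost certain, which is
# exactly the "search space too small" situation this commit warns about.
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=42))

seen: list[dict] = []
for _ in range(6):
    trial = study.ask()
    trial.suggest_categorical("buy_rsi", [20, 30])
    if trial.params in seen:
        print(f"duplicate params proposed: {trial.params}")
    else:
        seen.append(trial.params)
    study.tell(trial, 0.0)  # dummy loss so the trial counts as COMPLETE
```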

freqtrade/optimize/hyperopt/hyperopt.py

Lines changed: 42 additions & 43 deletions
```diff
@@ -15,13 +15,14 @@
 
 import rapidjson
 from joblib import Parallel, cpu_count
+from optuna.trial import FrozenTrial, Trial, TrialState
 
 from freqtrade.constants import FTHYPT_FILEVERSION, LAST_BT_RESULT_FN, Config
 from freqtrade.enums import HyperoptState
 from freqtrade.exceptions import OperationalException
 from freqtrade.misc import file_dump_json, plural
 from freqtrade.optimize.hyperopt.hyperopt_logger import logging_mp_handle, logging_mp_setup
-from freqtrade.optimize.hyperopt.hyperopt_optimizer import HyperOptimizer
+from freqtrade.optimize.hyperopt.hyperopt_optimizer import INITIAL_POINTS, HyperOptimizer
 from freqtrade.optimize.hyperopt.hyperopt_output import HyperoptOutput
 from freqtrade.optimize.hyperopt_tools import (
     HyperoptStateContainer,
```
```diff
@@ -34,9 +35,6 @@
 logger = logging.getLogger(__name__)
 
 
-INITIAL_POINTS = 30
-
-
 log_queue: Any
 
 
```
```diff
@@ -91,6 +89,7 @@ def __init__(self, config: Config) -> None:
         self.print_json = self.config.get("print_json", False)
 
         self.hyperopter = HyperOptimizer(self.config, self.data_pickle_file)
+        self.count_skipped_epochs = 0
 
     @staticmethod
     def get_lock_filename(config: Config) -> str:
```
```diff
@@ -169,56 +168,49 @@ def get_optuna_asked_points(self, n_points: int, dimensions: dict) -> list[Any]:
             asked.append(self.opt.ask(dimensions))
         return asked
 
+    def duplicate_optuna_asked_points(self, trial: Trial, asked_trials: list[FrozenTrial]) -> bool:
+        asked_trials_no_dups: list[FrozenTrial] = []
+        trials_to_consider = trial.study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
+        # Check whether we already evaluated the sampled `params`.
+        for t in reversed(trials_to_consider):
+            if trial.params == t.params:
+                return True
+        # Check whether the same `params` occur within one batch (asked_trials); the AutoSampler does this.
+        for t in asked_trials:
+            if t.params not in [x.params for x in asked_trials_no_dups]:
+                asked_trials_no_dups.append(t)
+        if len(asked_trials_no_dups) != len(asked_trials):
+            return True
+        return False
+
     def get_asked_points(self, n_points: int, dimensions: dict) -> tuple[list[Any], list[bool]]:
         """
         Enforce points returned from `self.opt.ask` have not been already evaluated
 
         Steps:
         1. Try to get points using `self.opt.ask` first
         2. Discard the points that have already been evaluated
-        3. Retry using `self.opt.ask` up to 3 times
-        4. If still some points are missing in respect to `n_points`, random sample some points
-        5. Repeat until at least `n_points` points in the `asked_non_tried` list
-        6. Return a list with length truncated at `n_points`
+        3. Retry using `self.opt.ask` up to `2 * n_points` times
         """
-
-        def unique_list(a_list):
-            new_list = []
-            for item in a_list:
-                if item not in new_list:
-                    new_list.append(item)
-            return new_list
-
+        asked_non_tried: list[FrozenTrial] = []
+        optuna_asked_trials = self.get_optuna_asked_points(n_points=n_points, dimensions=dimensions)
+        asked_non_tried += [
+            x
+            for x in optuna_asked_trials
+            if not self.duplicate_optuna_asked_points(x, optuna_asked_trials)
+        ]
         i = 0
-        asked_non_tried: list[list[Any]] = []
-        is_random_non_tried: list[bool] = []
-        while i < 5 and len(asked_non_tried) < n_points:
-            if i < 3:
-                self.opt.cache_ = {}
-                asked = unique_list(
-                    self.get_optuna_asked_points(
-                        n_points=n_points * 5 if i > 0 else n_points, dimensions=dimensions
-                    )
-                )
-                is_random = [False for _ in range(len(asked))]
-            else:
-                asked = unique_list(self.opt.space.rvs(n_samples=n_points * 5))
-                is_random = [True for _ in range(len(asked))]
-            is_random_non_tried += [
-                rand for x, rand in zip(asked, is_random, strict=False) if x not in asked_non_tried
-            ]
-            asked_non_tried += [x for x in asked if x not in asked_non_tried]
+        while i < 2 * n_points and len(asked_non_tried) < n_points:
+            asked_new = self.get_optuna_asked_points(n_points=1, dimensions=dimensions)[0]
+            if not self.duplicate_optuna_asked_points(asked_new, asked_non_tried):
+                asked_non_tried.append(asked_new)
             i += 1
+        if len(asked_non_tried) < n_points:
+            if self.count_skipped_epochs == 0:
+                logger.warning("Duplicate params detected. Maybe your search space is too small?")
+            self.count_skipped_epochs += n_points - len(asked_non_tried)
 
-        if asked_non_tried:
-            return (
-                asked_non_tried[: min(len(asked_non_tried), n_points)],
-                is_random_non_tried[: min(len(asked_non_tried), n_points)],
-            )
-        else:
-            return self.get_optuna_asked_points(n_points=n_points, dimensions=dimensions), [
-                False for _ in range(n_points)
-            ]
+        return asked_non_tried, [False for _ in range(len(asked_non_tried))]
 
     def evaluate_result(self, val: dict[str, Any], current: int, is_random: bool):
         """
```
```diff
@@ -304,6 +296,7 @@ def start(self) -> None:
                         parallel,
                         [asked1.params for asked1 in asked],
                     )
+
                     f_val_loss = [v["loss"] for v in f_val]
                     for o_ask, v in zip(asked, f_val_loss, strict=False):
                         self.opt.tell(o_ask, v)
```
```diff
@@ -327,6 +320,12 @@ def start(self) -> None:
         except KeyboardInterrupt:
             print("User interrupted..")
 
+        if self.count_skipped_epochs > 0:
+            logger.info(
+                f"{self.count_skipped_epochs} {plural(self.count_skipped_epochs, 'epoch')} "
+                f"skipped due to duplicate parameters."
+            )
+
         logger.info(
             f"{self.num_epochs_saved} {plural(self.num_epochs_saved, 'epoch')} "
             f"saved to '{self.results_file}'."
```

freqtrade/optimize/hyperopt/hyperopt_optimizer.py

Lines changed: 11 additions & 1 deletion
```diff
@@ -45,6 +45,7 @@
 
 logger = logging.getLogger(__name__)
 
+INITIAL_POINTS = 30
 
 MAX_LOSS = 100000  # just a big enough number to be bad result in loss optimization
 
```
```diff
@@ -425,7 +426,16 @@ def get_optimizer(
             raise OperationalException(f"Optuna Sampler {o_sampler} not supported.")
         with warnings.catch_warnings():
             warnings.filterwarnings(action="ignore", category=ExperimentalWarning)
-            sampler = optuna_samplers_dict[o_sampler](seed=random_state)
+            if o_sampler in ["NSGAIIISampler", "NSGAIISampler"]:
+                sampler = optuna_samplers_dict[o_sampler](
+                    seed=random_state, population_size=INITIAL_POINTS
+                )
+            elif o_sampler in ["GPSampler", "TPESampler", "CmaEsSampler"]:
+                sampler = optuna_samplers_dict[o_sampler](
+                    seed=random_state, n_startup_trials=INITIAL_POINTS
+                )
+            else:
+                sampler = optuna_samplers_dict[o_sampler](seed=random_state)
     else:
         sampler = o_sampler
 
```
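The two keyword arguments map directly onto Optuna's sampler constructors: `n_startup_trials` sets how many random startup trials TPE/GP/CMA-ES run before their model takes over, while `population_size` plays the analogous role for the NSGA-II/III genetic samplers. A minimal sketch of the resulting configuration (values as in the commit):

```python
import optuna

INITIAL_POINTS = 30  # mirrors the constant now defined in hyperopt_optimizer.py

tpe = optuna.samplers.TPESampler(seed=42, n_startup_trials=INITIAL_POINTS)
nsga2 = optuna.samplers.NSGAIISampler(seed=42, population_size=INITIAL_POINTS)

study = optuna.create_study(sampler=tpe, direction="minimize")
```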
