diff --git a/.github/workflows/citation.yml b/.github/workflows/citation.yml
index 91509b900..a9565a32e 100644
--- a/.github/workflows/citation.yml
+++ b/.github/workflows/citation.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out a copy of the repository
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Check whether the citation metadata from CITATION.cff is valid
uses: citation-file-format/cffconvert-github-action@2.0.0
diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml
index 8e9908060..a2bcf839e 100644
--- a/.github/workflows/dist.yml
+++ b/.github/workflows/dist.yml
@@ -28,10 +28,10 @@ jobs:
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Setup Python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: "3.10"
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index d109fc6bc..67cc52c55 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -17,8 +17,8 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
- - uses: actions/setup-python@v5
+ - uses: actions/checkout@v5
+ - uses: actions/setup-python@v6
with:
python-version: "3.10"
- name: "Install dependancies"
diff --git a/.github/workflows/pre-commit-update.yml b/.github/workflows/pre-commit-update.yml
index b279eb40b..4e2024a4f 100644
--- a/.github/workflows/pre-commit-update.yml
+++ b/.github/workflows/pre-commit-update.yml
@@ -11,9 +11,9 @@ jobs:
auto-update:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v5
- - uses: actions/setup-python@v5
+ - uses: actions/setup-python@v6
- uses: browniebroke/pre-commit-autoupdate-action@main
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 31d957762..984711c65 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -27,12 +27,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v4
+ - uses: actions/checkout@v5
with:
submodules: recursive
- name: Setup Python 3.10
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: "3.10"
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index c94e246bb..cc2efc458 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -49,15 +49,15 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.8", "3.9", "3.10"]
+ python-version: ["3.9", "3.10", "3.11"]
os: ["ubuntu-latest"]
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Setup Python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
@@ -103,12 +103,12 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.8", "3.9", "3.10"]
+ python-version: ["3.9", "3.10", "3.11"]
os: ["ubuntu-latest"]
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Conda install
uses: conda-incubator/setup-miniconda@v3
@@ -143,15 +143,15 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.8", "3.9", "3.10"]
+ python-version: ["3.9", "3.10", "3.11"]
os: ["ubuntu-latest"]
steps:
- name: Checkout
- uses: actions/checkout@v4
+ uses: actions/checkout@v5
- name: Setup Python
- uses: actions/setup-python@v5
+ uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/recent_reminder.yml b/.github/workflows/recent_reminder.yml
index 1b40255f5..9d2d4e54c 100644
--- a/.github/workflows/recent_reminder.yml
+++ b/.github/workflows/recent_reminder.yml
@@ -32,7 +32,7 @@ jobs:
echo "$(" >> mail.html
- name: Send mail
id: mail
- uses: dawidd6/action-send-mail@v4
+ uses: dawidd6/action-send-mail@v6
with:
server_address: ${{secrets.MAIL_SERVER_ADDRESS}}
server_port: ${{secrets.MAIL_SERVER_PORT}}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36b2d614d..53488674b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,27 @@
-# 2.3.1
+# 2.4.0
+## Improvements
+- Replace random forest from pyrfr with random forest from sklearn (#1246)
+- Submit trials to runners in SMBO instead of running configs directly (#937)
+- `target_function` becomes optional in Facade when using ask and tell exclusively (#946)
+- Added adaptive capping for algorithm configuration with runtime as target (#1247)
+- Readded UCB as an acquisition function (#1255)
+
+## Documentation
+- Ask and tell without initial design and warmstarting
+- Random forest replacement (sklearn instead of pyrfr)
+- Deterministic configuration of SMAC
+
+## Examples
+- Add target function with additional arguments (#1134)
+
+## Bugfixes
+- Ask and tell without an initial design no longer returns a config from the initial design if the initial design is "removed"
+- Address situations where the acquisition function suggests configurations that have already been sampled in prior iterations (#1216)
+
+## Misc
+- Rename retries to max_new_config_tries in ConfigSelector (#1207)
+
+# 2.3.1
## Misc
- New SMAC logo
- Fix doc link in README
diff --git a/CITATION.cff b/CITATION.cff
index 35d457a01..abb9d7a88 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -9,7 +9,7 @@ date-released: "2016-08-17"
url: "https://automl.github.io/SMAC3/master/index.html"
repository-code: "https://github.com/automl/SMAC3"
-version: "2.3.1"
+version: "2.4.0"
type: "software"
keywords:
diff --git a/README.md b/README.md
index e2f0e2b29..8fda28491 100644
--- a/README.md
+++ b/README.md
@@ -47,11 +47,6 @@ conda create -n SMAC python=3.10
conda activate SMAC
```
-Install swig:
-```
-conda install gxx_linux-64 gcc_linux-64 swig
-```
-
Install SMAC via PyPI:
```
pip install smac
@@ -63,6 +58,20 @@ git clone https://github.com/automl/SMAC3.git && cd SMAC3
make install-dev
```
+## Running SMAC with pyrfr
+Starting from 2.4.0, SMAC uses the random forest from [sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html)
+instead of the random forest from [pyrfr](https://pypi.org/project/pyrfr/) as the default surrogate model for HPO tasks.
+However, you can still use the old pyrfr surrogate model via `smac.facade.old.HyperparameterOptimizationRFRFacade`
+or `smac.facade.old.MultiFidelityRFRFacade`.
+
+To work with pyrfr, you need to first install gcc, gxx, and swig:
+```
+conda install gxx_linux-64 gcc_linux-64 swig
+```
+Then install SMAC with the pyrfr option:
+```
+pip install smac[pyrfr]
+```
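+
+A minimal sketch of using the pyrfr-based facade is shown below; the quadratic target function and the parameter values are only illustrative, and pyrfr must be installed as described above:
+```python
+from ConfigSpace import Configuration, ConfigurationSpace, Float
+
+from smac import Scenario
+from smac.facade.old import HyperparameterOptimizationRFRFacade
+
+
+def train(config: Configuration, seed: int = 0) -> float:
+    # Simple quadratic function with its minimum at x=0
+    return config["x"] ** 2
+
+
+cs = ConfigurationSpace(seed=0)
+cs.add([Float("x", (-5, 5), default=-5)])
+
+scenario = Scenario(cs, deterministic=True, n_trials=50)
+smac = HyperparameterOptimizationRFRFacade(scenario, target_function=train)
+incumbent = smac.optimize()
+```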
## Minimal Example
diff --git a/docs/3_getting_started.md b/docs/3_getting_started.md
index b88a4ead5..ede8da4a8 100644
--- a/docs/3_getting_started.md
+++ b/docs/3_getting_started.md
@@ -2,7 +2,7 @@
# Getting Started
SMAC needs four core components (configuration space, target function, scenario and a facade) to run an
-optimization process, all of which are explained on this page.
+optimization process called Sequential Model Based Optimization (SMBO), all of which are explained on this page.
They interact in the following way:
@@ -93,8 +93,8 @@ scenario = Scenario(
!!! warn
By default Facades will try to warmstart on preexisting logs. This behavior can be specified using the `overwrite` parameter.
-A [facade][smac.facade.abstract_facade] is the entry point to SMAC, which constructs a default optimization
-pipeline for you. SMAC offers various facades, which satisfy many common use cases and are crucial to
+A [facade][smac.facade.abstract_facade] is the entry point to SMAC, which constructs a Sequential Model Based Optimization (`SMBO`) object for you.
+SMAC offers various facades, which satisfy many common use cases and are crucial to
achieving peak performance. The idea behind the facades is to provide a simple interface to all of SMAC's components,
which is easy to use and understand and without the need of deep diving into the material. However, experts are
invited to change the components to their specific hyperparameter optimization needs. The following
@@ -139,4 +139,15 @@ smac = MFFacade(scenario=scenario, target_function=train)
smac = ACFacade(scenario=scenario, target_function=train)
smac = RFacade(scenario=scenario, target_function=train)
smac = HBFacade(scenario=scenario, target_function=train)
-```
\ No newline at end of file
+```
+
+## SMBO
+Each `Facade` creates a [Sequential Model Based Optimization (SMBO)][smac.main.smbo] object for you. In addition to starting the optimization with
+```python
+incumbent = smbo.optimize()
+```
+it supports access to, e.g., the runhistory with
+```python
+runhistory = smbo.runhistory
+```
+For more information, check out the [API][smac.main.smbo].
\ No newline at end of file
diff --git a/docs/advanced_usage/5_ask_and_tell.md b/docs/advanced_usage/5_ask_and_tell.md
index ba464beaa..4aebe6855 100644
--- a/docs/advanced_usage/5_ask_and_tell.md
+++ b/docs/advanced_usage/5_ask_and_tell.md
@@ -15,5 +15,7 @@ and report the results of the trial.
different budgets, they, obviously, can not be considered. However, all user-provided configurations will flow
into the intensification process.
+Notice: if you are exclusively using the ask-and-tell interface and never call `smac.optimize()`, SMAC is no longer
+responsible for evaluating the trials, and the facade therefore no longer requires the `target_function` argument.
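+
+A minimal sketch of this pattern is shown below; the `scenario` object and the external `evaluate` function are assumed to be defined by you:
+
+```python
+from smac import HyperparameterOptimizationFacade
+from smac.runhistory.dataclasses import TrialValue
+
+# No target_function is passed because all trials are evaluated outside of SMAC
+smac = HyperparameterOptimizationFacade(scenario, overwrite=True)
+
+for _ in range(20):
+    info = smac.ask()
+    cost = evaluate(info.config, seed=info.seed)  # your own evaluation logic
+    smac.tell(info, TrialValue(cost=cost))
+```
+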
Please have a look at our [ask-and-tell example](../examples/1%20Basics/3_ask_and_tell.md).
diff --git a/docs/devnotes/release_flow.md b/docs/devnotes/release_flow.md
new file mode 100644
index 000000000..4b8250f65
--- /dev/null
+++ b/docs/devnotes/release_flow.md
@@ -0,0 +1,64 @@
+# How to Create a New Release
+Export the version number, e.g.
+```bash
+export VERSION="2.4.0"
+```
+If you do not use `uv`, remove `uv` from the commands.
+
+1. Refresh main.
+ ```bash
+ git checkout main
+ git pull
+ ```
+
+1. Checkout development branch:
+ ```bash
+ git checkout development
+ git pull
+ ```
+
+1. Run `make tests` to ensure everything works. If tests run through, proceed.
+ ```bash
+ make tests
+ ```
+
+1. Create new branch from development with name e.g. `v${VERSION}`:
+ ```bash
+ git branch v${VERSION}
+ git checkout v${VERSION}
+ ```
+
+1. Merge main into branch
+ ```bash
+ git merge main
+ ```
+
+1. Check `CHANGELOG.md` to make sure the version number is correct and the entries are in the right order.
+1. Replace version numbers everywhere: in `CITATION.cff`, `__init__.py`.
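+   A quick way to locate any remaining occurrences of the old version number (the version shown is only an example; adjust it as needed):
+   ```bash
+   # Print every line in these files that still contains the old version
+   grep -n "2.3.1" CITATION.cff smac/__init__.py
+   ```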
+1. Create a PR to merge branch `v${VERSION}` into `main`. As description you can use the changelog notes.
+1. Test installation with a fresh environment, see `test_package.sh`.
+1. Merge PR if tests are fine and installation is fine.
+1. Create release, add notes from changelog.
+1. Update doc link.
+1. Deploy the GitHub pages documentation:
+ ```bash
+ make deploy "v${VERSION}" latest -u -p --title "v${VERSION} (latest)"
+ ```
+
+1. Upload to testpypi:
+ ```bash
+ python -m twine upload --repository testpypi dist/*
+ ```
+
+1. Test from testpypi:
+ ```bash
+ uv pip uninstall smac
+ uv pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ smac==${VERSION}
+ python -c 'import smac'
+ ```
+ If this is fine, proceed.
+
+1. Upload to pypi:
+ ```bash
+ python -m twine upload dist/*
+ ```
\ No newline at end of file
diff --git a/docs/devnotes/test_package.sh b/docs/devnotes/test_package.sh
new file mode 100644
index 000000000..113ca1be8
--- /dev/null
+++ b/docs/devnotes/test_package.sh
@@ -0,0 +1,9 @@
+export SMACVERSION="2.4.0"
+make clean
+make build
+pip install uv
+rm -r smac_test || true
+uv venv --python=3.12 smac_test
+source smac_test/bin/activate
+uv pip install dist/smac-$SMACVERSION.tar.gz
+python -c 'import smac'
diff --git a/docs/index.md b/docs/index.md
index 46d572189..fdf801c79 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -9,6 +9,20 @@ SMAC is a tool for algorithm configuration to optimize the parameters of arbitra
SMAC3 is written in Python3 and continuously tested with Python 3.8, 3.9, and 3.10. Its Random Forest is written in C++. In the following, SMAC is representatively mentioned for SMAC3.
+## Features
+
+* Open source + active maintenance
+* Rich search space with floats, ordinals, categoricals and conditions
+* Ask-and-Tell Interface
+* Continue and Warmstart Optimization
+* Intensification mechanism to efficiently compare configurations
+* User priors
+* Parallelization, local and on a cluster with Dask
+* Multi-fidelity optimization, e.g. when we can evaluate our function with different resolutions
+* Multi-objective optimization with ParEGO
+* Optimization across many tasks (aka algorithm configuration)
+* Function to optimize can either be pythonic or called via a script
+* Easily extensible with callbacks
## Cite Us
If you use SMAC, please cite our [JMLR paper](https://jmlr.org/papers/v23/21-0888.html):
diff --git a/examples/1_basics/2_svm_cv.py b/examples/1_basics/2_svm_cv.py
index 7bf6594db..5bc7fe45d 100644
--- a/examples/1_basics/2_svm_cv.py
+++ b/examples/1_basics/2_svm_cv.py
@@ -53,8 +53,8 @@ def configspace(self) -> ConfigurationSpace:
def train(self, config: Configuration, seed: int = 0) -> float:
"""Creates a SVM based on a configuration and evaluates it on the
iris-dataset using cross-validation."""
- config_dict = config.get_dictionary()
- if "gamma" in config:
+ config_dict = dict(config)
+ if "gamma" in config_dict:
config_dict["gamma"] = config_dict["gamma_value"] if config_dict["gamma"] == "value" else "auto"
config_dict.pop("gamma_value", None)
diff --git a/examples/1_basics/3_ask_and_tell.py b/examples/1_basics/3_ask_and_tell.py
index 4ec5811a3..39eb6433e 100644
--- a/examples/1_basics/3_ask_and_tell.py
+++ b/examples/1_basics/3_ask_and_tell.py
@@ -2,6 +2,10 @@
# Flags: doc-Runnable
This examples show how to use the Ask-and-Tell interface.
+
+Notice that the ask-and-tell interface will still use the initial design specified in the facade.
+Should you wish to add your own evaluated configurations instead, or deactivate the initial
+design altogether, please refer to the warmstarting example in conjunction with this one.
"""
from ConfigSpace import Configuration, ConfigurationSpace, Float
@@ -52,7 +56,7 @@ def train(self, config: Configuration, seed: int = 0) -> float:
# Now we use SMAC to find the best hyperparameters
smac = HyperparameterOptimizationFacade(
scenario,
- model.train,
+ target_function=model.train,
intensifier=intensifier,
overwrite=True,
)
@@ -68,7 +72,14 @@ def train(self, config: Configuration, seed: int = 0) -> float:
smac.tell(info, value)
# After calling ask+tell, we can still optimize
- # Note: SMAC will optimize the next 90 trials because 10 trials already have been evaluated
+    # Note: SMAC will optimize the next 90 trials because 10 trials have already been evaluated.
+    # If we choose not to call optimize, e.g. because we want to manage the heavy
+    # computation of model.train completely outside SMAC but still use it to suggest new
+    # configurations, then n_trials is only relevant for the initial design in combination
+    # with the initial design's max_ratio. In fact, in a pure ask-and-tell setting we could even set
+    # target_function=None in the constructor, because SMAC would not need to know
+    # what the target function is. However, that would prevent us from calling optimize and
+    # validate later on.
incumbent = smac.optimize()
# Get cost of default configuration
diff --git a/examples/1_basics/8_warmstart.py b/examples/1_basics/8_warmstart.py
index bc6f29b23..209b0b83e 100644
--- a/examples/1_basics/8_warmstart.py
+++ b/examples/1_basics/8_warmstart.py
@@ -56,15 +56,19 @@ def evaluate(self, config: Configuration, seed: int = 0) -> float:
intensifier = HyperparameterOptimizationFacade.get_intensifier(scenario, max_config_calls=1)
smac = HyperparameterOptimizationFacade(
scenario,
- task.evaluate,
+ target_function=task.evaluate,
intensifier=intensifier,
overwrite=True,
# Modify the initial design to use our custom initial design
initial_design=HyperparameterOptimizationFacade.get_initial_design(
scenario,
- n_configs=0, # Do not use the default initial design
- additional_configs=configurations # Use the configurations previously evaluated as initial design
+ n_configs=0, # Do not use the default initial design at all
+
+ # You can pass the configurations as additional_configs, which will specify their
+ # origin to be the initial design. However, this is not necessary and we can just
+ # smac.tell the configurations.
+ # additional_configs=configurations # Use the configurations previously evaluated as initial design
# This only passes the configurations but not the cost!
# So in order to actually use the custom, pre-evaluated initial design
# we need to tell those trials, like below.
@@ -80,4 +84,6 @@ def evaluate(self, config: Configuration, seed: int = 0) -> float:
smac.tell(info, value)
# Optimize as usual
- smac.optimize()
\ No newline at end of file
+    # Note that since we added three configurations, n_trials for the remaining optimization
+    # is effectively 27 in optimize().
+ smac.optimize()
diff --git a/examples/1_basics/9_additional_arguments.py b/examples/1_basics/9_additional_arguments.py
new file mode 100644
index 000000000..a9c409523
--- /dev/null
+++ b/examples/1_basics/9_additional_arguments.py
@@ -0,0 +1,95 @@
+"""Quadratic Function
+# Flags: doc-Runnable
+
+An example of adding additional arguments to the target function either using a class, or a partial function.
+
+This example extends the quadratic function example at examples/1_basics/1_quadratic_function.py.
+"""
+
+from functools import partial
+
+from ConfigSpace import Configuration, ConfigurationSpace, Float
+from matplotlib import pyplot as plt
+
+from smac import RunHistory, Scenario
+from smac.facade.hyperparameter_optimization_facade import (
+ HyperparameterOptimizationFacade as HPOFacade,
+)
+
+__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
+__license__ = "3-clause BSD"
+
+
+class AdditionalArgumentsClass:
+    def __init__(self, bias: int) -> None:
+ self.bias = bias
+
+ @property
+ def configspace(self) -> ConfigurationSpace:
+ cs = ConfigurationSpace(seed=0)
+ x = Float("x", (-5, 5), default=-5)
+ cs.add([x])
+
+ return cs
+
+ def train(self, config: Configuration, seed: int = 0) -> float:
+ """Returns the y value of a quadratic function with a minimum we know to be at x=0."""
+ x = config["x"]
+ return x**2 + self.bias
+
+class PartialFunctionClass:
+ @property
+ def configspace(self) -> ConfigurationSpace:
+ cs = ConfigurationSpace(seed=0)
+ x = Float("x", (-5, 5), default=-5)
+ cs.add([x])
+
+ return cs
+
+    def train(self, config: Configuration, seed: int = 0, bias: int = 0) -> float:
+ """Returns the y value of a quadratic function with a minimum we know to be at x=0."""
+ x = config["x"]
+ return x**2 + bias
+
+def plot(runhistory: RunHistory, incumbent: Configuration, incumbent_cost: float, bias: int, incumbent_color: str, color: str) -> None:
+    # Get all configurations and costs
+    configs = [config["x"] for config in runhistory.get_configs()]
+    costs = [trial_value.cost for trial_value in runhistory.values()]
+
+    # Plot all trials
+    plt.scatter(configs, costs, c=color, alpha=0.4, zorder=9999, marker="o", label=f"Model with bias {bias}")
+
+    # Plot incumbent
+    plt.scatter(incumbent["x"], incumbent_cost, c=incumbent_color, s=100, zorder=10000, marker="x", label=f"Incumbent with bias {bias}")
+
+
+if __name__ == "__main__":
+ for model, bias, color, incumbent_color in [(AdditionalArgumentsClass(bias=2), 2, "green", "red"), (PartialFunctionClass(), -2, "blue", "orange")]:
+ # Scenario object specifying the optimization "environment"
+ seed = 0 if isinstance(model, AdditionalArgumentsClass) else 1
+ scenario = Scenario(model.configspace, deterministic=True, n_trials=100, seed=seed)
+
+ if isinstance(model, PartialFunctionClass):
+ model.train = partial(model.train, bias=-2)
+
+ # Now we use SMAC to find the best hyperparameters
+ smac = HPOFacade(
+ scenario,
+ model.train, # We pass the target function here
+ overwrite=True, # Overrides any previous results that are found that are inconsistent with the meta-data
+ )
+
+ incumbent_config = smac.optimize()
+
+ # Get cost of default configuration
+ default_cost = smac.validate(model.configspace.get_default_configuration())
+ print(f"Default cost: {default_cost}")
+
+ # Let's calculate the cost of the incumbent
+ incumbent_cost = smac.validate(incumbent_config)
+ print(f"Incumbent cost: {incumbent_cost}")
+
+ # Let's plot it too
+ plot(smac.runhistory, incumbent_config, incumbent_cost, bias=bias, color=color, incumbent_color=incumbent_color)
+ plt.legend()
+ plt.show()
diff --git a/setup.py b/setup.py
index 6ac234efe..1eca8d910 100644
--- a/setup.py
+++ b/setup.py
@@ -21,6 +21,9 @@ def read_file(filepath: str) -> str:
extras_require = {
+ "pyrfr": [
+ "pyrfr>=0.9.0",
+ ],
"dev": [
"setuptools",
"types-setuptools",
@@ -79,8 +82,7 @@ def read_file(filepath: str) -> str:
"pynisher>=1.0.0",
"ConfigSpace>=1.0.0",
"joblib",
- "scikit-learn>=1.1.2",
- "pyrfr>=0.9.0",
+ "scikit-learn>=1.6.1",
"dask[distributed]",
"dask_jobqueue>=0.8.2",
"emcee>=3.0.0",
diff --git a/smac/__init__.py b/smac/__init__.py
index 881b8ea9d..725ba353a 100644
--- a/smac/__init__.py
+++ b/smac/__init__.py
@@ -20,7 +20,7 @@
"Matthias Feurer, André Biedenkapp, Difan Deng, Carolin Benjamins, Tim Ruhkopf, René Sass "
"and Frank Hutter"
)
-version = "2.3.1"
+version = "2.4.0"
try:
diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py
index 153cef6b4..422935f14 100644
--- a/smac/acquisition/maximizer/local_search.py
+++ b/smac/acquisition/maximizer/local_search.py
@@ -237,17 +237,13 @@ def _get_init_points_from_previous_configs(
else:
additional_start_points = []
- init_points = []
- init_points_as_set: set[Configuration] = set()
- for cand in itertools.chain(
- previous_configs_sorted_by_cost,
- additional_start_points,
- ):
- if cand not in init_points_as_set:
- init_points.append(cand)
- init_points_as_set.add(cand)
-
- return init_points
+        # Deduplicate the candidate configurations while preserving their cost-based ordering
+        init_points = dict.fromkeys(
+            itertools.chain(
+                previous_configs_sorted_by_cost,
+                additional_start_points,
+            )
+        )
+        return list(init_points)
def _search(
self,
diff --git a/smac/facade/abstract_facade.py b/smac/facade/abstract_facade.py
index 8851aafa4..9d3912061 100644
--- a/smac/facade/abstract_facade.py
+++ b/smac/facade/abstract_facade.py
@@ -58,9 +58,12 @@ class AbstractFacade:
----------
scenario : Scenario
The scenario object, holding all environmental information.
- target_function : Callable | str | AbstractRunner
+ target_function : Callable | str | AbstractRunner | None, defaults to None
This function is called internally to judge a trial's performance. If a string is passed,
it is assumed to be a script. In this case, ``TargetFunctionScriptRunner`` is used to run the script.
+        In the rare case that only ``ask`` and ``tell``, and not ``optimize``, are used to optimize
+        the hyperparameters, the target_function argument can be None, because SMAC is then no longer
+        in charge of evaluating the configurations and thus does not need to know about them.
model : AbstractModel | None, defaults to None
The surrogate model.
acquisition_function : AbstractAcquisitionFunction | None, defaults to None
@@ -105,7 +108,7 @@ class AbstractFacade:
def __init__(
self,
scenario: Scenario,
- target_function: Callable | str | AbstractRunner,
+ target_function: Callable | str | AbstractRunner | None = None,
*,
model: AbstractModel | None = None,
acquisition_function: AbstractAcquisitionFunction | None = None,
@@ -154,7 +157,8 @@ def __init__(
config_selector = self.get_config_selector(scenario)
# Initialize empty stats and runhistory object
- runhistory = RunHistory(multi_objective_algorithm=multi_objective_algorithm)
+ n_objectives = len(scenario.objectives) if isinstance(scenario.objectives, list) else -1
+ runhistory = RunHistory(multi_objective_algorithm=multi_objective_algorithm, n_objectives=n_objectives)
# Set the seed for configuration space
scenario.configspace.seed(scenario.seed)
@@ -175,8 +179,10 @@ def __init__(
self._overwrite = overwrite
# Prepare the algorithm executer
- runner: AbstractRunner
- if isinstance(target_function, AbstractRunner):
+ runner: AbstractRunner | None
+ if isinstance(target_function, AbstractRunner) or target_function is None:
+ # in case the target_function is None (e.g. we purely use ask & tell)
+ # we let smbo.optimize raise an error
runner = target_function
elif isinstance(target_function, str):
runner = TargetFunctionScriptRunner(
@@ -192,7 +198,7 @@ def __init__(
)
# In case of multiple jobs, we need to wrap the runner again using DaskParallelRunner
- if (n_workers := scenario.n_workers) > 1 or dask_client is not None:
+ if ((n_workers := scenario.n_workers) > 1 or dask_client is not None) and runner is not None:
if dask_client is not None and n_workers > 1:
logger.warning(
"Provided `dask_client`. Ignore `scenario.n_workers`, directly set `n_workers` in `dask_client`."
@@ -200,7 +206,10 @@ def __init__(
else:
available_workers = joblib.cpu_count()
if n_workers > available_workers:
- logger.info(f"Workers are reduced to {n_workers}.")
+                logger.info(
+                    f"The configured number of workers ({n_workers}) is reduced to the number of "
+                    f"available workers ({available_workers})."
+                )
n_workers = available_workers
# We use a dask runner for parallelization
@@ -261,7 +270,7 @@ def meta(self) -> dict[str, Any]:
meta = {
"facade": {"name": self.__class__.__name__},
- "runner": self._runner.meta,
+ "runner": self._runner.meta if self._runner is not None else None,
"model": self._model.meta,
"acquisition_maximizer": self._acquisition_maximizer.meta,
"acquisition_function": self._acquisition_function.meta,
@@ -422,7 +431,7 @@ def get_config_selector(
retries: int = 16,
) -> ConfigSelector:
"""Returns the default configuration selector."""
- return ConfigSelector(scenario, retrain_after=retrain_after, retries=retries)
+ return ConfigSelector(scenario, retrain_after=retrain_after, max_new_config_tries=retries)
def _get_optimizer(self) -> SMBO:
"""Fills the SMBO with all the pre-initialized components."""
diff --git a/smac/facade/old/__init__.py b/smac/facade/old/__init__.py
new file mode 100644
index 000000000..7f73b5e78
--- /dev/null
+++ b/smac/facade/old/__init__.py
@@ -0,0 +1,9 @@
+from smac.facade.old.hyperparameter_optimization_facade_pyrfr import (
+ HyperparameterOptimizationRFRFacade,
+)
+from smac.facade.old.multi_fidelity_facade_pyrfr import MultiFidelityRFRFacade
+
+__all__ = [
+ "HyperparameterOptimizationRFRFacade",
+ "MultiFidelityRFRFacade",
+]
diff --git a/smac/facade/old/hyperparameter_optimization_facade_pyrfr.py b/smac/facade/old/hyperparameter_optimization_facade_pyrfr.py
new file mode 100644
index 000000000..f833822de
--- /dev/null
+++ b/smac/facade/old/hyperparameter_optimization_facade_pyrfr.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from smac.facade.hyperparameter_optimization_facade import (
+ HyperparameterOptimizationFacade,
+)
+from smac.model.random_forest.pyrfr.random_forest_pyrfr import PyrfrRandomForest
+from smac.scenario import Scenario
+
+
+class HyperparameterOptimizationRFRFacade(HyperparameterOptimizationFacade):
+ @staticmethod
+ def get_model( # type: ignore
+ scenario: Scenario,
+ *,
+ n_trees: int = 10,
+ ratio_features: float = 1.0,
+ min_samples_split: int = 2,
+ min_samples_leaf: int = 1,
+ max_depth: int = 2**20,
+ bootstrapping: bool = True,
+ ) -> PyrfrRandomForest:
+ """Returns a random forest as surrogate model.
+
+ Parameters
+ ----------
+ n_trees : int, defaults to 10
+ The number of trees in the random forest.
+        ratio_features : float, defaults to 1.0
+            The ratio of features that are considered for splitting.
+        min_samples_split : int, defaults to 2
+            The minimum number of data points to perform a split.
+        min_samples_leaf : int, defaults to 1
+            The minimum number of data points in a leaf.
+        max_depth : int, defaults to 2**20
+            The maximum depth of a single tree.
+ bootstrapping : bool, defaults to True
+ Enables bootstrapping.
+ """
+ return PyrfrRandomForest(
+ log_y=True,
+ n_trees=n_trees,
+ bootstrapping=bootstrapping,
+ ratio_features=ratio_features,
+ min_samples_split=min_samples_split,
+ min_samples_leaf=min_samples_leaf,
+ max_depth=max_depth,
+ configspace=scenario.configspace,
+ instance_features=scenario.instance_features,
+ seed=scenario.seed,
+ )
diff --git a/smac/facade/old/multi_fidelity_facade_pyrfr.py b/smac/facade/old/multi_fidelity_facade_pyrfr.py
new file mode 100644
index 000000000..eef3a4629
--- /dev/null
+++ b/smac/facade/old/multi_fidelity_facade_pyrfr.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from smac.facade.multi_fidelity_facade import MultiFidelityFacade
+from smac.facade.old.hyperparameter_optimization_facade_pyrfr import (
+ HyperparameterOptimizationRFRFacade,
+)
+
+__copyright__ = "Copyright 2022, automl.org"
+__license__ = "3-clause BSD"
+
+
+class MultiFidelityRFRFacade(MultiFidelityFacade, HyperparameterOptimizationRFRFacade):
+ pass
diff --git a/smac/initial_design/abstract_initial_design.py b/smac/initial_design/abstract_initial_design.py
index d3f957329..466ec7649 100644
--- a/smac/initial_design/abstract_initial_design.py
+++ b/smac/initial_design/abstract_initial_design.py
@@ -82,7 +82,11 @@ def __init__(
)
# If the number of configurations is too large, we reduce it
- _n_configs = int(max(1, min(self._n_configs, (max_ratio * scenario.n_trials))))
+ if self._n_configs > 1:
+ _n_configs = int(max(1, min(self._n_configs, (max_ratio * scenario.n_trials))))
+ else:
+ _n_configs = self._n_configs
+
if self._n_configs != _n_configs:
logger.info(
f"Reducing the number of initial configurations from {self._n_configs} to "
diff --git a/smac/intensifier/hyperband_utils.py b/smac/intensifier/hyperband_utils.py
index 77f6a748c..f9a46366e 100644
--- a/smac/intensifier/hyperband_utils.py
+++ b/smac/intensifier/hyperband_utils.py
@@ -44,7 +44,11 @@ def determine_HB(min_budget: float, max_budget: float, eta: int = 3) -> dict:
_max_iterations[i] = max_iter + 1
total_trials = np.sum([np.sum(v) for v in _n_configs_in_stage.values()])
- total_budget = np.sum([np.sum(v) for v in _budgets_in_stage.values()])
+
+ total_budget = 0
+ for stage in _n_configs_in_stage.keys():
+ for b, c in zip(_budgets_in_stage[stage], _n_configs_in_stage[stage]):
+ total_budget += b * c
return {
"max_iterations": _max_iterations,
diff --git a/smac/intensifier/successive_halving.py b/smac/intensifier/successive_halving.py
index c14448ee6..546a27377 100644
--- a/smac/intensifier/successive_halving.py
+++ b/smac/intensifier/successive_halving.py
@@ -450,7 +450,7 @@ def __iter__(self) -> Iterator[TrialInfo]: # noqa: D102
# We stop if we don't find any configuration anymore
logger.warning(
"If you assume your configspace was not yet exhausted, try to "
- "increase the number of retries in the config selector."
+                    "increase `max_new_config_tries` in the config selector."
)
return
diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py
index e50702103..739aa8107 100644
--- a/smac/main/config_selector.py
+++ b/smac/main/config_selector.py
@@ -15,6 +15,7 @@
)
from smac.callback.callback import Callback
from smac.initial_design import AbstractInitialDesign
+from smac.main.exceptions import ConfigurationSpaceExhaustedException
from smac.model.abstract_model import AbstractModel
from smac.random_design.abstract_random_design import AbstractRandomDesign
from smac.runhistory.encoder.abstract_encoder import AbstractRunHistoryEncoder
@@ -37,7 +38,7 @@ class ConfigSelector:
----------
retrain_after : int, defaults to 8
How many configurations should be returned before the surrogate model is retrained.
- retries : int, defaults to 8
+    max_new_config_tries : int, defaults to 16
How often to retry receiving a new configuration before giving up.
min_trials: int, defaults to 1
How many samples are required to train the surrogate model. If budgets are involved,
@@ -51,7 +52,7 @@ def __init__(
scenario: Scenario,
*,
retrain_after: int = 8,
- retries: int = 16,
+ max_new_config_tries: int = 16,
min_trials: int = 1,
) -> None:
# Those are the configs sampled from the passed initial design
@@ -77,7 +78,7 @@ def __init__(
# How often to retry receiving a new configuration
# (counter increases if the received config was already returned before)
- self._retries = retries
+ self._max_new_config_tries = max_new_config_tries
# Processed configurations should be stored here; this is important to not return the same configuration twice
self._processed_configs: list[Configuration] = []
@@ -103,7 +104,8 @@ def _set_components(
self._initial_design_configs = initial_design.select_configurations()
if len(self._initial_design_configs) == 0:
- raise RuntimeError("SMAC needs initial configurations to work.")
+            # Previously, this raised a RuntimeError because SMAC required initial configurations.
+            logger.warning("No initial configurations were sampled.")
@property
def meta(self) -> dict[str, Any]:
@@ -111,7 +113,7 @@ def meta(self) -> dict[str, Any]:
return {
"name": self.__class__.__name__,
"retrain_after": self._retrain_after,
- "retries": self._retries,
+ "max_new_config_tries": self._max_new_config_tries,
"min_trials": self._min_trials,
}
@@ -144,7 +146,7 @@ def __iter__(self) -> Iterator[Configuration]:
self._processed_configs = self._runhistory.get_configs()
# We add more retries because there could be a case in which the processed configs are sampled again
- self._retries += len(self._processed_configs)
+ self._max_new_config_tries += len(self._processed_configs)
logger.debug("Search for the next configuration...")
self._call_callbacks_on_start()
@@ -237,9 +239,32 @@ def __iter__(self) -> Iterator[Configuration]:
failed_counter += 1
# We exit the loop if we have tried to add the same configuration too often
- if failed_counter == self._retries:
- logger.warning(f"Could not return a new configuration after {self._retries} retries." "")
- return
+ if failed_counter == self._max_new_config_tries:
+ logger.warning(f"Could not return a new configuration after {failed_counter} retries.")
+ break
+
+ # if we don't have enough configurations, we want to sample random configurations
+ if not retrain:
+                logger.warning(
+                    "Did not find enough configurations via the acquisition function. Sampling random configurations."
+                )
+ random_configs_retries = 0
+ while counter < self._retrain_after and random_configs_retries < self._max_new_config_tries:
+ config = self._scenario.configspace.sample_configuration()
+ if config not in self._processed_configs:
+ counter += 1
+ config.origin = "Random Search (max retries, no candidates)"
+ self._processed_configs.append(config)
+ self._call_callbacks_on_end(config)
+ yield config
+ retrain = counter == self._retrain_after
+ self._call_callbacks_on_start()
+ else:
+ random_configs_retries += 1
+
+ if random_configs_retries == self._max_new_config_tries:
+ logger.warning(f"Could not return a new configuration after {random_configs_retries} retries.")
+ raise ConfigurationSpaceExhaustedException()
def _call_callbacks_on_start(self) -> None:
for callback in self._callbacks:
diff --git a/smac/main/exceptions.py b/smac/main/exceptions.py
new file mode 100644
index 000000000..059e95d3f
--- /dev/null
+++ b/smac/main/exceptions.py
@@ -0,0 +1,7 @@
+class ConfigurationSpaceExhaustedException(Exception):
+ """Exception indicating that the configuration space is exhausted and no more configurations
+ can be sampled. This is usually raised when the maximum number of configurations has been
+ reached or when the configuration space has been fully explored.
+ """
+
+ pass
diff --git a/smac/main/smbo.py b/smac/main/smbo.py
index aa8330310..11c4d817b 100644
--- a/smac/main/smbo.py
+++ b/smac/main/smbo.py
@@ -40,8 +40,10 @@ class SMBO:
----------
scenario : Scenario
The scenario object, holding all environmental information.
- runner : AbstractRunner
+ runner : AbstractRunner | None
The runner (containing the target function) is called internally to judge a trial's performance.
+        In the rare case that ``optimize`` is never called and SMBO is driven via ``ask`` and ``tell`` only,
+        the runner is allowed to be ``None``.
runhistory : Runhistory
The runhistory stores all trials.
intensifier : AbstractIntensifier
@@ -60,7 +62,7 @@ class SMBO:
def __init__(
self,
scenario: Scenario,
- runner: AbstractRunner,
+ runner: AbstractRunner | None,
runhistory: RunHistory,
intensifier: AbstractIntensifier,
overwrite: bool = False,
@@ -290,6 +292,11 @@ def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configur
callback.on_start(self)
dask_data_to_scatter = {}
+ if self._runner is None:
+ raise ValueError(
+ "Runner is not set in SMBO. Likely issue is that the target_function was not set in the Facade."
+ )
+
if isinstance(self._runner, DaskParallelRunner) and data_to_scatter is not None:
dask_data_to_scatter = dict(data_to_scatter=self._runner._client.scatter(data_to_scatter, broadcast=True))
elif data_to_scatter is not None:
@@ -435,6 +442,12 @@ def _add_results(self) -> None:
"""Adds results from the runner to the runhistory. Although most of the functionality could be written
in the tell method, we separate it here to make it accessible for the automatic optimization procedure only.
"""
+ if self._runner is None:
+ raise ValueError(
+ "Runner is not set in SMBO. Likely issue is that the target_function was not set "
+ "in the Facade. So we cannot query the runner for results."
+ )
+
# Check if there is any result
for trial_info, trial_value in self._runner.iter_results():
# Add the results of the run to the run history
@@ -578,6 +591,11 @@ def validate(
The averaged cost of the configuration. In case of multi-fidelity, the cost of each objective is
averaged.
"""
+ if self._runner is None:
+ raise ValueError(
+ "Runner is not set in SMBO. Likely issue is that the target_function was not set in the Facade."
+ )
+
if seed is None:
seed = self._scenario.seed
@@ -591,11 +609,8 @@ def validate(
if trial.instance is not None:
kwargs["instance"] = trial.instance
- # TODO: Use submit run for faster evaluation
- # self._runner.submit_trial(trial_info=trial)
- _, cost, _, _, _ = self._runner.run(config, **kwargs)
- costs += [cost]
-
+            self._runner.submit_trial(trial_info=trial)
+            costs += [trial_value.cost for _, trial_value in self._runner.iter_results()]
np_costs = np.array(costs)
return np.mean(np_costs, axis=0)
diff --git a/smac/model/random_forest/pyrfr/__init__.py b/smac/model/random_forest/pyrfr/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/smac/model/random_forest/pyrfr/random_forest_pyrfr.py b/smac/model/random_forest/pyrfr/random_forest_pyrfr.py
new file mode 100644
index 000000000..6efa21f5b
--- /dev/null
+++ b/smac/model/random_forest/pyrfr/random_forest_pyrfr.py
@@ -0,0 +1,311 @@
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+from ConfigSpace import ConfigurationSpace
+
+try:
+ from pyrfr import regression
+ from pyrfr.regression import binary_rss_forest as BinaryForest
+ from pyrfr.regression import default_data_container as DataContainer
+except ImportError as e:
+ import warnings
+
+    warnings.warn(
+        "You are using the SMAC random forest with pyrfr. "
+        "However, the pyrfr package is not installed. "
+        "Please install pyrfr with the following commands:\n"
+        "conda install gxx_linux-64 gcc_linux-64 swig\n"
+        "pip install pyrfr>=0.9.0"
+    )
+ raise e
+
+
+from smac.constants import N_TREES, VERY_SMALL_NUMBER
+from smac.model.random_forest import AbstractRandomForest
+
+__copyright__ = "Copyright 2022, automl.org"
+__license__ = "3-clause BSD"
+
+
+class PyrfrRandomForest(AbstractRandomForest):
+ """Random forest that takes instance features into account.
+
+ Parameters
+ ----------
+ n_trees : int, defaults to `N_TREES`
+ The number of trees in the random forest.
+ n_points_per_tree : int, defaults to -1
+ Number of points per tree. If the value is smaller than 0, the number of samples will be used.
+ ratio_features : float, defaults to 5.0 / 6.0
+ The ratio of features that are considered for splitting.
+ min_samples_split : int, defaults to 3
+ The minimum number of data points to perform a split.
+ min_samples_leaf : int, defaults to 3
+ The minimum number of data points in a leaf.
+ max_depth : int, defaults to 2**20
+ The maximum depth of a single tree.
+ eps_purity : float, defaults to 1e-8
+ The minimum difference between two target values to be considered.
+ max_nodes : int, defaults to 2**20
+ The maximum total number of nodes in a tree.
+ bootstrapping : bool, defaults to True
+ Enables bootstrapping.
+ log_y: bool, defaults to False
+ The y values (passed to this random forest) are expected to be log(y) transformed.
+ This will be considered during predicting.
+ instance_features : dict[str, list[int | float]] | None, defaults to None
+ Features (list of int or floats) of the instances (str). The features are incorporated into the X data,
+ on which the model is trained on.
+ pca_components : float, defaults to 7
+ Number of components to keep when using PCA to reduce dimensionality of instance features.
+ seed : int
+ """
+
+ def __init__(
+ self,
+ configspace: ConfigurationSpace,
+ n_trees: int = N_TREES,
+ n_points_per_tree: int = -1,
+ ratio_features: float = 5.0 / 6.0,
+ min_samples_split: int = 3,
+ min_samples_leaf: int = 3,
+ max_depth: int = 2**20,
+ eps_purity: float = 1e-8,
+ max_nodes: int = 2**20,
+ bootstrapping: bool = True,
+ log_y: bool = False,
+ instance_features: dict[str, list[int | float]] | None = None,
+ pca_components: int | None = 7,
+ seed: int = 0,
+ ) -> None:
+ super().__init__(
+ configspace=configspace,
+ instance_features=instance_features,
+ pca_components=pca_components,
+ seed=seed,
+ )
+
+ max_features = 0 if ratio_features > 1.0 else max(1, int(len(self._types) * ratio_features))
+
+ self._rf_opts = regression.forest_opts()
+ self._rf_opts.num_trees = n_trees
+ self._rf_opts.do_bootstrapping = bootstrapping
+ self._rf_opts.tree_opts.max_features = max_features
+ self._rf_opts.tree_opts.min_samples_to_split = min_samples_split
+ self._rf_opts.tree_opts.min_samples_in_leaf = min_samples_leaf
+ self._rf_opts.tree_opts.max_depth = max_depth
+ self._rf_opts.tree_opts.epsilon_purity = eps_purity
+ self._rf_opts.tree_opts.max_num_nodes = max_nodes
+ self._rf_opts.compute_law_of_total_variance = False
+ self._rf: BinaryForest | None = None
+ self._log_y = log_y
+
+        # Cast to `int` in case we get an `np.integer` type
+ self._rng = regression.default_random_engine(int(seed))
+
+ self._n_trees = n_trees
+ self._n_points_per_tree = n_points_per_tree
+ self._ratio_features = ratio_features
+ self._min_samples_split = min_samples_split
+ self._min_samples_leaf = min_samples_leaf
+ self._max_depth = max_depth
+ self._eps_purity = eps_purity
+ self._max_nodes = max_nodes
+ self._bootstrapping = bootstrapping
+
+        # This list will be read out by save_iteration() in the solver
+ # self._hypers = [
+ # n_trees,
+ # max_nodes,
+ # bootstrapping,
+ # n_points_per_tree,
+ # ratio_features,
+ # min_samples_split,
+ # min_samples_leaf,
+ # max_depth,
+ # eps_purity,
+ # self._seed,
+ # ]
+
+ @property
+ def meta(self) -> dict[str, Any]: # noqa: D102
+ meta = super().meta
+ meta.update(
+ {
+ "n_trees": self._n_trees,
+ "n_points_per_tree": self._n_points_per_tree,
+ "ratio_features": self._ratio_features,
+ "min_samples_split": self._min_samples_split,
+ "min_samples_leaf": self._min_samples_leaf,
+ "max_depth": self._max_depth,
+ "eps_purity": self._eps_purity,
+ "max_nodes": self._max_nodes,
+ "bootstrapping": self._bootstrapping,
+ "pca_components": self._pca_components,
+ }
+ )
+
+ return meta
+
+ def _train(self, X: np.ndarray, y: np.ndarray) -> PyrfrRandomForest:
+ X = self._impute_inactive(X)
+ y = y.flatten()
+
+ # self.X = X
+ # self.y = y.flatten()
+
+ if self._n_points_per_tree <= 0:
+ self._rf_opts.num_data_points_per_tree = X.shape[0]
+ else:
+ self._rf_opts.num_data_points_per_tree = self._n_points_per_tree
+
+ self._rf = regression.binary_rss_forest()
+ self._rf.options = self._rf_opts
+
+ data = self._init_data_container(X, y)
+ self._rf.fit(data, rng=self._rng)
+
+ return self
+
+ def _init_data_container(self, X: np.ndarray, y: np.ndarray) -> DataContainer:
+        """Fills a pyrfr default data container s.t. the forest knows categoricals and bounds for continuous data.
+
+ Parameters
+ ----------
+ X : np.ndarray [#samples, #hyperparameter + #features]
+ Input data points.
+        y : np.ndarray [#samples, #objectives]
+ The corresponding target values.
+
+ Returns
+ -------
+ data : DataContainer
+ The filled data container that pyrfr can interpret.
+ """
+ # Retrieve the types and the bounds from the ConfigSpace
+ data = regression.default_data_container(X.shape[1])
+
+ for i, (mn, mx) in enumerate(self._bounds):
+ if np.isnan(mx):
+ data.set_type_of_feature(i, mn)
+ else:
+ data.set_bounds_of_feature(i, mn, mx)
+
+ for row_X, row_y in zip(X, y):
+ data.add_data_point(row_X, row_y)
+
+ return data
+
+ def _predict(
+ self,
+ X: np.ndarray,
+ covariance_type: str | None = "diagonal",
+ ) -> tuple[np.ndarray, np.ndarray | None]:
+ if len(X.shape) != 2:
+ raise ValueError("Expected 2d array, got %dd array!" % len(X.shape))
+
+ if X.shape[1] != len(self._types):
+ raise ValueError("Rows in X should have %d entries but have %d!" % (len(self._types), X.shape[1]))
+
+ if covariance_type != "diagonal":
+ raise ValueError("`covariance_type` can only take `diagonal` for this model.")
+
+ assert self._rf is not None
+ X = self._impute_inactive(X)
+
+ if self._log_y:
+ all_preds = []
+ third_dimension = 0
+
+ # Gather data in a list of 2d arrays and get statistics about the required size of the 3d array
+ for row_X in X:
+ preds_per_tree = self._rf.all_leaf_values(row_X)
+ all_preds.append(preds_per_tree)
+ max_num_leaf_data = max(map(len, preds_per_tree))
+ third_dimension = max(max_num_leaf_data, third_dimension)
+
+ # Transform list of 2d arrays into a 3d array
+ preds_as_array = np.zeros((X.shape[0], self._rf_opts.num_trees, third_dimension)) * np.nan
+ for i, preds_per_tree in enumerate(all_preds):
+ for j, pred in enumerate(preds_per_tree):
+ preds_as_array[i, j, : len(pred)] = pred
+
+ # Do all necessary computation with vectorized functions
+ preds_as_array = np.log(np.nanmean(np.exp(preds_as_array), axis=2) + VERY_SMALL_NUMBER)
+
+ # Compute the mean and the variance across the different trees
+ means = preds_as_array.mean(axis=1)
+ vars_ = preds_as_array.var(axis=1)
+ else:
+ means, vars_ = [], []
+ for row_X in X:
+ mean_, var = self._rf.predict_mean_var(row_X)
+ means.append(mean_)
+ vars_.append(var)
+
+ means = np.array(means)
+ vars_ = np.array(vars_)
+
+ return means.reshape((-1, 1)), vars_.reshape((-1, 1))
+
+ def predict_marginalized(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+ """Predicts mean and variance marginalized over all instances.
+
+ Note
+ ----
+ The method is random forest specific and follows the SMAC2 implementation. It requires
+ no distribution assumption to marginalize the uncertainty estimates.
+
+ Parameters
+ ----------
+ X : np.ndarray [#samples, #hyperparameter + #features]
+ Input data points.
+
+ Returns
+ -------
+ means : np.ndarray [#samples, 1]
+ The predictive mean.
+ vars : np.ndarray [#samples, 1]
+ The predictive variance.
+ """
+ if self._n_features == 0:
+ mean_, var = self.predict(X)
+ assert var is not None
+
+ var[var < self._var_threshold] = self._var_threshold
+ var[np.isnan(var)] = self._var_threshold
+
+ return mean_, var
+
+ assert self._instance_features is not None
+
+ if len(X.shape) != 2:
+ raise ValueError("Expected 2d array, got %dd array!" % len(X.shape))
+
+ if X.shape[1] != len(self._bounds):
+ raise ValueError("Rows in X should have %d entries but have %d!" % (len(self._bounds), X.shape[1]))
+
+ assert self._rf is not None
+ X = self._impute_inactive(X)
+
+ X_feat = list(self._instance_features.values())
+ dat_ = self._rf.predict_marginalized_over_instances_batch(X, X_feat, self._log_y)
+ dat_ = np.array(dat_)
+
+ # 3. compute statistics across trees
+ mean_ = dat_.mean(axis=1)
+ var = dat_.var(axis=1)
+
+ if var is None:
+ raise RuntimeError("The variance must not be none.")
+
+ var[var < self._var_threshold] = self._var_threshold
+
+ if len(mean_.shape) == 1:
+ mean_ = mean_.reshape((-1, 1))
+ if len(var.shape) == 1:
+ var = var.reshape((-1, 1))
+
+ return mean_, var
diff --git a/smac/model/random_forest/random_forest.py b/smac/model/random_forest/random_forest.py
index 0c5110240..e60ca088e 100644
--- a/smac/model/random_forest/random_forest.py
+++ b/smac/model/random_forest/random_forest.py
@@ -1,20 +1,640 @@
from __future__ import annotations
-from typing import Any
+from typing import Any, Callable, Iterable, Tuple
+
+import threading
+from itertools import product
import numpy as np
from ConfigSpace import ConfigurationSpace
-from pyrfr import regression
-from pyrfr.regression import binary_rss_forest as BinaryForest
-from pyrfr.regression import default_data_container as DataContainer
-
-from smac.constants import N_TREES, VERY_SMALL_NUMBER
+from scipy.sparse import issparse
+from sklearn.ensemble._base import _partition_estimators
+from sklearn.ensemble._forest import ForestRegressor
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.tree._tree import DTYPE
+from sklearn.utils.parallel import Parallel, delayed
+from sklearn.utils.validation import check_is_fitted, validate_data
+
+from smac.constants import N_TREES
from smac.model.random_forest import AbstractRandomForest
__copyright__ = "Copyright 2025, Leibniz University Hanover, Institute of AI"
__license__ = "3-clause BSD"
+def estimator_predict(predict: Callable, X: np.ndarray, results: np.ndarray, tree_idx: int) -> None:
+ """
+ Collect predictions from a single estimator.
+
+ Parameters
+ ----------
+ predict: Callable
+ the prediction function, in this scenario, it is the prediction function of each tree
+ X: np.ndarray [#samples, #hyperparameter]
+ input features
+ results: np.ndarray [#samples, #estimators]
+ output values from all the predictors
+    tree_idx: int
+        The index of the estimator (tree) whose predictions are written into ``results``
+ """
+ prediction = predict(X, check_input=False)
+ results[:, tree_idx] = prediction # Populate the corresponding column
+
+
+def accumulate_predict_over_instances(
+ predict: Callable,
+ X: np.ndarray,
+ X_instance_feat: np.ndarray,
+ results: np.ndarray,
+ tree_idx: int,
+ n_instances: int,
+ lock: threading.Lock,
+) -> None:
+ """
+ Collect predictions from a single estimator. However, we sum the results from all instances
+
+ Parameters
+ ----------
+ predict: Callable
+ the prediction function, in this scenario, it is the prediction function of each tree
+ X: np.ndarray [#samples, #hyperparameter]
+ Input data points.
+ X_instance_feat: np.ndarray [#instance, #features],
+ Features (np.ndarray) of the instances (str). The features are incorporated into the X data,
+ on which the model is trained on.
+
+ results: np.ndarray [#samples, #estimators]
+ output values from all the predictors
+ tree_idx: int
+ tree index
+ n_instances: int
+ number of instance
+ lock: threading.Lock
+ threading lock
+ """
+ X_instance_feat_ = np.tile(X_instance_feat[None, :], (len(X), 1))
+ prediction = predict(np.concatenate([X, X_instance_feat_], axis=1), check_input=False)
+ with lock:
+        results[:, tree_idx] += prediction / n_instances
+
+
+class EPMRandomForest(ForestRegressor):
+ def __init__(
+ self,
+ n_estimators: int = 100,
+ *,
+ log_y: bool = False,
+ cross_trees_variance: bool = False,
+ criterion: str = "squared_error",
+ splitter: str = "random",
+ max_depth: int | None = None,
+ min_samples_split: int = 2,
+ min_samples_leaf: int = 1,
+ min_weight_fraction_leaf: float = 0.0,
+ max_features: float = 1.0,
+ max_leaf_nodes: int | None = None,
+ min_impurity_decrease: float = 0.0,
+ bootstrap: bool = False,
+ oob_score: bool = False,
+ n_jobs: int | None = None,
+ random_state: int | None = None,
+ verbose: int = 0,
+ warm_start: bool = False,
+ ccp_alpha: float = 0.0,
+ max_samples: int | float | None = None,
+ monotonic_cst: Iterable | None = None,
+ ) -> None:
+        """A random forest regressor used as SMAC's surrogate model, based on scikit-learn's ``ForestRegressor``.
+
+        Read more in the scikit-learn user guide on forests of randomized trees.
+
+ Parameters
+ ----------
+ n_estimators : int, default=100
+ The number of trees in the forest.
+
+ .. versionchanged:: 0.22
+ The default value of ``n_estimators`` changed from 10 to 100
+ in 0.22.
+
+ criterion : {"squared_error", "absolute_error", "friedman_mse", "poisson"}, \
+ default="squared_error"
+ The function to measure the quality of a split. Supported criteria
+ are "squared_error" for the mean squared error, which is equal to
+ variance reduction as feature selection criterion and minimizes the L2
+ loss using the mean of each terminal node, "friedman_mse", which uses
+ mean squared error with Friedman's improvement score for potential
+ splits, "absolute_error" for the mean absolute error, which minimizes
+ the L1 loss using the median of each terminal node, and "poisson" which
+ uses reduction in Poisson deviance to find splits.
+ Training using "absolute_error" is significantly slower
+ than when using "squared_error".
+
+ .. versionadded:: 0.18
+ Mean Absolute Error (MAE) criterion.
+
+ .. versionadded:: 1.0
+ Poisson criterion.
+
+ max_depth : int, default=None
+ The maximum depth of the tree. If None, then nodes are expanded until
+ all leaves are pure or until all leaves contain less than
+ min_samples_split samples.
+
+ min_samples_split : int or float, default=2
+ The minimum number of samples required to split an internal node:
+
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ .. versionchanged:: 0.18
+ Added float values for fractions.
+
+ min_samples_leaf : int or float, default=1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least ``min_samples_leaf`` training samples in each of the left and
+ right branches. This may have the effect of smoothing the model,
+ especially in regression.
+
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ .. versionchanged:: 0.18
+ Added float values for fractions.
+
+ min_weight_fraction_leaf : float, default=0.0
+ The minimum weighted fraction of the sum total of weights (of all
+ the input samples) required to be at a leaf node. Samples have
+ equal weight when sample_weight is not provided.
+
+ max_features : {"sqrt", "log2", None}, int or float, default=1.0
+ The number of features to consider when looking for the best split:
+
+ - If int, then consider `max_features` features at each split.
+ - If float, then `max_features` is a fraction and
+ `max(1, int(max_features * n_features_in_))` features are considered at each
+ split.
+ - If "auto", then `max_features=n_features`.
+ - If "sqrt", then `max_features=sqrt(n_features)`.
+ - If "log2", then `max_features=log2(n_features)`.
+ - If None or 1.0, then `max_features=n_features`.
+
+ .. note::
+ The default of 1.0 is equivalent to bagged trees and more
+ randomness can be achieved by setting smaller values, e.g. 0.3.
+
+ .. versionchanged:: 1.1
+ The default of `max_features` changed from `"auto"` to 1.0.
+
+ .. deprecated:: 1.1
+ The `"auto"` option was deprecated in 1.1 and will be removed
+ in 1.3.
+
+ Note: the search for a split does not stop until at least one
+ valid partition of the node samples is found, even if it requires to
+ effectively inspect more than ``max_features`` features.
+
+ max_leaf_nodes : int, default=None
+ Grow trees with ``max_leaf_nodes`` in best-first fashion.
+ Best nodes are defined as relative reduction in impurity.
+ If None then unlimited number of leaf nodes.
+
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
+
+ The weighted impurity decrease equation is the following::
+
+ N_t / N * (impurity - N_t_R / N_t * right_impurity
+ - N_t_L / N_t * left_impurity)
+
+ where ``N`` is the total number of samples, ``N_t`` is the number of
+ samples at the current node, ``N_t_L`` is the number of samples in the
+ left child, and ``N_t_R`` is the number of samples in the right child.
+
+ ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
+ if ``sample_weight`` is passed.
+
+ .. versionadded:: 0.19
+
+ bootstrap : bool, default=True
+ Whether bootstrap samples are used when building trees. If False, the
+ whole dataset is used to build each tree.
+
+ oob_score : bool, default=False
+ Whether to use out-of-bag samples to estimate the generalization score.
+ Only available if bootstrap=True.
+
+ n_jobs : int, default=None
+ The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,
+ :meth:`decision_path` and :meth:`apply` are all parallelized over the
+ trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`
+ context. ``-1`` means using all processors. See :term:`Glossary
+ <n_jobs>` for more details.
+
+ random_state : int, RandomState instance or None, default=None
+ Controls both the randomness of the bootstrapping of the samples used
+ when building trees (if ``bootstrap=True``) and the sampling of the
+ features to consider when looking for the best split at each node
+ (if ``max_features < n_features``).
+ See :term:`Glossary <random_state>` for details.
+
+ verbose : int, default=0
+ Controls the verbosity when fitting and predicting.
+
+ warm_start : bool, default=False
+ When set to ``True``, reuse the solution of the previous call to fit
+ and add more estimators to the ensemble, otherwise, just fit a whole
+ new forest. See :term:`Glossary <warm_start>` and
+ :ref:`gradient_boosting_warm_start` for details.
+
+ ccp_alpha : non-negative float, default=0.0
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
+ subtree with the largest cost complexity that is smaller than
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
+ :ref:`minimal_cost_complexity_pruning` for details.
+
+ .. versionadded:: 0.22
+
+ max_samples : int or float, default=None
+ If bootstrap is True, the number of samples to draw from X
+ to train each base estimator.
+
+ - If None (default), then draw `X.shape[0]` samples.
+ - If int, then draw `max_samples` samples.
+ - If float, then draw `max_samples * X.shape[0]` samples. Thus,
+ `max_samples` should be in the interval `(0.0, 1.0]`.
+
+ .. versionadded:: 0.22
+
+ criterion : {"squared_error", "friedman_mse", "absolute_error", \
+ "poisson"}, default="squared_error"
+ The function to measure the quality of a split. Supported criteria
+ are "squared_error" for the mean squared error, which is equal to
+ variance reduction as feature selection criterion and minimizes the L2
+ loss using the mean of each terminal node, "friedman_mse", which uses
+ mean squared error with Friedman's improvement score for potential
+ splits, "absolute_error" for the mean absolute error, which minimizes
+ the L1 loss using the median of each terminal node, and "poisson" which
+ uses reduction in Poisson deviance to find splits.
+
+ .. versionadded:: 0.18
+ Mean Absolute Error (MAE) criterion.
+
+ .. versionadded:: 0.24
+ Poisson deviance criterion.
+
+ splitter : {"best", "random"}, default="best"
+ The strategy used to choose the split at each node. Supported
+ strategies are "best" to choose the best split and "random" to choose
+ the best random split.
+
+ max_depth : int, default=None
+ The maximum depth of the tree. If None, then nodes are expanded until
+ all leaves are pure or until all leaves contain less than
+ min_samples_split samples.
+
+ min_samples_split : int or float, default=2
+ The minimum number of samples required to split an internal node:
+
+ - If int, then consider `min_samples_split` as the minimum number.
+ - If float, then `min_samples_split` is a fraction and
+ `ceil(min_samples_split * n_samples)` are the minimum
+ number of samples for each split.
+
+ .. versionchanged:: 0.18
+ Added float values for fractions.
+
+ min_samples_leaf : int or float, default=1
+ The minimum number of samples required to be at a leaf node.
+ A split point at any depth will only be considered if it leaves at
+ least ``min_samples_leaf`` training samples in each of the left and
+ right branches. This may have the effect of smoothing the model,
+ especially in regression.
+
+ - If int, then consider `min_samples_leaf` as the minimum number.
+ - If float, then `min_samples_leaf` is a fraction and
+ `ceil(min_samples_leaf * n_samples)` are the minimum
+ number of samples for each node.
+
+ .. versionchanged:: 0.18
+ Added float values for fractions.
+
+ min_weight_fraction_leaf : float, default=0.0
+ The minimum weighted fraction of the sum total of weights (of all
+ the input samples) required to be at a leaf node. Samples have
+ equal weight when sample_weight is not provided.
+
+ max_features : int, float or {"auto", "sqrt", "log2"}, default=None
+ The number of features to consider when looking for the best split:
+
+ - If int, then consider `max_features` features at each split.
+ - If float, then `max_features` is a fraction and
+ `max(1, int(max_features * n_features_in_))` features are considered at each
+ split.
+ - If "auto", then `max_features=n_features`.
+ - If "sqrt", then `max_features=sqrt(n_features)`.
+ - If "log2", then `max_features=log2(n_features)`.
+ - If None, then `max_features=n_features`.
+
+ .. deprecated:: 1.1
+ The `"auto"` option was deprecated in 1.1 and will be removed
+ in 1.3.
+
+ Note: the search for a split does not stop until at least one
+ valid partition of the node samples is found, even if it requires to
+ effectively inspect more than ``max_features`` features.
+
+ random_state : int, RandomState instance or None, default=None
+ Controls the randomness of the estimator. The features are always
+ randomly permuted at each split, even if ``splitter`` is set to
+ ``"best"``. When ``max_features < n_features``, the algorithm will
+ select ``max_features`` at random at each split before finding the best
+ split among them. But the best found split may vary across different
+ runs, even if ``max_features=n_features``. That is the case, if the
+ improvement of the criterion is identical for several splits and one
+ split has to be selected at random. To obtain a deterministic behaviour
+ during fitting, ``random_state`` has to be fixed to an integer.
+ See :term:`Glossary <random_state>` for details.
+
+ max_leaf_nodes : int, default=None
+ Grow a tree with ``max_leaf_nodes`` in best-first fashion.
+ Best nodes are defined as relative reduction in impurity.
+ If None then unlimited number of leaf nodes.
+
+ min_impurity_decrease : float, default=0.0
+ A node will be split if this split induces a decrease of the impurity
+ greater than or equal to this value.
+
+ The weighted impurity decrease equation is the following::
+
+ N_t / N * (impurity - N_t_R / N_t * right_impurity
+ - N_t_L / N_t * left_impurity)
+
+ where ``N`` is the total number of samples, ``N_t`` is the number of
+ samples at the current node, ``N_t_L`` is the number of samples in the
+ left child, and ``N_t_R`` is the number of samples in the right child.
+
+ ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,
+ if ``sample_weight`` is passed.
+
+ .. versionadded:: 0.19
+
+ ccp_alpha : non-negative float, default=0.0
+ Complexity parameter used for Minimal Cost-Complexity Pruning. The
+ subtree with the largest cost complexity that is smaller than
+ ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
+ :ref:`minimal_cost_complexity_pruning` for details.
+
+ .. versionadded:: 0.22
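+
+ Examples
+ --------
+ A minimal construction sketch (``X_train``, ``y_train`` and ``X_test`` are illustrative
+ placeholders; any of the tree and forest parameters documented above may be passed)::
+
+ forest = EPMRandomForest(n_estimators=10, log_y=False, random_state=0)
+ forest.fit(X_train, y_train)
+ means, variances = forest.predict(X_test)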
+ """
+ super().__init__(
+ DecisionTreeRegressor(),
+ n_estimators,
+ estimator_params=(
+ "criterion",
+ "max_depth",
+ "min_samples_split",
+ "min_samples_leaf",
+ "min_weight_fraction_leaf",
+ "max_features",
+ "max_leaf_nodes",
+ "min_impurity_decrease",
+ "random_state",
+ "ccp_alpha",
+ "monotonic_cst",
+ ),
+ bootstrap=bootstrap,
+ oob_score=oob_score,
+ n_jobs=n_jobs,
+ random_state=random_state,
+ verbose=verbose,
+ warm_start=warm_start,
+ max_samples=max_samples,
+ )
+ self.criterion = criterion
+ self.max_depth = max_depth
+ self.min_samples_split = min_samples_split
+ self.min_samples_leaf = min_samples_leaf
+ self.min_weight_fraction_leaf = min_weight_fraction_leaf
+ self.max_features = max_features
+ self.max_leaf_nodes = max_leaf_nodes
+ self.min_impurity_decrease = min_impurity_decrease
+ self.ccp_alpha = ccp_alpha
+ self.monotonic_cst = monotonic_cst
+ self.splitter = splitter
+ self.log_y = log_y
+ self.cross_trees_variance = cross_trees_variance
+
+ def fit(self, X: np.ndarray, y: np.ndarray, sample_weight=None) -> None: # type: ignore
+ """
+ Build a forest of trees from the training set (X, y). In addition to the vanilla RF fitting process, we also
+ need to edit the estimators' leaf values after fitting when self.log_y is True. This ensures that the model
+ behaves consistently with the pyrfr version. To compute the means of all the values, we first need to recover
+ the log-scaled values stored in the leaf nodes to their raw scale and then compute the mean over those values.
+ This mean value is then transformed back to the log scale.
+
+ Parameters
+ ----------
+ X : {array-like, sparse matrix} of shape (n_samples, n_features)
+ The training input samples. Internally, its dtype will be converted
+ to ``dtype=np.float32``. If a sparse matrix is provided, it will be
+ converted into a sparse ``csc_matrix``.
+
+ y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+ The target values (class labels in classification, real numbers in
+ regression).
+
+ sample_weight : array-like of shape (n_samples,), default=None
+ Sample weights. If None, then samples are equally weighted. Splits
+ that would create child nodes with net zero or negative weight are
+ ignored while searching for a split in each node. In the case of
+ classification, splits are also ignored if they would result in any
+ single class carrying a negative weight in either child node.
+
+ Returns
+ -------
+ self : object
+ Fitted estimator.
+ """
+ assert sample_weight is None, "Sample weights are not supported"
+ super().fit(X=X, y=y, sample_weight=sample_weight)
+
+ self.trainX = X
+ self.trainY = y
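+ # When log_y is enabled, the leaves store log-scale targets. Below we overwrite every leaf value with
+ # log(mean(exp(y_leaf))), i.e. the mean is taken in the raw scale and then mapped back to the log scale.
+ # For example, leaf targets [log 2, log 8] yield a stored value of log 5 rather than log 4.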
+ if self.log_y:
+ for tree, samples_idx in zip(self.estimators_, self.estimators_samples_):
+ curX = X[samples_idx]
+ curY = y[samples_idx]
+ preds = tree.apply(curX)
+ for k in np.unique(preds):
+ tree.tree_.value[k, 0, 0] = np.log(np.exp(curY[preds == k]).mean())
+
+ def all_trees_pred(self, X: np.ndarray) -> np.ndarray:
+ """
+ Predict the targets for X with every tree in parallel. It is based on the RF regressor from sklearn 1.6.1:
+ https://github.com/scikit-learn/scikit-learn/blob/99bf3d8e4eed5ba5db19a1869482a238b6223ffd/sklearn/ensemble/_forest.py#L1045
+
+ Parameters
+ ----------
+ X: np.ndarray [#samples, #features]
+ Input features X.
+
+ Returns
+ -------
+ preds: np.ndarray [#samples, #estimators, #outputs]
+ Predictions from all trees
+
+ """
+ # check_is_fitted(self)
+ # Check data
+ X = self._validate_X_predict(X)
+
+ if X.ndim == 1:
+ X = X[None, :]
+
+ # Assign chunk of trees to jobs
+ n_jobs, _, _ = _partition_estimators(self.n_estimators, self.n_jobs)
+
+ # store the output of every estimator separately so that per-tree statistics can be computed later
+ if self.n_outputs_ > 1:
+ preds = np.zeros((X.shape[0], self.n_estimators, self.n_outputs_), dtype=np.float64)
+ else:
+ preds = np.zeros((X.shape[0], self.n_estimators), dtype=np.float64)
+
+ # Parallel loop
+ Parallel(n_jobs=n_jobs, verbose=self.verbose, require="sharedmem")(
+ delayed(estimator_predict)(e.predict, X, preds, tree_idx) for tree_idx, e in enumerate(self.estimators_)
+ )
+ # This should be equivalent to the following implementation
+
+ # preds_ = np.zeros([len(X), self.n_estimators])
+ # for i, tree in enumerate(self.estimators_):
+ # preds_[:, i] = tree.predict(X)
+ # assert np.allclose(preds, preds_)
+
+ return preds
+
+ def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+ """
+ Predict the mean and variance for X. Here, the mean and variance are the empirical mean and variance of the
+ prediction results across the individual trees.
+
+ Parameters
+ ----------
+ X: np.ndarray [#samples, #hyperparameter]
+ Input data points to be tested.
+
+ Returns
+ -------
+ means: np.ndarray [#samples, 1]
+ predicted mean.
+ vars: np.ndarray [#samples, 1]
+ predicted variance.
+
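+ Notes
+ -----
+ A minimal usage sketch (``forest`` is an already fitted ``EPMRandomForest`` trained on
+ 3-dimensional inputs; the test points are illustrative only)::
+
+ means, variances = forest.predict(np.random.rand(5, 3))
+ # means and variances both have shape (5, 1)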
+ """
+ preds = self.all_trees_pred(X)
+
+ means = preds.mean(axis=1)
+ vars = preds.var(axis=1)
+
+ return means.reshape(-1, 1), vars.reshape(-1, 1)
+
+ def predict_marginalized_over_instances_batch(self, X: np.ndarray, X_feat: np.ndarray, log_y: bool) -> np.ndarray:
+ """
+ Collects the predictions for each tree in the forest for multiple configurations over a set of instances.
+ Each configuration vector is combined with all the instance feature vectors. Based on the response values over
+ all these feature vectors the mean is computed. In the case of log transformation the response values are
+ decompressed before averaging.
+
+ Parameters
+ ----------
+ X: np.ndarray [#samples, #hyperparameter]
+ Input data points.
+ X_feat: np.ndarray [#instances, #features]
+ Features of the instances. The features are incorporated into the X data
+ on which the model is trained.
+ log_y: bool
+ Whether a log transformation is applied to the predictions.
+
+ Returns
+ -------
+ preds: np.ndarray [#samples, #estimators]
+ Predictions for each sample and tree. Each element in preds corresponds to the mean response value of
+ the target estimator and configuration across all the instances.
+
+ """
+ X = self._validate_X_predict(X, ensure_2d=False)
+ X_feat = self._validate_X_predict(X_feat, ensure_2d=False)
+ assert X.shape[-1] + X_feat.shape[-1] == self.n_features_in_
+
+ n_instances = len(X_feat)
+
+ if X.ndim == 1:
+ X = X[None, :]
+
+ # Assign chunk of trees to jobs
+ n_jobs, _, _ = _partition_estimators(self.n_estimators * n_instances, self.n_jobs)
+
+ # store one (instance-averaged) prediction per estimator; the instances are accumulated inside the workers
+ if self.n_outputs_ > 1:
+ preds = np.zeros((X.shape[0], self.n_estimators, self.n_outputs_), dtype=np.float64)
+ else:
+ preds = np.zeros((X.shape[0], self.n_estimators), dtype=np.float64)
+ lock = threading.Lock()
+ # Parallel loop
+ Parallel(n_jobs=n_jobs, verbose=self.verbose, require="sharedmem")(
+ delayed(accumulate_predict_over_instances)(e.predict, X, x_feat, preds, tree_idx, n_instances, lock)
+ for (tree_idx, e), x_feat in product(enumerate(self.estimators_), X_feat)
+ )
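+ # Conceptually, the parallel loop above corresponds to the following sequential sketch (ignoring the
+ # lock and the log_y decompression handled inside accumulate_predict_over_instances):
+ #
+ # for tree_idx, e in enumerate(self.estimators_):
+ # for x_feat in X_feat:
+ # X_full = np.hstack([X, np.tile(x_feat, (len(X), 1))])
+ # preds[:, tree_idx] += e.predict(X_full) / n_instances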
+
+ return preds
+
+ def _validate_X_predict(self, X: np.ndarray, ensure_2d: bool = True) -> np.ndarray:
+ """
+ Validate X whenever one tries to predict, apply, predict_proba.
+ It is based on the RF regressor from sklearn 1.6.1:
+ https://github.com/scikit-learn/scikit-learn/blob/99bf3d8e4eed5ba5db19a1869482a238b6223ffd/sklearn/ensemble/_forest.py#L629
+ However, we add another parameter that allows the model to skip the feature check.
+ This is used in the cases where we have both hyperparameter features and instance features; the two are only
+ concatenated within each tree's prediction function. Hence, there is no need to check whether their individual
+ numbers of features match the number of features set in the RF model.
+ For predict_marginalized_over_instances_batch, we check afterwards that the combined number of features fits the model.
+
+ Parameters
+ ----------
+ X: np.ndarray
+ Input features to be validated.
+ ensure_2d: bool
+ Whether to check that the number of features in X matches that of the fitted estimator. Set to False
+ when X contains only part of the features (e.g. hyperparameters without instance features).
+
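+ Returns
+ -------
+ X: np.ndarray
+ The validated input, converted to the dtype expected by the trees.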
+ """
+ check_is_fitted(self)
+ if self.estimators_[0]._support_missing_values(X):
+ ensure_all_finite = "allow-nan"
+ else:
+ ensure_all_finite = True # type: ignore
+ X = validate_data(
+ self,
+ X,
+ dtype=DTYPE,
+ accept_sparse="csr",
+ reset=False,
+ ensure_all_finite=ensure_all_finite,
+ ensure_2d=ensure_2d,
+ )
+ if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc): # type: ignore
+ raise ValueError("No support for np.int64 index based sparse matrices")
+ return X
+
+
class RandomForest(AbstractRandomForest):
"""Random forest that takes instance features into account.
@@ -22,8 +642,9 @@ class RandomForest(AbstractRandomForest):
----------
n_trees : int, defaults to `N_TREES`
The number of trees in the random forest.
- n_points_per_tree : int, defaults to -1
- Number of points per tree. If the value is smaller than 0, the number of samples will be used.
+ max_samples : int | float | None, defaults to None
+ Number of points per tree. If None, all n_samples points are used. Otherwise, max_samples points are drawn
+ (if it is an int) or max(round(n_samples * max_samples), 1) points (if it is a float); e.g. max_samples=0.5
+ draws roughly half of the training points for each tree.
ratio_features : float, defaults to 5.0 / 6.0
The ratio of features that are considered for splitting.
min_samples_split : int, defaults to 3
@@ -32,9 +653,7 @@ class RandomForest(AbstractRandomForest):
The minimum number of data points in a leaf.
max_depth : int, defaults to 2**20
The maximum depth of a single tree.
- eps_purity : float, defaults to 1e-8
- The minimum difference between two target values to be considered.
- max_nodes : int, defaults to 2**20
+ max_leaf_nodes : int, defaults to 2**20
The maximum total number of nodes in a tree.
bootstrapping : bool, defaults to True
Enables bootstrapping.
@@ -52,19 +671,29 @@ class RandomForest(AbstractRandomForest):
def __init__(
self,
configspace: ConfigurationSpace,
- n_trees: int = N_TREES,
- n_points_per_tree: int = -1,
+ max_samples: int | float | None = None,
ratio_features: float = 5.0 / 6.0,
- min_samples_split: int = 3,
- min_samples_leaf: int = 3,
- max_depth: int = 2**20,
- eps_purity: float = 1e-8,
- max_nodes: int = 2**20,
- bootstrapping: bool = True,
log_y: bool = False,
instance_features: dict[str, list[int | float]] | None = None,
pca_components: int | None = 7,
seed: int = 0,
+ n_trees: int = N_TREES,
+ cross_trees_variance: bool = False,
+ criterion: str = "squared_error",
+ splitter: str = "random",
+ max_depth: int = 2**20,
+ min_samples_split: int = 3,
+ min_samples_leaf: int = 3,
+ min_weight_fraction_leaf: float = 0.0,
+ max_leaf_nodes: int = 2**20,
+ min_impurity_decrease: float = 1e-8,
+ bootstrapping: bool = True,
+ oob_score: bool = False,
+ n_jobs: int | None = -1,
+ verbose: int = 0,
+ warm_start: bool = False,
+ ccp_alpha: float = 0.0,
+ monotonic_cst: Iterable | None = None,
) -> None:
super().__init__(
configspace=configspace,
@@ -75,63 +704,39 @@ def __init__(
max_features = 0 if ratio_features > 1.0 else max(1, int(len(self._types) * ratio_features))
- self._rf_opts = regression.forest_opts()
- self._rf_opts.num_trees = n_trees
- self._rf_opts.do_bootstrapping = bootstrapping
- self._rf_opts.tree_opts.max_features = max_features
- self._rf_opts.tree_opts.min_samples_to_split = min_samples_split
- self._rf_opts.tree_opts.min_samples_in_leaf = min_samples_leaf
- self._rf_opts.tree_opts.max_depth = max_depth
- self._rf_opts.tree_opts.epsilon_purity = eps_purity
- self._rf_opts.tree_opts.max_num_nodes = max_nodes
- self._rf_opts.compute_law_of_total_variance = False
- self._rf: BinaryForest | None = None
+ self._rf: EPMRandomForest | None = None
+ self._rng = np.random.default_rng(seed=seed) # type: ignore
+
self._log_y = log_y
- # Case to `int` incase we get an `np.integer` type
- self._rng = regression.default_random_engine(int(seed))
-
- self._n_trees = n_trees
- self._n_points_per_tree = n_points_per_tree
- self._ratio_features = ratio_features
- self._min_samples_split = min_samples_split
- self._min_samples_leaf = min_samples_leaf
- self._max_depth = max_depth
- self._eps_purity = eps_purity
- self._max_nodes = max_nodes
- self._bootstrapping = bootstrapping
-
- # This list well be read out by save_iteration() in the solver
- # self._hypers = [
- # n_trees,
- # max_nodes,
- # bootstrapping,
- # n_points_per_tree,
- # ratio_features,
- # min_samples_split,
- # min_samples_leaf,
- # max_depth,
- # eps_purity,
- # self._seed,
- # ]
+ self._rf_opts = {
+ "n_estimators": n_trees,
+ "cross_trees_variance": cross_trees_variance,
+ "criterion": criterion,
+ "splitter": splitter,
+ "max_depth": max_depth,
+ "min_samples_split": min_samples_split,
+ "min_samples_leaf": min_samples_leaf,
+ "min_weight_fraction_leaf": min_weight_fraction_leaf,
+ "max_leaf_nodes": max_leaf_nodes,
+ "min_impurity_decrease": min_impurity_decrease,
+ "bootstrap": bootstrapping,
+ "oob_score": oob_score,
+ "n_jobs": n_jobs,
+ "verbose": verbose,
+ "warm_start": warm_start,
+ "ccp_alpha": ccp_alpha,
+ "max_samples": max_samples,
+ "monotonic_cst": monotonic_cst,
+ "random_state": seed,
+ "max_features": max_features,
+ "log_y": log_y,
+ }
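+ # self._rf_opts is passed verbatim to EPMRandomForest when the model is trained in _train().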
@property
def meta(self) -> dict[str, Any]: # noqa: D102
meta = super().meta
- meta.update(
- {
- "n_trees": self._n_trees,
- "n_points_per_tree": self._n_points_per_tree,
- "ratio_features": self._ratio_features,
- "min_samples_split": self._min_samples_split,
- "min_samples_leaf": self._min_samples_leaf,
- "max_depth": self._max_depth,
- "eps_purity": self._eps_purity,
- "max_nodes": self._max_nodes,
- "bootstrapping": self._bootstrapping,
- "pca_components": self._pca_components,
- }
- )
+ meta.update(self._rf_opts)
return meta
@@ -139,51 +744,12 @@ def _train(self, X: np.ndarray, y: np.ndarray) -> RandomForest:
X = self._impute_inactive(X)
y = y.flatten()
- # self.X = X
- # self.y = y.flatten()
-
- if self._n_points_per_tree <= 0:
- self._rf_opts.num_data_points_per_tree = X.shape[0]
- else:
- self._rf_opts.num_data_points_per_tree = self._n_points_per_tree
-
- self._rf = regression.binary_rss_forest()
- self._rf.options = self._rf_opts
+ self._rf = EPMRandomForest(**self._rf_opts) # type: ignore
- data = self._init_data_container(X, y)
- self._rf.fit(data, rng=self._rng)
+ self._rf.fit(X, y)
return self
- def _init_data_container(self, X: np.ndarray, y: np.ndarray) -> DataContainer:
- """Fills a pyrfr default data container s.t. the forest knows categoricals and bounds for continous data.
-
- Parameters
- ----------
- X : np.ndarray [#samples, #hyperparameter + #features]
- Input data points.
- Y : np.ndarray [#samples, #objectives]
- The corresponding target values.
-
- Returns
- -------
- data : DataContainer
- The filled data container that pyrfr can interpret.
- """
- # Retrieve the types and the bounds from the ConfigSpace
- data = regression.default_data_container(X.shape[1])
-
- for i, (mn, mx) in enumerate(self._bounds):
- if np.isnan(mx):
- data.set_type_of_feature(i, mn)
- else:
- data.set_bounds_of_feature(i, mn, mx)
-
- for row_X, row_y in zip(X, y):
- data.add_data_point(row_X, row_y)
-
- return data
-
def _predict(
self,
X: np.ndarray,
@@ -200,45 +766,19 @@ def _predict(
assert self._rf is not None
X = self._impute_inactive(X)
-
- if self._log_y:
- all_preds = []
- third_dimension = 0
-
- # Gather data in a list of 2d arrays and get statistics about the required size of the 3d array
- for row_X in X:
- preds_per_tree = self._rf.all_leaf_values(row_X)
- all_preds.append(preds_per_tree)
- max_num_leaf_data = max(map(len, preds_per_tree))
- third_dimension = max(max_num_leaf_data, third_dimension)
-
- # Transform list of 2d arrays into a 3d array
- preds_as_array = np.zeros((X.shape[0], self._rf_opts.num_trees, third_dimension)) * np.nan
- for i, preds_per_tree in enumerate(all_preds):
- for j, pred in enumerate(preds_per_tree):
- preds_as_array[i, j, : len(pred)] = pred
-
- # Do all necessary computation with vectorized functions
- preds_as_array = np.log(np.nanmean(np.exp(preds_as_array), axis=2) + VERY_SMALL_NUMBER)
-
- # Compute the mean and the variance across the different trees
- means = preds_as_array.mean(axis=1)
- vars_ = preds_as_array.var(axis=1)
- else:
- means, vars_ = [], []
- for row_X in X:
- mean_, var = self._rf.predict_mean_var(row_X)
- means.append(mean_)
- vars_.append(var)
-
- means = np.array(means)
- vars_ = np.array(vars_)
-
+ means, vars_ = self._rf.predict(X)
return means.reshape((-1, 1)), vars_.reshape((-1, 1))
def predict_marginalized(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""Predicts mean and variance marginalized over all instances.
+ Under the hood: Collects the predictions for each tree in the forest
+ for multiple configurations over a set of instances. Each configuration
+ vector is combined with all the instance feature vectors. Based on the
+ response values over all these feature vectors the mean is computed.
+ In the case of log transformation the response values are decompressed
+ before averaging.
+
Note
----
The method is random forest specific and follows the SMAC2 implementation. It requires
@@ -276,7 +816,7 @@ def predict_marginalized(self, X: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
assert self._rf is not None
X = self._impute_inactive(X)
- X_feat = list(self._instance_features.values())
+ X_feat = np.asarray(list(self._instance_features.values()))
dat_ = self._rf.predict_marginalized_over_instances_batch(X, X_feat, self._log_y)
dat_ = np.array(dat_)
diff --git a/smac/multi_objective/parego.py b/smac/multi_objective/parego.py
index 4345d1532..59cd5eacd 100644
--- a/smac/multi_objective/parego.py
+++ b/smac/multi_objective/parego.py
@@ -11,7 +11,7 @@
class ParEGO(AbstractMultiObjectiveAlgorithm):
- """ParEGO implementation based on https://www.cs.bham.ac.uk/~jdk/UKCI-2015.pdf.
+ """ParEGO implementation based on https://ieeexplore.ieee.org/abstract/document/1583627.
Parameters
----------
diff --git a/smac/runhistory/runhistory.py b/smac/runhistory/runhistory.py
index 6989b6aba..6f648a4a0 100644
--- a/smac/runhistory/runhistory.py
+++ b/smac/runhistory/runhistory.py
@@ -60,9 +60,11 @@ def __init__(
self,
multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
overwrite_existing_trials: bool = False,
+ n_objectives: int = -1,
) -> None:
self._multi_objective_algorithm = multi_objective_algorithm
self._overwrite_existing_trials = overwrite_existing_trials
+ self._n_objectives = n_objectives
self.reset()
@property
@@ -133,9 +135,6 @@ def reset(self) -> None:
# and is necessary for computing the moving average.
self._num_trials_per_config: dict[int, int] = {}
- # Store whether a datapoint is "external", which means it was read from
- # a JSON file. Can be chosen to not be written to disk.
- self._n_objectives: int = -1
self._objective_bounds: list[tuple[float, float]] = []
def __contains__(self, k: object) -> bool:
@@ -335,7 +334,7 @@ def add_running_trial(self, trial: TrialInfo) -> None:
"""
self.add(
config=trial.config,
- cost=float(MAXINT),
+ cost=float(MAXINT) if self._n_objectives <= 1 else [float(MAXINT)] * self._n_objectives,
time=0.0,
cpu_time=0.0,
status=StatusType.RUNNING,
diff --git a/tests/fixtures/config_selector.py b/tests/fixtures/config_selector.py
index 2ad2bece2..baff7680f 100644
--- a/tests/fixtures/config_selector.py
+++ b/tests/fixtures/config_selector.py
@@ -20,7 +20,7 @@ def __init__(
super().__init__(
scenario,
retrain_after=retrain_after,
- retries=retries,
+ max_new_config_tries=retries,
)
initial_design = RandomInitialDesign(scenario, n_configs=n_initial_configs)
diff --git a/tests/test_intensifier/test_hyperband_utils.py b/tests/test_intensifier/test_hyperband_utils.py
index 33179d0ad..0b0392b16 100644
--- a/tests/test_intensifier/test_hyperband_utils.py
+++ b/tests/test_intensifier/test_hyperband_utils.py
@@ -31,7 +31,7 @@ def test_determine_HB():
4: [81],
}
expected_trials_used = 206
- expected_budget_used = 547
+ expected_budget_used = 1902
expected_number_of_brackets = 5
assert result["max_iterations"] == expected_max_iterations
@@ -52,7 +52,7 @@ def test_determine_hyperband_for_multifidelity():
total_budget=total_budget, min_budget=min_budget, max_budget=max_budget, eta=eta
)
- expected_n_trials = 206 + 137 # 206 trials for one full round, and additional trials for the remaining budget
+ expected_n_trials = 188 # Budget not enough for one full round (would need 1902 as total budget)
assert result["n_trials"] == expected_n_trials
assert result["total_budget"] == total_budget
@@ -71,4 +71,9 @@ def test_get_n_trials_for_hyperband_multifidelity():
total_budget=total_budget, min_budget=min_budget, max_budget=max_budget, eta=eta
)
- assert n_trials == (206 + 137)
+ assert n_trials == 188
+
+if __name__ == "__main__":
+ test_determine_HB()
+ test_determine_hyperband_for_multifidelity()
+ test_get_n_trials_for_hyperband_multifidelity()
\ No newline at end of file
diff --git a/tests/test_main/test_config_selector.py b/tests/test_main/test_config_selector.py
new file mode 100644
index 000000000..6073c1813
--- /dev/null
+++ b/tests/test_main/test_config_selector.py
@@ -0,0 +1,27 @@
+import pytest
+
+from smac.main.exceptions import ConfigurationSpaceExhaustedException
+from ConfigSpace import ConfigurationSpace, Categorical
+from smac import HyperparameterOptimizationFacade, Scenario
+
+
+def test_exhausted_configspace():
+ cs = ConfigurationSpace()
+ cs.add(Categorical("x", [1, 2, 3]))
+
+ def objective_function(x, seed):
+ return x["x"] ** 2
+
+ scenario = Scenario(
+ configspace=cs,
+ n_trials=10,
+ )
+
+ smac = HyperparameterOptimizationFacade(
+ scenario,
+ objective_function,
+ overwrite=True,
+ )
+
+ with pytest.raises(ConfigurationSpaceExhaustedException):
+ smac.optimize()
diff --git a/tests/test_model/test_rf.py b/tests/test_model/test_rf.py
index 65e6478ff..789e8fb40 100644
--- a/tests/test_model/test_rf.py
+++ b/tests/test_model/test_rf.py
@@ -9,6 +9,7 @@
UniformIntegerHyperparameter,
)
+from smac import constants
from smac.model.random_forest.random_forest import RandomForest
from smac.utils.configspace import convert_configurations_to_array
@@ -125,6 +126,24 @@ def test_predict_marginalized():
assert means.shape == (20, 1)
assert variances.shape == (20, 1)
+ # Now ensure that the prediction results are the same as when we make the predictions individually
+ n_estimators = model._rf_opts["n_estimators"]
+ n_features = len(F)
+ n_data = len(X)
+
+ all_features = np.asarray(list(F.values()))
+ all_preds = np.empty(
+ [n_data, n_estimators, n_features]
+ )
+ for i_tree in range(n_estimators):
+ for i_feat in range(n_features):
+ for i_data in range(n_data):
+ x_input = np.concatenate([X[i_data], all_features[i_feat]])[None, :]
+ all_preds[[i_data], i_tree, i_feat] = model._rf.estimators_[i_tree].predict(x_input)
+ pred_marginalized_over_instance = np.mean(all_preds, -1)
+ assert np.allclose(np.mean(pred_marginalized_over_instance, axis=-1, keepdims=True), means)
+ assert np.allclose(np.var(pred_marginalized_over_instance, axis=-1, keepdims=True), variances)
+
def test_predict_marginalized_mocked():
rs = np.random.RandomState(1)
@@ -288,3 +307,72 @@ def test_impute_inactive_hyperparameters():
elif line[0] == 2:
assert line[1] == 2
assert line[2] == -1
+
+
+def test_rf_with_log_y():
+ X = np.array(
+ [
+ [0.0, 0.0, 0.0],
+ [0.0, 0.0, 1.0],
+ [0.0, 1.0, 0.0],
+ [0.0, 1.0, 1.0],
+ [1.0, 0.0, 0.0],
+ [1.0, 0.0, 1.0],
+ [1.0, 1.0, 0.0],
+ [1.0, 1.0, 1.0],
+ ],
+ dtype=np.float64,
+ )
+ y = np.array([[0.1], [0.2], [9], [9.2], [100.0], [100.2], [109.0], [109.2]], dtype=np.float64)
+ model1 = RandomForest(
+ configspace=_get_cs(3),
+ instance_features=None,
+ seed=12345,
+ ratio_features=1.0,
+ log_y=True
+ )
+ model1.train(np.vstack((X, X, X, X, X, X, X, X)), np.vstack((y, y, y, y, y, y, y, y)))
+ X_test = np.random.rand(10, 3)
+
+ mean1, var1 = model1.predict(X_test)
+ #for y_i, y_hat_i in zip(y.reshape((1, -1)).flatten(), y_hat.reshape((1, -1)).flatten()):
+ # assert pytest.approx(y_i, 0.1) == y_hat_i
+
+ # The following should be equivalent to the log_y version
+
+ model2 = RandomForest(
+ configspace=_get_cs(3),
+ instance_features=None,
+ seed=12345,
+ ratio_features=1.0,
+ log_y=False
+ )
+ all_preds = []
+ third_dimension = 0
+
+ model2.train(np.vstack((X, X, X, X, X, X, X, X)), np.vstack((y, y, y, y, y, y, y, y)))
+
+ # Gather data in a list of 2d arrays and get statistics about the required size of the 3d array
+ for row_X in X_test:
+ preds_per_tree = [estimator.predict(row_X[None, :]) for estimator in model2._rf.estimators_]
+ #preds_per_tree = model_no_logy._rf.all_leaf_values(row_X)
+ all_preds.append(preds_per_tree)
+ max_num_leaf_data = max(map(len, preds_per_tree))
+ third_dimension = max(max_num_leaf_data, third_dimension)
+
+ # Transform list of 2d arrays into a 3d array
+ preds_as_array = np.zeros((X_test.shape[0], model2._rf_opts['n_estimators'], third_dimension)) * np.nan
+ for i, preds_per_tree in enumerate(all_preds):
+ for j, pred in enumerate(preds_per_tree):
+ preds_as_array[i, j, : len(pred)] = pred
+
+ # Do all necessary computation with vectorized functions
+ preds_as_array = np.log(np.nanmean(np.exp(preds_as_array), axis=2) + constants.VERY_SMALL_NUMBER)
+
+ # Compute the mean and the variance across the different trees
+ mean2 = preds_as_array.mean(axis=1, keepdims=True)
+ var2 = preds_as_array.var(axis=1, keepdims=True)
+
+ assert np.allclose(mean1, mean2)
+ assert np.allclose(var1, var2)
+
diff --git a/tests/test_multi_objective/test_combined_function.py b/tests/test_multi_objective/test_combined_function.py
index fdf7f9713..1e6707e89 100644
--- a/tests/test_multi_objective/test_combined_function.py
+++ b/tests/test_multi_objective/test_combined_function.py
@@ -74,38 +74,40 @@ def test_mean_aggregation(facade, make_scenario, configspace):
RETRAIN_AFTER = 8
scenario: Scenario = make_scenario(configspace, use_multi_objective=True, n_trials=N_TRIALS)
- # TODO: Check whether different weighting affects the sampled configurations.
- multi_objective_algorithm = WrapStrategy(MeanAggregationStrategy, scenario=scenario)
- intensifier = Intensifier(scenario, max_config_calls=1, max_incumbents=10)
- config_selector = ConfigSelector(scenario, retrain_after=RETRAIN_AFTER)
- initial_design = RandomInitialDesign(scenario, n_configs=1)
-
- smac = facade(
- scenario=scenario,
- target_function=tae,
- multi_objective_algorithm=multi_objective_algorithm,
- intensifier=intensifier,
- config_selector=config_selector,
- initial_design=initial_design,
- overwrite=True,
- )
- incumbents = smac.optimize()
-
- # We sort the incumbents by their x values and then make sure that the current y is
- # smaller than the previous one.
- sorted_incumbents = []
- for incumbent in incumbents:
- x, y = func(incumbent["x"])
- sorted_incumbents.append((x, y))
-
- sorted_incumbents = sorted(sorted_incumbents, key=lambda x: x[0])
- previous_y = np.inf
- for x, y in sorted_incumbents:
- assert y <= previous_y
- previous_y = y
-
- # We expect N_TRIALS/RETRAIN_AFTER updates
- assert multi_objective_algorithm._n_calls_update_on_iteration_start == int(N_TRIALS / RETRAIN_AFTER)
+ # TODO: Check whether different weighting affects the sampled configurations.
+ weights = [[0.1, 0.9], [0.5, 0.5], [0.8, 0.2], [1.0, 0.0], [0.0, 1.0], None]
+ for weight_pair in weights:
+ multi_objective_algorithm = WrapStrategy(MeanAggregationStrategy, objective_weights=weight_pair, scenario=scenario)
+ intensifier = Intensifier(scenario, max_config_calls=1, max_incumbents=10)
+ config_selector = ConfigSelector(scenario, retrain_after=RETRAIN_AFTER)
+ initial_design = RandomInitialDesign(scenario, n_configs=1)
+
+ smac = facade(
+ scenario=scenario,
+ target_function=tae,
+ multi_objective_algorithm=multi_objective_algorithm,
+ intensifier=intensifier,
+ config_selector=config_selector,
+ initial_design=initial_design,
+ overwrite=True,
+ )
+ incumbents = smac.optimize()
+
+ # We sort the incumbents by their x values and then make sure that the current y is
+ # smaller than the previous one.
+ sorted_incumbents = []
+ for incumbent in incumbents:
+ x, y = func(incumbent["x"])
+ sorted_incumbents.append((x, y))
+
+ sorted_incumbents = sorted(sorted_incumbents, key=lambda x: x[0])
+ previous_y = np.inf
+ for x, y in sorted_incumbents:
+ assert y <= previous_y
+ previous_y = y
+
+ # We expect N_TRIALS/RETRAIN_AFTER updates
+ assert multi_objective_algorithm._n_calls_update_on_iteration_start == int(N_TRIALS / RETRAIN_AFTER)
@pytest.mark.parametrize("facade", FACADES)