diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5f03e854f6..d109fc6bc4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,89 +1,27 @@ +# This workflow is just to test that the docs build successfully. name: docs - +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true on: - # Manual trigger option in GitHub - # This won't push to GitHub pages where docs are hosted due - # to the guarded if statement in those steps workflow_dispatch: - - # Trigger on push to these branches push: branches: - main - - development - - # Trigger on open/push to a PR targeting one of these branches pull_request: - types: - - opened - - synchronize - - reopened - - ready_for_review branches: - main - - development - -env: - name: SMAC3 - +permissions: + contents: write jobs: - build-and-deploy: - if: ${{ !github.event.pull_request.draft }} + build: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install dependencies - id: install - run: | - pip install ".[gpytorch,dev]" - - # Getting the version - SMAC_VERSION=$(python -c "import smac; print('v' + str(smac.version));") - - # Make it a global variable - echo "SMAC_VERSION=$SMAC_VERSION" >> $GITHUB_ENV - - - name: Make docs - run: | - make clean - make docs - - - name: Pull latest gh-pages - if: (contains(github.ref, 'develop') || contains(github.ref, 'main')) && github.event_name == 'push' - run: | - cd .. - git clone https://github.com/${{ github.repository }}.git --branch gh-pages --single-branch gh-pages - - - name: Copy new docs into gh-pages - if: (contains(github.ref, 'develop') || contains(github.ref, 'main')) && github.event_name == 'push' - run: | - branch_name=${GITHUB_REF##*/} - cd ../gh-pages - rm -rf $branch_name - cp -r ../${{ env.name }}/docs/build/html $branch_name - - # we also copy the current SMAC_VERSION - rm -rf $SMAC_VERSION - cp -r ../${{ env.name }}/docs/build/html $SMAC_VERSION - - - - name: Push to gh-pages - if: (contains(github.ref, 'develop') || contains(github.ref, 'main')) && github.event_name == 'push' - run: | - last_commit=$(git log --pretty=format:"%an: %s") - cd ../gh-pages - branch_name=${GITHUB_REF##*/} - git add $branch_name/ - git add $SMAC_VERSION/ - git config --global user.name 'Github Actions' - git config --global user.email 'not@mail.com' - git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} - git commit -am "$last_commit" - git push + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: "Install dependencies" + run: python -m pip install -e ".[dev]" + - name: "Build Docs" + run: mkdocs build --clean --strict \ No newline at end of file diff --git a/.gitignore b/.gitignore index 679812827c..f72077d614 100644 --- a/.gitignore +++ b/.gitignore @@ -147,4 +147,7 @@ src # Pycharm .idea -.vscode \ No newline at end of file +.vscode + +projects +_api \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c20493bef..db009bf97d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# 2.3.0 + +## Documentation +- Update windows install guide (#952) +- Correct intensifier for Algorithm Configuration Facade (#1162, #1165) +- Migrate sphinx docs to mkdocs (#1155) + # 2.2.1 ## Improvements @@ -688,7 +695,7 @@ Since many urgent features were
already taken care of in 0.14.0, this release ma conditions when starting multiple runs on a cluster. * MAINT #209: adds the seed or a pseudo-seed to the output directory name for better identifiability of the output directories. -* FIX #216: replace broken call to in EIPS acqusition function. +* FIX #216: replace broken call to in EIPS acquisition function. * MAINT: use codecov.io instead of coveralls.io. * MAINT: increase minimal required version of the ConfigSpace package to 0.3.2. diff --git a/Makefile b/Makefile index 145d1bb14c..aade064b33 100644 --- a/Makefile +++ b/Makefile @@ -86,17 +86,30 @@ format: format-black format-isort tests: $(PYTEST) ${TESTS_DIR} +# Launch the docs, executing code blocks and examples +docs-full: + $(PYTHON) -m webbrowser -t "http://127.0.0.1:8000/" + SMAC_DOC_RENDER_EXAMPLES=all \ + SMAC_DOCS_OFFLINE=true \ + SMAC_EXEC_DOCS=true \ + mkdocs serve --watch-theme + +# Launch the docs and execute code blocks +docs-code: + $(PYTHON) -m webbrowser -t "http://127.0.0.1:8000/" + SMAC_DOCS_OFFLINE=true \ + SMAC_EXEC_DOCS=true \ + SMAC_DOC_RENDER_EXAMPLES=false \ + mkdocs serve --watch-theme + +# Launch the docs but don't run code examples docs: - $(MAKE) -C ${DOCDIR} docs - @echo - @echo "View docs at:" - @echo ${INDEX_HTML} - -examples: - $(MAKE) -C ${DOCDIR} examples - @echo - @echo "View docs at:" - @echo ${INDEX_HTML} + $(PYTHON) -m webbrowser -t "http://127.0.0.1:8000/" + SMAC_DOCS_OFFLINE=true \ + SMAC_EXEC_DOCS=false \ + SMAC_DOC_RENDER_EXAMPLES=false \ + mkdocs serve --watch-theme + # https://github.com/pawamoy/markdown-exec/issues/19 # Build a distribution in ./dist build: @@ -104,9 +117,6 @@ build: clean: clean-build clean-docs clean-data -clean-docs: - $(MAKE) -C ${DOCDIR} clean - clean-build: $(PYTHON) setup.py clean rm -rf ${DIST} diff --git a/docs/10_experimental.rst b/docs/10_experimental.md similarity index 52% rename from docs/10_experimental.rst rename to docs/10_experimental.md index a075498c4a..984586505b 100644 --- a/docs/10_experimental.rst +++ b/docs/10_experimental.md @@ -1,12 +1,10 @@ -Experimental -============ +# Experimental -.. warning:: +!!! warning This part is experimental and might not work in each case. If you would like to suggest any changes, please let us know. -Installation in Windows via WSL ------------------------------- +## Installation in Windows via WSL SMAC can be installed in a WSL (Windows Subsystem for Linux) under Windows. Download an Anaconda Linux version to drive D under Windows, e.g. D:\\Anaconda3- In the WSL, Windows resources are mounted under /mnt: -.. code:: bash - - cd /mnt/d - bash Anaconda3-2023.03-1-Linux-x86_64 +```bash +cd /mnt/d +bash Anaconda3-2023.03-1-Linux-x86_64.sh +``` Enter this command to create the environment variable: -.. code:: bash - - export PATH="$PATH:/home/${USER}/anaconda3/bin +```bash +export PATH="$PATH:/home/${USER}/anaconda3/bin" +``` -Input 'python' to check if the installation was successful. +Input `python` to check if the installation was successful. **3) Install SMAC** Change to your home folder and install the general software there: -..
code:: bash - - cd /home/${USER} - sudo apt-get install software-properties-common - sudo apt-get update - sudo apt-get install build-essential swig - conda install gxx_linux-64 gcc_linux-64 swig - curl https://raw.githubusercontent.com/automl/smac3/master/requirements.txt | xargs -n 1 -L 1 pip install +```bash +cd /home/${USER} +sudo apt-get install software-properties-common +sudo apt-get update +sudo apt-get install build-essential swig +conda install gxx_linux-64 gcc_linux-64 swig +curl https://raw.githubusercontent.com/automl/smac3/master/requirements.txt | xargs -n 1 -L 1 pip install +``` + +## Installation in Pure Windows Please refer to this [issue](https://github.com/automl/SMAC3/issues/952) for installation instructions for SMAC3-1.4 and SMAC3-2.x. \ No newline at end of file diff --git a/docs/1_installation.md b/docs/1_installation.md new file mode 100644 index 0000000000..d886d4b494 --- /dev/null +++ b/docs/1_installation.md @@ -0,0 +1,72 @@ +# Installation + +## Requirements + +SMAC is written in python3 and therefore requires an environment with python>=3.8. +Furthermore, the Random Forest used in SMAC requires SWIG as a build dependency. + +!!! info + + SMAC is tested on Linux and Mac machines with python >=3.8. + + +## Setup + +We recommend using Anaconda to create and activate an environment: + +```bash +conda create -n SMAC python=3.10 +conda activate SMAC +``` + +Now install swig, either on the system level, e.g. using the following command for Linux: +```bash +apt-get install swig +``` + +Or install swig inside an already created conda environment using: + +```bash +conda install gxx_linux-64 gcc_linux-64 swig +``` + +## Install SMAC +You can install SMAC either using PyPI or Conda-forge. + +### PyPI +To install SMAC from PyPI, call: + +```bash +pip install smac +``` + +Or alternatively, clone the repository from GitHub directly: + +```bash +git clone https://github.com/automl/SMAC3.git && cd SMAC3 +pip install -e ".[dev]" +``` + +### Conda-forge + +Installing SMAC from the `conda-forge` channel can be achieved by adding `conda-forge` to your channels with: + +```bash +conda config --add channels conda-forge +conda config --set channel_priority strict +``` + +You must have `conda >= 4.9` installed. To update conda or check your current conda version, please follow the instructions from [the official anaconda documentation](https://docs.anaconda.com/anaconda/install/update-version/). Once the `conda-forge` channel has been enabled, SMAC can be installed with: + +```bash +conda install smac +``` + +Read [SMAC feedstock](https://github.com/conda-forge/smac-feedstock) for more details. + +## Windows (native or via WSL, experimental) + +SMAC can be installed under Windows in a WSL (Windows Subsystem for Linux). +You can find instructions on how to do this here: [Experimental](./10_experimental.md). +However, this is experimental and might not work in every case. +If you would like to suggest any changes, please let us know. diff --git a/docs/1_installation.rst b/docs/1_installation.rst deleted file mode 100644 index a835d9cf19..0000000000 --- a/docs/1_installation.rst +++ /dev/null @@ -1,78 +0,0 @@ -Installation -============ - -Requirements -~~~~~~~~~~~~ - -SMAC is written in python3 and therefore requires an environment with python>=3.8. -Furthermore, the Random Forest used in SMAC requires SWIG as a build dependency. Install it either in your -environment or on your system directly. The command to install swig on linux machines is the following: -..
code-block:: - - apt-get install swig - - -SMAC is tested on Linux and Mac machines with python 3.8, 3.9 and 3.10. - - -Anaconda -~~~~~~~~ - -We recommend using Anaconda to create and activate an environment: - -.. code-block:: - - conda create -n SMAC python=3.10 - conda activate SMAC - - -If you haven't installed swig yet, you can install it directly inside the Anaconda environment: - -.. code-block:: - - conda install gxx_linux-64 gcc_linux-64 swig - - -Now install SMAC via PyPI: - -.. code-block:: - - pip install smac - - -Or alternatively, clone the environment from GitHub directly: - -.. code-block:: - - git clone https://github.com/automl/SMAC3.git && cd SMAC3 - pip install -e .[dev] - - -Conda-forge -~~~~~~~~~~~ - -Installing SMAC from the `conda-forge` channel can be achieved by adding `conda-forge` to your channels with: - -.. code:: bash - - conda config --add channels conda-forge - conda config --set channel_priority strict - - -You must have `conda >= 4.9` installed. To update conda or check your current conda version, please follow the instructions from `the official anaconda documentation `_ . Once the `conda-forge` channel has been enabled, SMAC can be installed with: - -.. code:: bash - - conda install smac - - -Read `SMAC feedstock `_ for more details. - -Windows via WSL (Experimental) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -SMAC can be installed under Windows in a WSL (Windows Subsystem for Linux). -You can find an instruction on how to do this here: :ref:`Experimental`. -However, this is experimental and might not work in each case. -If you would like to suggest any changes, please let us know. diff --git a/docs/2_package_overview.md b/docs/2_package_overview.md new file mode 100644 index 0000000000..dcd7c0060c --- /dev/null +++ b/docs/2_package_overview.md @@ -0,0 +1,48 @@ +# Package Overview + +SMAC supports you in determining well-performing hyperparameter configurations for your algorithms. By being a robust and flexible framework for [Bayesian Optimization][BayesianOptimization], SMAC can improve performance within a few function evaluations. It offers several entry points and pre-sets for typical use cases, such as optimizing hyperparameters, solving low dimensional continuous (artificial) global optimization problems and configuring algorithms to perform well across multiple problem [instances][Instances]. + +## Features + +SMAC has the following characteristics and capabilities: + +#### Global Optimizer +[Bayesian Optimization][BayesianOptimization] is used for sample-efficient optimization. + +#### Optimize [Black-Box][Black-Box] Functions +Optimization is only aware of input and output. It is agnostic to internals of the function. + +#### Flexible Hyperparameters +Use categorical, continuous, hierarchical and/or conditional hyperparameters with the well-integrated [ConfigurationSpace](https://automl.github.io/ConfigSpace). SMAC can optimize *up to 100 hyperparameters* efficiently. + +#### Any [Objectives][Objective] +Optimization with any [objective][Objective] (e.g., accuracy, runtime, cross-validation, ...) is possible. + +#### [Multi-Objective][Multi-Objective] Optimization +Optimize an arbitrary number of objectives using scalarized multi-objective algorithms. Both ParEGO [[Know06][Know06]] and mean aggregation strategies are supported. + +#### [Multi-Fidelity][Multi-Fidelity] Optimization +Judge configurations on multiple [budgets][Budget] to discard unsuitable configurations early on. This will result in a massive speed-up, depending on the budgets.
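
For illustration, a minimal multi-fidelity setup might look as follows. This is a sketch only: the `train` function, its epoch-based budget, and the hyperparameter name are placeholder assumptions, not part of this page.

```python
from ConfigSpace import Configuration, ConfigurationSpace
from smac import MultiFidelityFacade, Scenario

def train(config: Configuration, seed: int = 0, budget: float = 10) -> float:
    # Placeholder target function: a real one would train for `budget` epochs
    # and return a validation error to be minimized.
    return (config["learning_rate"] - 0.01) ** 2 / budget

configspace = ConfigurationSpace({"learning_rate": (1e-5, 1e-1)})

# min_budget/max_budget bound the fidelities on which configurations are judged.
scenario = Scenario(configspace, n_trials=100, min_budget=1, max_budget=10)
smac = MultiFidelityFacade(scenario, train)
incumbent = smac.optimize()
```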
+ +#### [Instances][Instances] +Find well-performing hyperparameter configurations not only for one instance (e.g. dataset) of an algorithm, but for many. + +#### Command-Line Interface +SMAC can not only be executed within a python file but also from the command line. Consequently, not only algorithms in python can be optimized, but implementations in other languages as well. + +!!! note + Command-line interface has been temporarily disabled in v2.0. Please fall back to v1.4 if you need it. + +## Comparison + +The following table provides an overview of SMAC's capabilities in comparison with other optimization tools. + +| Package | Complex Hyperparameter Space | [Multi-Objective][Multi-Objective] | [Multi-Fidelity][Multi-Fidelity] | [Instances][Instances] | Command-Line Interface | Parallelism | +|--------------|------------------------------|----------------------|---------------------|----------------|------------------------|-------------| +| HyperMapper | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| Optuna | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | +| Hyperopt | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | +| BoTorch | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | +| OpenBox | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | +| HpBandSter | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | +| SMAC | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | diff --git a/docs/2_package_overview.rst b/docs/2_package_overview.rst deleted file mode 100644 index fee310a3f4..0000000000 --- a/docs/2_package_overview.rst +++ /dev/null @@ -1,66 +0,0 @@ -Package Overview -================ - -SMAC supports you in determining well-performing hyperparameter configurations for your algorithms. By being a robust -and flexible framework for :term:`Bayesian Optimization`, SMAC can improve performance within a few function -evaluations. It offers several entry points and pre-sets for typical use cases, such as optimizing -hyperparameters, solving low dimensional continuous (artificial) global optimization problems and configuring algorithms -to perform well across multiple problem :term:`instances`. - - -Features --------- - -SMAC has the following characteristics and capabilities: - -Global Optimizer - :term:`Bayesian Optimization` is used for sample-efficient optimization. - -Optimize :term:`Black-Box` Functions - Optimization is only aware of input and output. It is agnostic to internals of the function. - -Flexible Hyperparameters - Use categorical, continuous, hierarchical and/or conditional hyperparameters with the well-integrated - `ConfigurationSpace `_. SMAC can optimize *up to 100 hyperparameters* - efficiently. - -Any Objectives - Optimization with any :term:`objective` (e.g., accuracy, runtime, cross-validation, ...) is possible. - -:ref:`Multi-Objective` - Optimize arbitrary number of objectives using scalarized multi-objective algorithms. Both ParEGO [Know06]_ and - mean aggregation strategies are supported. - -:ref:`Multi-Fidelity` Optimization - Judge configurations on multiple :term:`budgets` to discard unsuitable configurations - early on. This will result in a massive speed-up, depending on the budgets. - -:ref:`Instances` - Find well-performing hyperparameter configurations not only for one instance (e.g. dataset) of - an algorithm, but for many. - -Command-Line Interface - SMAC can not only be executed within a python file but also from the commandline. Consequently, - not only algorithms in python can be optimized, but implementations in other languages as well. - - .. note :: - - Command-line interface has been temporarily disabled in v2.0. Please fall back to v1.4 if you need it. 
- - -Comparison ----------- - -The following table provides an overview of SMAC's capabilities in comparison with other optimization tools. - -.. csv-table:: - :header: "Package", "Complex Hyperparameter Space", ":term:`Multi-Objective` ", ":term:`Multi-Fidelity`", ":term:`Instances`", "Command-Line Interface", "Parallelism" - :widths: 10, 10, 10, 10, 10, 10, 10 - - HyperMapper, ✅, ✅, ❌, ❌, ❌, ❌ - Optuna, ✅, ✅, ✅, ❌, ✅, ✅ - Hyperopt, ✅, ❌, ❌, ❌, ✅, ✅ - BoTorch, ❌, ✅, ✅, ❌, ❌, ✅ - OpenBox, ✅, ✅, ❌, ❌, ❌, ✅ - HpBandSter, ✅, ❌, ✅, ❌, ❌, ✅ - SMAC, ✅, ✅, ✅, ✅, ✅, ✅ diff --git a/docs/3_getting_started.md b/docs/3_getting_started.md new file mode 100644 index 0000000000..3237db587c --- /dev/null +++ b/docs/3_getting_started.md @@ -0,0 +1,135 @@ +[](){#getting_started} +# Getting Started + +SMAC needs four core components (configuration space, target function, scenario and a facade) to run an +optimization process, all of which are explained on this page. + +They interact in the following way: + +
+<figure markdown="span">
+  ![Interaction of SMAC's components](./images/smac_components_interaction.jpg){ width="300" }
+  <figcaption>Interaction of SMAC's components</figcaption>
+</figure>
+ + +## Configuration Space The configuration space defines the search space of the hyperparameters and, therefore, the tunable parameters' legal ranges and default values. ```python from ConfigSpace import ConfigurationSpace cs = ConfigurationSpace({ "myfloat": (0.1, 1.5), # Uniform Float "myint": (2, 10), # Uniform Integer "species": ["mouse", "cat", "dog"], # Categorical }) ``` Please see the documentation of [ConfigurationSpace](https://automl.github.io/ConfigSpace) for more details. ## Target Function The target function takes a configuration from the configuration space and returns a performance value. For example, you could use a Neural Network to predict on your data and get some validation performance. If, for instance, you would tune the learning rate of the Network's optimizer, every learning rate will change the final validation performance of the network. This is the target function. SMAC tries to find the best performing learning rate by trying different values and evaluating the target function - in an efficient way. ```python def train(config: Configuration, seed: int) -> float: model = MultiLayerPerceptron(learning_rate=config["learning_rate"]) model.fit(...) accuracy = model.validate(...) return 1 - accuracy # SMAC always minimizes (the smaller the better) ``` !!! note In general, the arguments of the target function depend on the intensifier. However, in all cases, the first arguments must be the configuration (an arbitrary argument name is possible here) and a seed. If you specified instances in the scenario, SMAC requires ``instance`` as argument additionally. If you use ``SuccessiveHalving`` or ``Hyperband`` as intensifier but you did not specify instances, SMAC passes `budget` as argument to the target function. But don't worry: SMAC will tell you if something is missing or if something is not used. !!! warning SMAC *always* minimizes the value returned from the target function. !!! warning SMAC passes either `instance` or `budget` to the target function but never both. ## Scenario The [Scenario][smac.scenario] is used to provide environment variables. For example, if you want to limit the optimization process by a time limit or want to specify where to save the results. ```python from smac import Scenario scenario = Scenario( configspace=cs, output_directory=Path("your_output_directory"), walltime_limit=120, # Limit to two minutes n_trials=500, # Evaluate at most 500 trials n_workers=8, # Use eight workers ... ) ``` ## Facade A [facade][smac.facade.abstract_facade] is the entry point to SMAC, which constructs a default optimization pipeline for you. SMAC offers various facades, which satisfy many common use cases and are crucial to achieving peak performance. The idea behind the facades is to provide a simple interface to all of SMAC's components, which is easy to use and understand and without the need of deep diving into the material. However, experts are invited to change the components to their specific hyperparameter optimization needs.
The following table (horizontally scrollable) shows you what is supported and reveals the default [components][components]: | | [Black-Box][smac.facade.blackbox_facade] | [Hyperparameter Optimization][smac.facade.hyperparameter_optimization_facade] | [Multi-Fidelity][smac.facade.multi_fidelity_facade] | [Algorithm Configuration][smac.facade.algorithm_configuration_facade] | [Random][smac.facade.random_facade] | [Hyperband][smac.facade.hyperband_facade] | | --- | --- | --- | --- | --- | --- | --- | | #Parameters | low | low/medium/high | low/medium/high | low/medium/high | low/medium/high | low/medium/high | | Supports Instances | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | | Supports Multi-Fidelity | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | | Initial Design | [Sobol][smac.initial_design.sobol_design] | [Sobol][smac.initial_design.sobol_design] | [Random][smac.initial_design.random_design] | [Default][smac.initial_design.default_design] | [Default][smac.initial_design.default_design] | [Default][smac.initial_design.default_design] | | Surrogate Model | [Gaussian Process][smac.model.gaussian_process.gaussian_process] | [Random Forest][smac.model.random_forest.random_forest] | [Random Forest][smac.model.random_forest.random_forest] | [Random Forest][smac.model.random_forest.random_forest] | Not used | Not used | | Acquisition Function | [Expected Improvement][smac.acquisition.function.expected_improvement] | [Log Expected Improvement][smac.acquisition.function.expected_improvement] | [Log Expected Improvement][smac.acquisition.function.expected_improvement] | [Expected Improvement][smac.acquisition.function.expected_improvement] | Not used | Not used | | Acquisition Maximizer | [Local and Sorted Random Search][smac.acquisition.maximizer.local_and_random_search] | [Local and Sorted Random Search][smac.acquisition.maximizer.local_and_random_search] | [Local and Sorted Random Search][smac.acquisition.maximizer.local_and_random_search] | [Local and Sorted Random Search][smac.acquisition.maximizer.local_and_random_search] | [Random Search][smac.acquisition.maximizer.random_search] | [Random Search][smac.acquisition.maximizer.random_search] | | Intensifier | [Default][smac.intensifier.intensifier] | [Default][smac.intensifier.intensifier] | [Hyperband][smac.intensifier.hyperband] | [Default][smac.intensifier.intensifier] | [Default][smac.intensifier.intensifier] | [Hyperband][smac.intensifier.hyperband] | | Runhistory Encoder | [Default][smac.runhistory.encoder.encoder] | [Log][smac.runhistory.encoder.log_encoder] | [Log][smac.runhistory.encoder.log_encoder] | [Default][smac.runhistory.encoder.encoder] | [Default][smac.runhistory.encoder.encoder] | [Default][smac.runhistory.encoder.encoder] | | Random Design Probability | 8.5% | 20% | 20% | 50% | Not used | Not used | !!! info The multi-fidelity facade is the closest implementation to [BOHB](https://github.com/automl/HpBandSter). !!! note We want to emphasize that SMAC is a highly modular optimization framework. The facade accepts many arguments to specify components of the pipeline. Please also note, that in contrast to previous versions, instantiated objects are passed instead of *kwargs*. The facades can be imported directly from the ``smac`` module.
+ +```python +from smac import BlackBoxFacade as BBFacade +from smac import HyperparameterOptimizationFacade as HPOFacade +from smac import MultiFidelityFacade as MFFacade +from smac import AlgorithmConfigurationFacade as ACFacade +from smac import RandomFacade as RFacade +from smac import HyperbandFacade as HBFacade + +smac = HPOFacade(scenario=scenario, target_function=train) +smac = MFFacade(scenario=scenario, target_function=train) +smac = ACFacade(scenario=scenario, target_function=train) +smac = RFacade(scenario=scenario, target_function=train) +smac = HBFacade(scenario=scenario, target_function=train) +``` \ No newline at end of file diff --git a/docs/3_getting_started.rst b/docs/3_getting_started.rst deleted file mode 100644 index dbc2873681..0000000000 --- a/docs/3_getting_started.rst +++ /dev/null @@ -1,145 +0,0 @@ -Getting Started -=============== - -SMAC needs four core components (configuration space, target function, scenario and a facade) to run an -optimization process, all of which are explained on this page. - -They interact in the following way: - -.. image:: images/smac_components_interaction.jpg - :width: 400 - :alt: Interaction of SMAC's components - - -Configuration Space -------------------- - -The configuration space defines the search space of the hyperparameters and, therefore, the tunable parameters' legal -ranges and default values. - -.. code-block:: python - - from ConfigSpace import ConfigSpace - - cs = ConfigurationSpace({ - "myfloat": (0.1, 1.5), # Uniform Float - "myint": (2, 10), # Uniform Integer - "species": ["mouse", "cat", "dog"], # Categorical - }) - -Please see the documentation of `ConfigSpace `_ for more details. - - -Target Function ---------------- - -The target function takes a configuration from the configuration space and returns a performance value. -For example, you could use a Neural Network to predict on your data and get some validation performance. -If, for instance, you would tune the learning rate of the Network's optimizer, every learning rate will -change the final validation performance of the network. This is the target function. -SMAC tries to find the best performing learning rate by trying different values and evaluating the target function - -in an efficient way. - -.. code-block:: python - - def train(self, config: Configuration, seed: int) -> float: - model = MultiLayerPerceptron(learning_rate=config["learning_rate"]) - model.fit(...) - accuracy = model.validate(...) - - return 1 - accuracy # SMAC always minimizes (the smaller the better) - -.. warning:: - - SMAC *always* minimizes the value returned from the target function. - - -.. note:: - - In general, the arguments of the target function depend on the intensifier. However, - in all cases, the first argument must be the configuration (arbitrary argument name is possible here) and a seed. - If you specified instances in the scenario, SMAC requires ``instance`` as argument additionally. If you use - ``SuccessiveHalving`` or ``Hyperband`` as intensifier but you did not specify instances, SMAC passes `budget` as - argument to the target function. But don't worry: SMAC will tell you if something is missing or if something is not - used. - - -.. warning:: - - SMAC passes either `instance` or `budget` to the target function but never both. - - -Scenario --------- - -The :ref:`Scenario` is used to provide environment variables. For example, -if you want to limit the optimization process by a time limit or want to specify where to save the results. - -.. 
code-block:: python - - from smac import Scenario - - scenario = Scenario( - configspace=cs, - output_directory=Path("your_output_directory") - walltime_limit=120, # Limit to two minutes - n_trials=500, # Evaluated max 500 trials - n_workers=8, # Use eight workers - ... - ) - - -Facade ------- - -A :ref:`facade` is the entry point to SMAC, which constructs a default optimization -pipeline for you. SMAC offers various facades, which satisfy many common use cases and are crucial to -achieving peak performance. The idea behind the facades is to provide a simple interface to all of SMAC's components, -which is easy to use and understand and without the need of deep diving into the material. However, experts are -invited to change the components to their specific hyperparameter optimization needs. The following -table (horizontally scrollable) shows you what is supported and reveals the default :ref:`components`: - - -.. csv-table:: - :header: "", ":ref:`Black-Box`", ":ref:`Hyperparameter Optimization`", ":ref:`Multi-Fidelity`", ":ref:`Algorithm Configuration`", ":ref:`Random`", ":ref:`Hyperband`" - - "#Parameters", "low", "low/medium/high", "low/medium/high", "low/medium/high", "low/medium/high", "low/medium/high" - "Supports Instances", "❌", "✅", "✅", "✅", "❌", "✅" - "Supports Multi-Fidelity", "❌", "❌", "✅", "✅", "❌", "✅" - "Initial Design", ":ref:`Sobol`", ":ref:`Sobol`", ":ref:`Random`", ":ref:`Default`", ":ref:`Default`", ":ref:`Default`" - "Surrogate Model", ":ref:`Gaussian Process`", ":ref:`Random Forest`", ":ref:`Random Forest`", ":ref:`Random Forest`", "Not used", "Not used" - "Acquisition Function", ":ref:`Expected Improvement`", ":ref:`Log Expected Improvement`", ":ref:`Log Expected Improvement`", ":ref:`Expected Improvement`", "Not used", "Not used" - "Acquisition Maximizer", ":ref:`Local and Sorted Random Search`", ":ref:`Local and Sorted Random Search`", ":ref:`Local and Sorted Random Search`", ":ref:`Local and Sorted Random Search`", ":ref:`Local and Sorted Random Search`", ":ref:`Local and Sorted Random Search`" - "Intensifier", ":ref:`Default`", ":ref:`Default`", ":ref:`Hyperband`", ":ref:`Hyperband`", ":ref:`Default`", ":ref:`Hyperband`", - "Runhistory Encoder", ":ref:`Default`", ":ref:`Log`", ":ref:`Log`", ":ref:`Default`", ":ref:`Default`", ":ref:`Default`" - "Random Design Probability", "8.5%", "20%", "20%", "50%", "Not used", "Not used" - - -.. note:: - - The multi-fidelity facade is the closest implementation to `BOHB `_. - - -.. note:: - - We want to emphasize that SMAC is a highly modular optimization framework. - The facade accepts many arguments to specify components of the pipeline. Please also note, that in contrast - to previous versions, instantiated objects are passed instead of *kwargs*. - - -The facades can be imported directly from the ``smac`` module. - -.. 
code-block:: python - - from smac import BlackBoxFacade as BBFacade - from smac import HyperparameterOptimizationFacade as HPOFacade - from smac import MultiFidelityFacade as MFFacade - from smac import AlgorithmConfigurationFacade as ACFacade - from smac import RandomFacade as RFacade - from smac import HyperbandFacade as HBFacade - - smac = HPOFacade(scenario=scenario, target_function=train) - smac = MFFacade(scenario=scenario, target_function=train) - smac = ACFacade(scenario=scenario, target_function=train) - smac = RFacade(scenario=scenario, target_function=train) - smac = HBFacade(scenario=scenario, target_function=train) diff --git a/docs/4_minimal_example.md b/docs/4_minimal_example.md new file mode 100644 index 0000000000..b8df179a4a --- /dev/null +++ b/docs/4_minimal_example.md @@ -0,0 +1,32 @@ +# Minimal Example + +The following code optimizes a support vector machine on the iris dataset. + + +```python +from ConfigSpace import Configuration, ConfigurationSpace + +import numpy as np +from smac import HyperparameterOptimizationFacade, Scenario +from sklearn import datasets +from sklearn.svm import SVC +from sklearn.model_selection import cross_val_score + +iris = datasets.load_iris() + + +def train(config: Configuration, seed: int = 0) -> float: + classifier = SVC(C=config["C"], random_state=seed) + scores = cross_val_score(classifier, iris.data, iris.target, cv=5) + return 1 - np.mean(scores) + + +configspace = ConfigurationSpace({"C": (0.100, 1000.0)}) + +# Scenario object specifying the optimization environment +scenario = Scenario(configspace, deterministic=True, n_trials=200) + +# Use SMAC to find the best configuration/hyperparameters +smac = HyperparameterOptimizationFacade(scenario, train) +incumbent = smac.optimize() +``` \ No newline at end of file diff --git a/docs/4_minimal_example.rst b/docs/4_minimal_example.rst deleted file mode 100644 index c4eb2eb9a4..0000000000 --- a/docs/4_minimal_example.rst +++ /dev/null @@ -1,33 +0,0 @@ -Minimal Example -=============== - -The following code optimizes a support vector machine on the iris dataset. - - -.. code-block:: python - - from ConfigSpace import Configuration, ConfigurationSpace - - import numpy as np - from smac import HyperparameterOptimizationFacade, Scenario - from sklearn import datasets - from sklearn.svm import SVC - from sklearn.model_selection import cross_val_score - - iris = datasets.load_iris() - - - def train(config: Configuration, seed: int = 0) -> float: - classifier = SVC(C=config["C"], random_state=seed) - scores = cross_val_score(classifier, iris.data, iris.target, cv=5) - return 1 - np.mean(scores) - - - configspace = ConfigurationSpace({"C": (0.100, 1000.0)}) - - # Scenario object specifying the optimization environment - scenario = Scenario(configspace, deterministic=True, n_trials=200) - - # Use SMAC to find the best configuration/hyperparameters - smac = HyperparameterOptimizationFacade(scenario, train) - incumbent = smac.optimize() \ No newline at end of file diff --git a/docs/6_references.rst b/docs/6_references.md similarity index 67% rename from docs/6_references.rst rename to docs/6_references.md index f75d7048bb..d64e0be761 100644 --- a/docs/6_references.rst +++ b/docs/6_references.md @@ -1,22 +1,20 @@ -References -========== +# References - -.. [LJDR18] L. Li, K. Jamieson, G. DeSalvo, A. Rostamizadeh, A. Talwalkar; +* [](){#LJDR18}[LJDR18] L. Li, K. Jamieson, G. DeSalvo, A. Rostamizadeh, A. 
Talwalkar; Hyperband: A Novel Bandit-Based Approach to Hyperparameter Optimization; https://jmlr.org/papers/v18/16-558.html -.. [HSSL22] Carl Hvarfner, Danny Stoll, Artur Souza, Marius Lindauer, Frank Hutter, Luigi Nardi; +* [](){#HSSL22}[HSSL22] Carl Hvarfner, Danny Stoll, Artur Souza, Marius Lindauer, Frank Hutter, Luigi Nardi; πBO: Augmenting Acquisition Functions with User Beliefs for Bayesian Optimization; https://arxiv.org/pdf/2204.11051.pdf -.. [Know06] J. Knowles; +* [](){#Know06}[Know06] J. Knowles; ParEGO: A Hybrid Algorithm with on-Line Landscape Approximation for Expensive Multiobjective Optimization Problems; https://www.semanticscholar.org/paper/ParEGO%3A-a-hybrid-algorithm-with-on-line-landscape-Knowles/73b5b196b35fb23e1f908d73b787c2c2942fadb5 -.. [SKKS10] N. Srinivas, S. M. Kakade, A. Krause, M. Seeger; +* [](){#SKKS10}[SKKS10] N. Srinivas, S. M. Kakade, A. Krause, M. Seeger; Gaussian Process Optimization in the Bandit Setting: No Regret and Experimental Design; https://arxiv.org/pdf/0912.3995.pdf \ No newline at end of file diff --git a/docs/7_glossary.md b/docs/7_glossary.md new file mode 100644 index 0000000000..c305a55964 --- /dev/null +++ b/docs/7_glossary.md @@ -0,0 +1,29 @@ +# Glossary + +- [](){#BB}**BB**: See `Black-Box`. +- [](){#BO}**BO**: See `Bayesian Optimization`. +- [](){#BOHB}**BOHB**: [Bayesian optimization and Hyperband](https://arxiv.org/abs/1807.01774). +- [](){#CLI}**CLI**: Command-Line Interface. +- [](){#CV}**CV**: Cross-Validation. +- [](){#GP}**GP**: Gaussian Process. +- [](){#GP-MCMC}**GP-MCMC**: Gaussian Process with Markov-Chain Monte-Carlo. +- [](){#HB}**HB**: See `Hyperband`. +- [](){#HP}**HP**: Hyperparameter. +- [](){#MF}**MF**: See `Multi-Fidelity`. +- [](){#RF}**RF**: Random Forest. +- [](){#ROAR}**ROAR**: See `Random Online Adaptive Racing`. +- [](){#SMAC}**SMAC**: Sequential Model-Based Algorithm Configuration. +- [](){#SMBO}**SMBO**: Sequential Model-Based Optimization. +- [](){#BayesianOptimization}**Bayesian Optimization**: Bayesian optimization is a sequential design strategy for global optimization of black-box functions that does not assume any functional forms. It is usually employed to optimize expensive-to-evaluate functions. Bayesian optimization weighs exploration and exploitation to find the minimum of its objective. +- [](){#Black-Box}**Black-Box**: Refers to an algorithm being optimized, where only input and output are observable. +- [](){#Budget}**Budget**: Budget is another word for fidelity. Examples are the number of training epochs or the size of the data subset the algorithm is trained on. However, budget can also be used in the context of instances. For example, if you have 100 instances (let's say we optimize across datasets) and you want to run your algorithm on 10 of them, then the budget is 10. +- [](){#Hyperband}**Hyperband**: [Hyperband](https://arxiv.org/abs/1603.06560). A novel bandit-based algorithm for hyperparameter optimization. Hyperband is an extension of successive halving and therefore works with multi-fidelities. +- [](){#Incumbent}**Incumbent**: The incumbent is the current best known configuration. +- [](){#Instances}**Instances**: Often you want to optimize across different datasets, subsets, or even different transformations (e.g. augmentation). In general, each of these is called an instance. Configurations are evaluated on multiple instances so that a configuration is found which performs well on all instances instead of only a few.
+- [](){#Intensification}**Intensification**: A mechanism that governs how many evaluations to perform with each configuration and when to trust a configuration enough to make it the new current best known configuration (the incumbent). +- [](){#Multi-Fidelity}**Multi-Fidelity**: Multi-fidelity refers to running an algorithm on multiple budgets (such as number of epochs or subsets of data) and thereby evaluating the performance prematurely. +- [](){#Multi-Objective}**Multi-Objective**: A multi-objective optimization problem is a problem with more than one objective. The goal is to find a solution that is optimal or at least a good compromise in all objectives. +- [](){#Objective}**Objective**: An objective is a metric to evaluate the quality or performance of an algorithm. +- [](){#Random Online Adaptive Racing}**Random Online Adaptive Racing**: Random Online Adaptive Racing. A simple model-free instantiation of the general `SMBO` framework. It selects configurations uniformly at random and iteratively compares them against the current incumbent using the intensification mechanism. See [SMAC extended](https://ai.dmi.unibas.ch/research/reading_group/hutter-et-al-tr2010.pdf) chapter 3.2 for details. +- [](){#Target Function}**Target Function**: Your model, which returns a cost based on the given config, seed, budget, and/or instance. +- [](){#Trial}**Trial**: Trial is a single run of a target function on a combination of configuration, seed, budget and/or instance. diff --git a/docs/7_glossary.rst b/docs/7_glossary.rst deleted file mode 100644 index 6e60e70957..0000000000 --- a/docs/7_glossary.rst +++ /dev/null @@ -1,101 +0,0 @@ -Glossary -======== - -.. glossary:: - - BB - See :term:`Black-Box`. - - BO - See :term:`Bayesian Optimization`. - - BOHB - `Bayesian optimization and Hyperband `_. - - CLI - Command-Line Interface. - - CV - Cross-Validation. - - GP - Gaussian Process. - - GP-MCMC - Gaussian Process with Markov-Chain Monte-Carlo. - - HB - See :term:`Hyperband`. - - HP - Hyperparameter. - - MF - See :term:`Multi-Fidelity`. - - RF - Random Forest. - - ROAR - See :term:`Random Online Adaptive Racing`. - - SMAC - Sequential Model-Based Algorithm Configuration. - - SMBO - Sequential Mode-Based Optimization. - - Bayesian Optimization - Bayesian optimization is a sequential design strategy for global optimization of black-box functions that does - not assume any functional forms. It is usually employed to optimize expensive-to-evaluate functions. - A Bayesian optimization weights exploration and exploitation to find the minimum of its objective. - - Black-Box - Refers to an algorithm being optimized, where only input and output are observable. - - Budget - Budget is another word for fidelity. Examples are the number of training epochs or the size of - the data subset the algorithm is trained on. However, budget can also be used in the context of - instances. For example, if you have 100 instances (let's say we optimize across datasets) and you want to run - your algorithm on 10 of them, then the budget is 10. - - Hyperband - `Hyperband `_. A novel bandit-based algorithm for hyperparameter - optimization. Hyperband is an extension of successive halving and therefore works with - multi-fidelities. - - Incumbent - The incumbent is the current best known configuration. - - Instances - Often you want to optimize across different datasets, subsets, or even different transformations (e.g. - augmentation). In general, each of these is called an instance. 
Configurations are evaluated on multiple - instances so that a configuration is found which performs superior on all instances instead of only - a few. - - Intensification - A mechanism that governs how many evaluations to perform with each configuration and when to trust a - configuration enough to make it the new current best known configuration (the incumbent). - - Multi-Fidelity - Multi-fidelity refers to running an algorithm on multiple budgets (such as number of epochs or - subsets of data) and thereby evaluating the performance prematurely. - - Multi-Objective - A multi-objective optimization problem is a problem with more than one objective. - The goal is to find a solution that is optimal or at least a good compromise in all objectives. - - Objective - An objective is a metric to evaluate the quality or performance of an algorithm. - - Random Online Adaptive Racing - Random Online Adaptive Racing. A simple model-free instantiation of the general :term:`SMBO` framework. - It selects configurations uniformly at random and iteratively compares them against the current incumbent - using the intensification mechanism. See `SMAC extended `_ - chapter 3.2 for details. - - Target Function - Your model, which returns a cost based on the given config, seed, budget, and/or instance. - - Trial - Trial is a single run of a target function on a combination of configuration, seed, budget and/or instance. diff --git a/docs/8_faq.rst b/docs/8_faq.md similarity index 61% rename from docs/8_faq.rst rename to docs/8_faq.md index 490d662727..250b3482e5 100644 --- a/docs/8_faq.rst +++ b/docs/8_faq.md @@ -1,30 +1,27 @@ -F.A.Q. -====== +# F.A.Q. - -Should I use SMAC2 or SMAC3? - SMAC3 is a reimplementation of the original SMAC tool (`Sequential Model-Based Optimization for - General Algorithm Configuration `_, Hutter et al., 2021). However, the reimplementation slightly differs from the original +#### Should I use SMAC2 or SMAC3? + SMAC3 is a reimplementation of the original SMAC tool ([Sequential Model-Based Optimization for General Algorithm Configuration](https://ml.informatik.uni-freiburg.de/wp-content/uploads/papers/11-LION5-SMAC.pdf), Hutter et al., 2011). However, the reimplementation slightly differs from the original SMAC. For comparisons against the original SMAC, we refer to a stable release of SMAC (v2) in Java - which can be found `here `_. + which can be found [here](https://www.cs.ubc.ca/labs/algorithms/Projects/SMAC/). Since SMAC3 is actively maintained, we recommend to use SMAC3 for any AutoML applications. -SMAC cannot be imported. +#### SMAC cannot be imported. Try to either run SMAC from SMAC's root directory or try to run the installation first. -pyrfr raises cryptic import errors. +#### pyrfr raises cryptic import errors. Ensure that the gcc used to compile the pyrfr is the same as used for linking during execution. This often happens with Anaconda. See - :ref:`Installation ` for a solution. + [Installation](1_installation.md) for a solution. -How can I use :term:`BOHB` and/or `HpBandSter `_ with SMAC? - The facade MultiFidelityFacade is the closest implementation to :term:`BOHB` and/or `HpBandSter `_. +#### How can I use [BOHB][BOHB] and/or [HpBandSter](https://github.com/automl/HpBandSter) with SMAC? + The facade MultiFidelityFacade is the closest implementation to [BOHB][BOHB] and/or [HpBandSter](https://github.com/automl/HpBandSter). -I discovered a bug or SMAC does not behave as expected. Where should I report to? +#### I discovered a bug or SMAC does not behave as expected.
Where should I report it? Open an issue in our issue list on GitHub. Before you report a bug, please make sure that: * Your bug hasn't already been reported in our issue tracker. @@ -38,13 +35,13 @@ I discovered a bug or SMAC does not behave as expected. Where should I report to * Feel free to add a screenshot showing the issue. -I want to contribute code or discuss a new idea. Where should I report to? - SMAC uses the `GitHub issue-tracker `_ to also take care +#### I want to contribute code or discuss a new idea. Where should I report it? + SMAC uses the [GitHub issue-tracker](https://github.com/automl/SMAC3/issues) to also take care of questions and feedback and is the preferred location for discussing new features and ongoing work. Please also have a look at our - `contribution guide `_. + [contribution guide](https://github.com/automl/SMAC3/blob/main/CONTRIBUTING.md). -What is the meaning of *deterministic*? +#### What is the meaning of *deterministic*? If the ``deterministic`` flag is set to `False` the target function is assumed to be non-deterministic. To evaluate a configuration of a non-deterministic algorithm, multiple runs with different seeds will be evaluated to determine the performance of that configuration on one instance. @@ -53,6 +50,6 @@ What is the meaning of *deterministic*? target function. -Why does SMAC not run on Colab/Mac and crashes with the error "Child process not yet created"? +#### Why does SMAC not run on Colab/Mac and crash with the error "Child process not yet created"? SMAC uses pynisher to enforce time and memory limits on the target function runner. However, pynisher may not always work on specific setups. To overcome this error, it is recommended to remove limitations to make SMAC run. diff --git a/docs/9_license.rst b/docs/9_license.md similarity index 59% rename from docs/9_license.rst rename to docs/9_license.md index af0142ed40..a1f2f4ca70 100644 --- a/docs/9_license.rst +++ b/docs/9_license.md @@ -1,9 +1,8 @@ -License -======= +# License This program is free software: you can redistribute it and/or modify it under the terms of the 3-clause BSD license -(please see the LICENSE file). +(please see the [LICENSE](https://github.com/automl/SMAC3/blob/main/LICENSE.txt) file). This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. You should have received a copy of the 3-clause BSD license along with this program -(see LICENSE file). If not, see ``_. \ No newline at end of file +(see [LICENSE](https://github.com/automl/SMAC3/blob/main/LICENSE.txt) file). If not, see [BSD-3-Clause license](https://opensource.org/license/BSD-3-Clause). \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0d8f6e2ba..0000000000 --- a/docs/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -SPHINXBUILD = sphinx-build -BUILDDIR = build -SPHINXOPTS = -ALLSPHINXOPTS = $(SPHINXOPTS) . - -.PHONY: clean buildapi linkcheck html docs html-noexamples - -clean: - rm -rf $(BUILDDIR)/* - rm -rf api - rm -rf examples - -linkcheck: - SPHINX_GALLERY_PLOT=False $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " - "or in $(BUILDDIR)/linkcheck/output.txt." - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
- -html-noexamples: - SPHINX_GALLERY_PLOT=False $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(SOURCEDIR) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -docs: html linkcheck - diff --git a/docs/advanced_usage/10_continue.rst b/docs/advanced_usage/10_continue.md similarity index 91% rename from docs/advanced_usage/10_continue.rst rename to docs/advanced_usage/10_continue.md index e29c3f9c96..b58b91c8f7 100644 --- a/docs/advanced_usage/10_continue.rst +++ b/docs/advanced_usage/10_continue.md @@ -1,5 +1,4 @@ -Continue -======== +# Continue SMAC can automatically restore states where it left off if a run was interrupted or prematurely finished. To do so, it reads in old files (derived from scenario's name, output_directory and seed) and obtains the scenario information @@ -20,4 +19,4 @@ The behavior can be controlled by setting the parameter ``overwrite`` in the fac and the old run is not affected. -Please have a look at our :ref:`continue example`. \ No newline at end of file +Please have a look at our [continue example](../examples/1%20Basics/5_continue.html). \ No newline at end of file diff --git a/docs/advanced_usage/11_reproducibility.rst b/docs/advanced_usage/11_reproducibility.md similarity index 76% rename from docs/advanced_usage/11_reproducibility.rst rename to docs/advanced_usage/11_reproducibility.md index 2f9f542f30..24cd4f4be7 100644 --- a/docs/advanced_usage/11_reproducibility.rst +++ b/docs/advanced_usage/11_reproducibility.md @@ -1,4 +1,3 @@ -Reproducibility -=============== +# Reproducibility Reproducibility can only be ensured if one worker is used and no time (wallclock or CPU time) is involved. \ No newline at end of file diff --git a/docs/advanced_usage/12_optimizations.rst b/docs/advanced_usage/12_optimizations.md similarity index 94% rename from docs/advanced_usage/12_optimizations.rst rename to docs/advanced_usage/12_optimizations.md index 6e8d2e82d8..9e9382d2ab 100644 --- a/docs/advanced_usage/12_optimizations.rst +++ b/docs/advanced_usage/12_optimizations.md @@ -1,5 +1,4 @@ -Optimizations -============= +# Optimizations SMAC might run faster or slower depending on the user specifications. In general it applies that the more you know about the underlying target function, the better you can optimize the optimization @@ -17,4 +16,4 @@ The following list might help you to make the optimization process more efficien - High target function evaluation times: As many ``n_workers`` as cores. - Low target function evaluation times: Only one worker because the communication might take longer than evaluating - on a single thread. \ No newline at end of file + on a single thread. \ No newline at end of file diff --git a/docs/advanced_usage/1_components.rst b/docs/advanced_usage/1_components.md similarity index 70% rename from docs/advanced_usage/1_components.rst rename to docs/advanced_usage/1_components.md index 9b3235e7ed..6b6c5aa67b 100644 --- a/docs/advanced_usage/1_components.rst +++ b/docs/advanced_usage/1_components.md @@ -1,22 +1,21 @@ -Components -========== +# Components +[](){#components} -In addition to the basic components mentioned in :ref:`Getting Started`, all other components are +In addition to the basic components mentioned in [Getting Started][getting_started], all other components are explained in the following paragraphs to give a better picture of SMAC. These components are all used to guide the optimization process and simple changes can influence the results drastically. 
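
As a quick taste of this modularity, a single component can be swapped through the facade without rebuilding the rest of the pipeline. A hedged sketch, assuming `configspace` and `train` are defined as in Getting Started and using the facade's static `get_*` helpers:

```python
from smac import HyperparameterOptimizationFacade as HPOFacade
from smac import Scenario

scenario = Scenario(configspace, n_trials=100)

# Same default pipeline, but with a more exploratory Expected Improvement.
smac = HPOFacade(
    scenario,
    train,
    acquisition_function=HPOFacade.get_acquisition_function(scenario, xi=0.01),
)
incumbent = smac.optimize()
```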
+ Before diving into the components, we briefly want to explain the main Bayesian optimization loop in SMAC. -The :term:`SMBO` receives all instantiated components from the facade and the logic happens here. +The [SMBO][SMBO] receives all instantiated components from the facade and the logic happens here. In general, a while loop is used to ask for the next trial, submit it to the runner, and wait for the runner to -finish the evaluation. Since the runner and the :ref:`SMBO` +finish the evaluation. Since the runner and the [`SMBO`][smac.main.smbo] object are decoupled, the while loop continues and asks for even more trials (e.g., in case of multi-threading), which can also be submitted to the runner. If all workers are occupied, SMAC will wait until a new worker is available again. Moreover, limitations like wallclock time and remaining trials are checked in every iteration. -:ref:`Surrogate Model` --------------------------------------------------- +## [Surrogate Model][smac.facade.abstract_facade] The surrogate model is used to approximate the objective function of configurations. In previous versions, the model was referred to as the Empirical Performance Model (EPM). Mostly, Bayesian optimization is used/associated with Gaussian processes; SMAC, however, also supports random forests as surrogate models, which are better suited for higher dimensional and complex spaces. The data used to train the surrogate model is collected by the runhistory encoder (receives data from the runhistory and transforms it). If budgets are involved, the highest budget which satisfies ``min_trials`` (defaults to 1) in [smac.main.config_selector][smac.main.config_selector] is used. If no budgets are used, all observations are used. If you are using instances, it is recommended to use instance features. The model is trained on each instance associated with its features. Imagine you have two hyperparameters, two instances and no instance features, the model would be trained on: -.. csv-table:: - :header: "HP 1", "HP 2", "Objective Value" - - "0.1", "0.8", "0.5" - "0.1", "0.8", "0.75" - "505", "7", "2.4" - "505", "7", "1.3" +| HP 1 | HP 2 | Objective Value | +|-------|-------|-----------------| +| 0.1 | 0.8 | 0.5 | +| 0.1 | 0.8 | 0.75 | +| 505 | 7 | 2.4 | +| 505 | 7 | 1.3 | You can see that the same inputs lead to different objective values because of two instances. If you associate each instance with a feature, you would end up with the following data points: -.. csv-table:: - :header: "HP 1", "HP 2", "Instance Feature", "Objective Value" - - "0.1", "0.8", "0", "0.5" - "0.1", "0.8", "1", "0.75" - "505", "7", "0", "2.4" - "505", "7", "1", "1.3" +| HP 1 | HP 2 | Instance Feature | Objective Value | +|-------|-------|------------------|-----------------| +| 0.1 | 0.8 | 0 | 0.5 | +| 0.1 | 0.8 | 1 | 0.75 | +| 505 | 7 | 0 | 2.4 | +| 505 | 7 | 1 | 1.3 | The steps to receiving data are as follows: -#. The intensifier requests new configurations via ``next(self.config_generator)``. -#. The config selector collects the data via the runhistory encoder which iterates over the runhistory trials. -#. The runhistory encoder only collects trials which are in ``considered_states`` and timeout trials. Also, only the +* The intensifier requests new configurations via ``next(self.config_generator)``. +* The config selector collects the data via the runhistory encoder which iterates over the runhistory trials.
+* The runhistory encoder only collects trials which are in ``considered_states`` and timeout trials. Also, only the highest budget is considered if budgets are used. In this step, multi-objective values are scalarized using the ``normalize_costs`` function (uses ``objective_bounds`` from the runhistory) and the multi-objective algorithm. For example, when ParEGO is used, the scalarization would be different in each training. -#. The selected trial objectives are transformed (e.g., log-transformed, depending on the selected +* The selected trial objectives are transformed (e.g., log-transformed, depending on the selected encoder). -#. The hyperparameters might still have inactive values. The model takes care of that after the collected data +* The hyperparameters might still have inactive values. The model takes care of that after the collected data are passed to the model. - -:ref:`Acquisition Function` ------------------------------------------------------- +## [Acquisition Function][smac.acquisition.function.abstract_acquisition_function] Acquisition functions are mathematical techniques that guide how the parameter space should be explored during Bayesian optimization. They use the predicted mean and predicted variance generated by the surrogate model. SMAC provides a bunch of different acquisition functions (Lower Confidence Bound, Expected Improvement, Probability Improvement, Thompson, integrated acquisition functions and prior acquisition functions). We refer to literature for more information about acquisition functions. -.. note :: - +!!! note The acquisition function calculates the acquisition value for each configuration. However, the configurations are provided by the acquisition maximizer. Therefore, the acquisition maximizer is responsible for receiving the next configurations. -:ref:`Acquisition Maximizer` ------------------------------------------------------- +## [Acquisition Maximizer][smac.acquisition.maximizer.abstract_acquisition_maximizer] The acquisition maximizer is a wrapper for the acquisition function. It returns the next configurations. SMAC supports local search, (sorted) random search, local and (sorted) random search, and differential evolution. While local search checks neighbours of the best configurations, random search makes sure to explore the configuration space. When using sorted random search, random configurations are sorted by the value of the acquisition function. -.. warning :: - +!!! warning Pay attention to the number of challengers: If you experience RAM issues or long computational times in the acquisition function, you might lower the number of challengers. The acquisition maximizer also incorporates the [Random Design][random-design]. Please see the [ChallengerList][smac.acquisition.maximizer.helpers] for more information. -:ref:`Initial Design` ------------------------------------------ +## [Initial Design][smac.initial_design.abstract_initial_design] The surrogate model needs data to be trained. Therefore, the initial design is used to generate the initial data points. We provide random, Latin hypercube, Sobol, factorial and default initial designs. The default initial design uses a multidimensional distribution. The initial design configurations are yielded by the config selector first.

-.. _Random Design:
-
-:ref:`Random Design`
------------------------------------------
+[](){#random-design}
+## [Random Design][smac.random_design.abstract_random_design]

The random design is used in the acquisition maximizer to tell whether the next configuration should be
random or sampled from the acquisition function. For example, if we use a random design with a probability of
@@ -129,15 +118,13 @@
are *guaranteed* to find the best configuration over time.

In addition to simple probability random design, we also provide annealing and modulus random design.


-:ref:`Intensifier`
------------------------------------
+## [Intensifier][smac.intensifier.abstract_intensifier]

The intensifier compares different configurations based on the trials evaluated so far. It decides
which configuration should be `intensified` or, in other words, if a configuration is worth spending more time on
(e.g., evaluating another seed pair, evaluating on another instance, or evaluating on a higher budget).

-.. warning ::
-
+!!! warning
    Always pay attention to ``max_config_calls`` or ``n_seeds``: If this argument is set high, the intensifier
    might spend a lot of time on a single configuration.

@@ -157,8 +144,7 @@ All intensifiers support multi-objective, multi-fidelity, and multi-threading:
   repeated as often as needed. Intensifiers are not required to receive results as the results are directly taken
   from the runhistory.

-.. note ::
-
+!!! note
    All intensifiers work on the runhistory and recognize previously logged trials (e.g., if the user
    already evaluated something beforehand). Previous configurations (in the best case, also complete trials) are
    added to the queue/tracker again so that they are integrated into the intensification process.
@@ -166,8 +152,7 @@ All intensifiers support multi-objective, multi-fidelity, and multi-threading:
    That means continuing a run as well as incorporating user inputs are natively supported.


-:ref:`Configuration Selector`
----------------------------------------------------------
+## [Configuration Selector][smac.main.config_selector]

The configuration selector uses the initial design, surrogate model, acquisition maximizer/function, runhistory,
runhistory encoder, and random design to select the next configuration. The configuration selector is directly
@@ -175,113 +160,104 @@ used by the intensifier and is called everytime a new configuration is requested

The idea behind the configuration selector is straightforward:

-#. Yield the initial design configurations.
-#. Train the surrogate model with the data from the runhistory encoder.
-#. Get the next ``retrain_after`` configurations from the acquisition function/maximizer and yield them.
-#. After all ``retrain_after`` configurations were yield, go back to step 2.
-
-.. note ::
+* Yield the initial design configurations.
+* Train the surrogate model with the data from the runhistory encoder.
+* Get the next ``retrain_after`` configurations from the acquisition function/maximizer and yield them.
+* After all ``retrain_after`` configurations have been yielded, go back to step 2.
+!!! note
    The configuration selector is a generator and yields configurations. Therefore, the current state of the
    selector is saved and when the intensifier calls ``next``, the selector continues where it stopped.

-.. note ::
-
+!!! note
    Every time the surrogate model is trained, the multi-objective algorithm is updated via
    ``update_on_iteration_start``.
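+
+As a hedged sketch (assuming, in line with the description above, that ``ConfigSelector`` exposes the
+``retrain_after`` argument and that the facades accept a ``config_selector``), the retrain interval could be
+changed like this:
+
+```python
+from smac import HyperparameterOptimizationFacade, Scenario
+from smac.main.config_selector import ConfigSelector
+
+scenario = Scenario(...)  # placeholder: your configuration space and budget
+
+smac = HyperparameterOptimizationFacade(
+    scenario,
+    target_function,  # placeholder: your target function
+    # Retrain the surrogate model after every 8 yielded configurations
+    config_selector=ConfigSelector(scenario, retrain_after=8),
+)
+```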

-:ref:`Multi-Objective Algorithm`
--------------------------------------------------------
+## [Multi-Objective Algorithm][smac.multi_objective.abstract_multi_objective_algorithm]

The multi-objective algorithm is used to scalarize multi-objective values. The multi-objective algorithm is
passed normalized objective values and returns a single value. The resulting value, computed when the runhistory
encoder calls the algorithm, is then used to train the surrogate model.

-.. warning ::
-
+!!! warning
    Depending on the multi-objective algorithm, the values for the runhistory encoder might differ each time
    the surrogate model is trained. Let's take ParEGO for example: Every time a new configuration is sampled
    (see ConfigSelector), the objective weights are updated. Therefore, the scalarized values are different and
    the acquisition maximizer might return completely different configurations.

-:ref:`RunHistory`
---------------------------------------------
+## [RunHistory][smac.runhistory.runhistory]

The runhistory holds all (un-)evaluated trials of the optimization run. You can use the runhistory to
get (running) configs, (running) trials, trials of a specific config, and more. The runhistory encoder iterates
over the runhistory to receive data for the surrogate model. The following code shows how to iterate over the
runhistory:

-.. code-block:: python
-
-    smac = HPOFacade(...)
-
-    # Iterate over all trials
-    for trial_info, trial_value in smac.runhistory.items():
-        # Trial info
-        config = trial_info.config
-        instance = trial_info.instance
-        budget = trial_info.budget
-        seed = trial_info.seed
-
-        # Trial value
-        cost = trial_value.cost
-        time = trial_value.time
-        status = trial_value.status
-        starttime = trial_value.starttime
-        endtime = trial_value.endtime
-        additional_info = trial_value.additional_info
-
-    # Iterate over all configs
-    for config in smac.runhistory.get_configs():
-        # Get the cost of all trials of this config
-        average_cost = smac.runhistory.average_cost(config)
-
-.. warning ::
-
+```python
+from smac import HyperparameterOptimizationFacade as HPOFacade
+
+smac = HPOFacade(...)
+
+# Iterate over all trials
+for trial_info, trial_value in smac.runhistory.items():
+    # Trial info
+    config = trial_info.config
+    instance = trial_info.instance
+    budget = trial_info.budget
+    seed = trial_info.seed
+
+    # Trial value
+    cost = trial_value.cost
+    time = trial_value.time
+    status = trial_value.status
+    starttime = trial_value.starttime
+    endtime = trial_value.endtime
+    additional_info = trial_value.additional_info
+
+# Iterate over all configs
+for config in smac.runhistory.get_configs():
+    # Get the cost of all trials of this config
+    average_cost = smac.runhistory.average_cost(config)
+```
+
+!!! warning
    The intensifier uses a callback to update the incumbent every time a new trial is added to the runhistory.

-:ref:`RunHistory Encoder`
--------------------------------------------------
-
+## [RunHistory Encoder][smac.runhistory.encoder.abstract_encoder]
The runhistory encoder is used to encode the runhistory data into a format that can be used by the surrogate
model. Only trials with the status ``considered_states`` and timeout trials are considered. Multi-objective values
are scalarized using the ``normalize_costs`` function (uses ``objective_bounds`` from the runhistory). Afterwards,
the normalized value is processed by the multi-objective algorithm.
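+
+As a hedged sketch (assuming the ``RunHistoryLogScaledEncoder`` class in ``smac.runhistory.encoder`` and the
+facade's ``runhistory_encoder`` argument, both present in SMAC 2.x at the time of writing), a different
+encoding, e.g. log-scaled costs, could be selected like this:
+
+```python
+from smac import HyperparameterOptimizationFacade, Scenario
+from smac.runhistory.encoder import RunHistoryLogScaledEncoder
+
+scenario = Scenario(...)  # placeholder: your configuration space and budget
+
+smac = HyperparameterOptimizationFacade(
+    scenario,
+    target_function,  # placeholder: your target function
+    # Log-scale the observed costs before the surrogate model is trained
+    runhistory_encoder=RunHistoryLogScaledEncoder(scenario),
+)
+```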
-:ref:`Callback`
------------------------------
+## [Callback][smac.callback.callback]

Callbacks provide the ability to easily execute code before, inside, and after the Bayesian optimization loop.
To add a callback, inherit from ``smac.Callback`` and override the methods you need. Afterwards, you can pass
the callbacks to any facade.

-.. code-block:: python
-
-    from smac import MultiFidelityFacade, Callback
+```python
+from smac import MultiFidelityFacade, Callback
+from smac.main.smbo import SMBO
+
+# Assumption: ``RunInfo``/``RunValue`` refer to the trial dataclasses, which in SMAC v2
+# live in ``smac.runhistory.dataclasses`` under the names ``TrialInfo``/``TrialValue``.
+from smac.runhistory.dataclasses import TrialInfo as RunInfo, TrialValue as RunValue

-    class CustomCallback(Callback):
-        def on_start(self, smbo: SMBO) -> None:
-            pass
+class CustomCallback(Callback):
+    def on_start(self, smbo: SMBO) -> None:
+        pass

-        def on_end(self, smbo: SMBO) -> None:
-            pass
+    def on_end(self, smbo: SMBO) -> None:
+        pass

-        def on_iteration_start(self, smbo: SMBO) -> None:
-            pass
+    def on_iteration_start(self, smbo: SMBO) -> None:
+        pass

-        def on_iteration_end(self, smbo: SMBO, info: RunInfo, value: RunValue) -> bool | None:
-            # We just do a simple printing here
-            print(info, value)
+    def on_iteration_end(self, smbo: SMBO, info: RunInfo, value: RunValue) -> bool | None:
+        # We just do a simple printing here
+        print(info, value)

-    smac = MultiFidelityFacade(
-        ...
-        callbacks=[CustomCallback()]
-    )
-    smac.optimize()
+smac = MultiFidelityFacade(
+    ...,  # placeholder: scenario, target function, etc.
+    callbacks=[CustomCallback()]
+)
+smac.optimize()
+```
\ No newline at end of file
diff --git a/docs/advanced_usage/2_multi_fidelity.rst b/docs/advanced_usage/2_multi_fidelity.md
similarity index 66%
rename from docs/advanced_usage/2_multi_fidelity.rst
rename to docs/advanced_usage/2_multi_fidelity.md
index 4bcf11c64f..d3c62c3ab2 100644
--- a/docs/advanced_usage/2_multi_fidelity.rst
+++ b/docs/advanced_usage/2_multi_fidelity.md
@@ -1,11 +1,10 @@
-Multi-Fidelity Optimization
-===========================
+# Multi-Fidelity Optimization

Multi-fidelity refers to running an algorithm on multiple budgets (such as number of epochs or
subsets of data) and thereby assessing the performance early on. You can run a multi-fidelity optimization
-when using :ref:`Successive Halving` or
-:ref:`Hyperband`. `Hyperband` is the default intensifier in the
-:ref:`multi-fidelity facade` and requires the arguments
+when using [Successive Halving][smac.intensifier.successive_halving] or
+[Hyperband][smac.intensifier.hyperband]. `Hyperband` is the default intensifier in the
+[multi-fidelity facade][smac.facade.multi_fidelity_facade] and requires the arguments
``min_budget`` and ``max_budget`` in the scenario if no instances are used.

In general, multi-fidelity works for both real-valued and instance budgets. In the real-valued case,
@@ -14,5 +13,5 @@
target function but ``min_budget`` and ``max_budget`` are used internally to determine
each stage. That's also the reason why ``min_budget`` and ``max_budget`` are *not required* when using
instances: The ``max_budget`` is simply the max number of instances, whereas the ``min_budget`` is simply 1.

-Please have a look into our :ref:`multi-fidelity examples` to see how to use
+Please have a look at our [multi-fidelity examples](Multi-Fidelity and Multi-Instances) to see how to use
multi-fidelity optimization in real-world applications.
\ No newline at end of file diff --git a/docs/advanced_usage/3_multi_objective.rst b/docs/advanced_usage/3_multi_objective.md similarity index 61% rename from docs/advanced_usage/3_multi_objective.rst rename to docs/advanced_usage/3_multi_objective.md index 4fecde06d1..eea3819624 100644 --- a/docs/advanced_usage/3_multi_objective.rst +++ b/docs/advanced_usage/3_multi_objective.md @@ -1,9 +1,8 @@ -Multi-Objective Optimization -============================ +# Multi-Objective Optimization Often we do not only want to optimize just a single objective, but multiple instead. SMAC offers a multi-objective optimization interface to do exactly that. Right now, the algorithm used for this is a mean aggregation strategy or -ParEGO [Know06]_. In both cases, multiple objectives are aggregated into a single scalar objective, which is then +ParEGO [[Know06][Know06]]. In both cases, multiple objectives are aggregated into a single scalar objective, which is then optimized by SMAC. However, the run history still keeps the original objectives. @@ -18,24 +17,24 @@ The basic recipe is as follows: multi-objective algorithm default. -.. warning :: +!!! warning - The multi-objective algorithm influences which configurations are sampled next. More specifically, - since only one surrogate model is trained, multiple objectives have to be scalarized into a single objective. - This scalarized value is used to train the surrogate model, which is used by the acquisition function/maximizer - to sample the next configurations. + The multi-objective algorithm influences which configurations are sampled next. More specifically, + since only one surrogate model is trained, multiple objectives have to be scalarized into a single objective. + This scalarized value is used to train the surrogate model, which is used by the acquisition function/maximizer + to sample the next configurations. You receive the incumbents (points on the Pareto front) after the optimization process directly. Alternatively, you can use the method ``get_incumbents`` in the intensifier. -.. code-block:: python +```python - smac = ... - incumbents = smac.optimize() + smac = ... + incumbents = smac.optimize() - # Or you use the intensifier - incumbents = smac.intensifier.get_incumbents() + # Or you use the intensifier + incumbents = smac.intensifier.get_incumbents() +``` - -We show an example of how to use multi-objective with plots in our :ref:`examples`. +We show an example of how to use multi-objective with plots in our [examples](../examples/3%20Multi-Objective/1_schaffer.html). diff --git a/docs/advanced_usage/4_instances.rst b/docs/advanced_usage/4_instances.md similarity index 55% rename from docs/advanced_usage/4_instances.rst rename to docs/advanced_usage/4_instances.md index b9d185b1a1..4c1156ff24 100644 --- a/docs/advanced_usage/4_instances.rst +++ b/docs/advanced_usage/4_instances.md @@ -1,5 +1,4 @@ -Optimization across Instances -============================= +# Optimization across Instances Often you want to optimize the cost across different datasets, subsets, or even different augmentations. For this purpose, you can use instances. @@ -7,33 +6,33 @@ augmentations. For this purpose, you can use instances. To work with instances, you need to add your pre-defined instance names to the scenario object. In the following example, we want to use five different subsets, identified by its id: -.. code-block:: python - - instances = ["d0", "d1", "d2", "d3", "d4"] - scenario = Scenario( - ... - "instances": instances, - ... 
-    )
+```python
+instances = ["d0", "d1", "d2", "d3", "d4"]
+scenario = Scenario(
+    ...
+    instances=instances,
+    ...
+)
+```

In addition to the instances, there is the option to define ``instance_features``. Those instance features are
used to expand the internal X matrix and thus play a role in training the underlying surrogate model. For example,
if you want to add the number of samples and the mean of each subset, you can do so as follows:

-.. code-block:: bash
-
-    instance_features = {
-        "d0": [121, 0.6],
-        "d1": [140, 0.65],
-        "d2": [99, 0.45],
-        "d3": [102, 0.59],
-        "d4": [132, 0.48],
-    }
-
-    scenario = Scenario(
-        ...
-        "instances": instances,
-        "instance_features": instance_features
-        ...
-    )
+```python
+instance_features = {
+    "d0": [121, 0.6],
+    "d1": [140, 0.65],
+    "d2": [99, 0.45],
+    "d3": [102, 0.59],
+    "d4": [132, 0.48],
+}
+
+scenario = Scenario(
+    ...
+    instances=instances,
+    instance_features=instance_features,
+    ...
+)
+```
diff --git a/docs/advanced_usage/5.1_warmstarting.md b/docs/advanced_usage/5.1_warmstarting.md
new file mode 100644
index 0000000000..38189b594f
--- /dev/null
+++ b/docs/advanced_usage/5.1_warmstarting.md
@@ -0,0 +1,117 @@
+# Warmstarting SMAC
+
+With the ask and tell interface, we can support warmstarting SMAC. We can communicate rich
+information about the previous trials to SMAC using `TrialInfo` and `TrialValue` instances.
+
+We can communicate using the following objects:
+
+```python
+class TrialValue:
+    """Values of a trial.
+
+    Parameters
+    ----------
+    cost : float | list[float]
+    time : float, defaults to 0.0
+    status : StatusType, defaults to StatusType.SUCCESS
+    starttime : float, defaults to 0.0
+    endtime : float, defaults to 0.0
+    additional_info : dict[str, Any], defaults to {}
+    """
+
+class TrialInfo:
+    """Information about a trial.
+
+    Parameters
+    ----------
+    config : Configuration
+    instance : str | None, defaults to None
+    seed : int | None, defaults to None
+    budget : float | None, defaults to None
+    """
+```
+
+## Usage Example
+See [`examples/1_basics/8_warmstart.py`](../../examples/1%20Basics/8_warmstart.py).
+
+
+```python
+from __future__ import annotations
+
+from smac.scenario import Scenario
+from smac.facade import HyperparameterOptimizationFacade
+from ConfigSpace import Configuration, ConfigurationSpace, Float
+from smac.runhistory.dataclasses import TrialValue, TrialInfo
+
+
+class Rosenbrock2D:
+    @property
+    def configspace(self) -> ConfigurationSpace:
+        cs = ConfigurationSpace(seed=0)
+        x0 = Float("x0", (-5, 10), default=-3)
+        x1 = Float("x1", (-5, 10), default=-4)
+        cs.add([x0, x1])
+
+        return cs
+
+    def evaluate(self, config: Configuration, seed: int = 0) -> float:
+        """The 2-dimensional Rosenbrock function as a toy model.
+        The Rosenbrock function is well known in the optimization community and
+        often serves as a toy problem. It can be defined for arbitrary
+        dimensions. The minimum is always at x_i = 1 with a function value of
+        zero. All input parameters are continuous. The search domain for
+        all x's is the interval [-5, 10].
+ """ + x1 = config["x0"] + x2 = config["x1"] + + cost = 100.0 * (x2 - x1**2.0) ** 2.0 + (1 - x1) ** 2.0 + return cost + + +if __name__ == "__main__": + SEED = 12345 + task = Rosenbrock2D() + + # Previous evaluations + # X vectors need to be connected to the configuration space + configurations = [ + Configuration(task.configspace, {'x0':1, 'x1':2}), + Configuration(task.configspace, {'x0':-1, 'x1':3}), + Configuration(task.configspace, {'x0':5, 'x1':5}), + ] + costs = [task.evaluate(c, seed=SEED) for c in configurations] + + # Define optimization problem and budget + scenario = Scenario(task.configspace, deterministic=False, n_trials=30) + intensifier = HyperparameterOptimizationFacade.get_intensifier(scenario, max_config_calls=1) + smac = HyperparameterOptimizationFacade( + scenario, + task.evaluate, + intensifier=intensifier, + overwrite=True, + + # Modify the initial design to use our custom initial design + initial_design=HyperparameterOptimizationFacade.get_initial_design( + scenario, + n_configs=0, # Do not use the default initial design + additional_configs=configurations # Use the configurations previously evaluated as initial design + # This only passes the configurations but not the cost! + # So in order to actually use the custom, pre-evaluated initial design + # we need to tell those trials, like below. + ) + ) + + # Convert previously evaluated configurations into TrialInfo and TrialValue instances to pass to SMAC + trial_infos = [TrialInfo(config=c, seed=SEED) for c in configurations] + trial_values = [TrialValue(cost=c) for c in costs] + + # Warmstart SMAC with the trial information and values + for info, value in zip(trial_infos, trial_values): + smac.tell(info, value) + + # Optimize as usual + smac.optimize() +``` + +For more details on ask and tell consult [`advanced_usage/5_ask_and_tell`](../advanced_usage/5_ask_and_tell.md). diff --git a/docs/advanced_usage/5.1_warmstarting.rst b/docs/advanced_usage/5.1_warmstarting.rst deleted file mode 100644 index fb255681e3..0000000000 --- a/docs/advanced_usage/5.1_warmstarting.rst +++ /dev/null @@ -1,120 +0,0 @@ -Warmstarting SMAC -====================================== - -With the ask and tell interface, we can support warmstarting SMAC. We can communicate rich -information about the previous trials to SMAC using `TrialInfo` and `TrialValue` instances. - -We can communicate using the following objects: - -.. code-block:: python - - class TrialValue: - """Values of a trial. - - Parameters - ---------- - cost : float | list[float] - time : float, defaults to 0.0 - status : StatusType, defaults to StatusType.SUCCESS - starttime : float, defaults to 0.0 - endtime : float, defaults to 0.0 - additional_info : dict[str, Any], defaults to {} - """ - - class TrialInfo: - """Information about a trial. - - Parameters - ---------- - config : Configuration - instance : str | None, defaults to None - seed : int | None, defaults to None - budget : float | None, defaults to None - """ - - -Usage Example -~~~~~~~~~~~~~ -See `examples/1_basics/8_warmstart.py`. - - -.. 
code-block:: python - - from __future__ import annotations - - from smac.scenario import Scenario - from smac.facade import HyperparameterOptimizationFacade - from ConfigSpace import Configuration, ConfigurationSpace, Float - from smac.runhistory.dataclasses import TrialValue, TrialInfo - - - class Rosenbrock2D: - @property - def configspace(self) -> ConfigurationSpace: - cs = ConfigurationSpace(seed=0) - x0 = Float("x0", (-5, 10), default=-3) - x1 = Float("x1", (-5, 10), default=-4) - cs.add([x0, x1]) - - return cs - - def evaluate(self, config: Configuration, seed: int = 0) -> float: - """The 2-dimensional Rosenbrock function as a toy model. - The Rosenbrock function is well know in the optimization community and - often serves as a toy problem. It can be defined for arbitrary - dimensions. The minimium is always at x_i = 1 with a function value of - zero. All input parameters are continuous. The search domain for - all x's is the interval [-5, 10]. - """ - x1 = config["x0"] - x2 = config["x1"] - - cost = 100.0 * (x2 - x1**2.0) ** 2.0 + (1 - x1) ** 2.0 - return cost - - - if __name__ == "__main__": - SEED = 12345 - task = Rosenbrock2D() - - # Previous evaluations - # X vectors need to be connected to the configuration space - configurations = [ - Configuration(task.configspace, {'x0':1, 'x1':2}), - Configuration(task.configspace, {'x0':-1, 'x1':3}), - Configuration(task.configspace, {'x0':5, 'x1':5}), - ] - costs = [task.evaluate(c, seed=SEED) for c in configurations] - - # Define optimization problem and budget - scenario = Scenario(task.configspace, deterministic=False, n_trials=30) - intensifier = HyperparameterOptimizationFacade.get_intensifier(scenario, max_config_calls=1) - smac = HyperparameterOptimizationFacade( - scenario, - task.evaluate, - intensifier=intensifier, - overwrite=True, - - # Modify the initial design to use our custom initial design - initial_design=HyperparameterOptimizationFacade.get_initial_design( - scenario, - n_configs=0, # Do not use the default initial design - additional_configs=configurations # Use the configurations previously evaluated as initial design - # This only passes the configurations but not the cost! - # So in order to actually use the custom, pre-evaluated initial design - # we need to tell those trials, like below. - ) - ) - - # Convert previously evaluated configurations into TrialInfo and TrialValue instances to pass to SMAC - trial_infos = [TrialInfo(config=c, seed=SEED) for c in configurations] - trial_values = [TrialValue(cost=c) for c in costs] - - # Warmstart SMAC with the trial information and values - for info, value in zip(trial_infos, trial_values): - smac.tell(info, value) - - # Optimize as usual - smac.optimize() - -For more details on ask and tell consult `advanced_usage/5_ask_and_tell`. diff --git a/docs/advanced_usage/5_ask_and_tell.rst b/docs/advanced_usage/5_ask_and_tell.md similarity index 84% rename from docs/advanced_usage/5_ask_and_tell.rst rename to docs/advanced_usage/5_ask_and_tell.md index c62c78606f..498ecacefd 100644 --- a/docs/advanced_usage/5_ask_and_tell.rst +++ b/docs/advanced_usage/5_ask_and_tell.md @@ -1,15 +1,14 @@ -Ask-and-Tell Interface -====================== +# Ask-and-Tell Interface SMAC provides an ask-and-tell interface in v2.0, giving the user the opportunity to ask for the next trial and report the results of the trial. -.. warning :: +!!! warning When specifying ``n_trials`` in the scenario and trials have been registered by the user, SMAC will count the users trials as well. 
However, the wallclock time will first start when calling ``optimize``. -.. warning :: +!!! warning It might be the case that not all user-provided trials can be considered. Take Successive Halving, for example, when specifying the min and max budget, intermediate budgets are calculated. If the user provided trials with @@ -17,4 +16,4 @@ and report the results of the trial. into the intensification process. -Please have a look at our :ref:`ask-and-tell example`. +Please have a look at our [ask-and-tell example](../examples/1%20Basics/3_ask_and_tell.html). diff --git a/docs/advanced_usage/6_commandline.rst b/docs/advanced_usage/6_commandline.md similarity index 61% rename from docs/advanced_usage/6_commandline.rst rename to docs/advanced_usage/6_commandline.md index e33658ba55..1a7d60628b 100644 --- a/docs/advanced_usage/6_commandline.rst +++ b/docs/advanced_usage/6_commandline.md @@ -1,25 +1,24 @@ -Command-Line Interface -====================== +# Command-Line Interface The command-line interface enables the user to run target functions which are non-python code. The passed and further called script (using `Popen`) needs to return a standard output which is then interpreted to perform the optimization process. -.. note :: +!!! note In SMAC v2.0, SMAC can not be called from the command-line directly. Instead, the user should use the python interface to call SMAC. The command-line interface is still available in SMAC v1.4. -Call of the Target Function ---------------------------- +## Call of the Target Function -The following example shows how the script is called: -.. code-block:: bash +The following example shows how the script is called: - filename --instance=test --instance_features=test --seed=0 --hyperparameter1=5323 +```bash +filename --instance=test --instance_features=test --seed=0 --hyperparameter1=5323 +``` However, as for the python target function, the arguments like instance or budget are depending on which components are used. The hyperparameters are depending on the configuration space. The variable ``filename`` could be @@ -28,65 +27,62 @@ something like ``./path/to/your/script.sh``. We recommend using the following code to receive the arguments in a bash script. Please note that the user is not limited to bash scripts but can also use executables, python scripts or anything else. -.. note :: +!!! note Since the script is called wih the filename only, make sure to mark the type of the file (e.g., ``#!/bin/bash`` or ``#!/usr/bin/env python``). -.. warning :: +!!! warning Everytime an instance is passed, also an instance feature in form of a comma-separated list (no spaces) of floats is passed. If no instance feature for the instance is given, an empty list is passed. -.. code-block:: bash - - #!/bin/bash +```bash +#!/bin/bash - # Set arguments first - for argument in "$@" - do - key=$(echo $argument | cut -f1 -d=) - value=$(echo $argument | cut -f2 -d=) +# Set arguments first +for argument in "$@" +do + key=$(echo $argument | cut -f1 -d=) + value=$(echo $argument | cut -f2 -d=) - if [[ $key == *"--"* ]]; then - v="${key/--/}" - declare $v="${value}" - fi - done + if [[ $key == *"--"* ]]; then + v="${key/--/}" + declare $v="${value}" + fi +done - echo $instance - echo $hyperparameter1 +echo $instance +echo $hyperparameter1 +``` -Return of the Target Function ------------------------------ +## Return of the Target Function The script must return an stdout (echo or print) in the following form (white-spaces are ignored): -.. 
code-block::
-
-    cost=0.5; runtime=0.01; status=SUCCESS; additional_info=test (single-objective)
-    cost=0.5, 0.4; runtime=0.01; status=SUCCESS; additional_info=test (multi-objective)
+```
+cost=0.5; runtime=0.01; status=SUCCESS; additional_info=test (single-objective)
+cost=0.5, 0.4; runtime=0.01; status=SUCCESS; additional_info=test (multi-objective)
+```

All arguments are optional except cost and are separated by a semicolon. The string of the status must match
-one of the values from :ref:`StatusType`.
+one of the values from [`StatusType`][smac.runhistory.enumerations].

-Start the Optimization
-----------------------
+## Start the Optimization

The optimization will be started by the normal Python interface. The only difference is that you need to pass
a string as the target function instead of a Python function.

-.. warning ::
+!!! warning

    Your script needs to be executable (e.g., update the permissions with ``chmod``).

-.. code-block:: python
-
-    ...
-    smac = BlackBoxFacade(scenario, target_function="./path/to/your/script.sh")
-    incumbent = smac.optimize()
-    ...
-
+```python
+...
+smac = BlackBoxFacade(scenario, target_function="./path/to/your/script.sh")
+incumbent = smac.optimize()
+...
+```
diff --git a/docs/advanced_usage/7_stopping_criteria.rst b/docs/advanced_usage/7_stopping_criteria.md
similarity index 74%
rename from docs/advanced_usage/7_stopping_criteria.rst
rename to docs/advanced_usage/7_stopping_criteria.md
index 508046d749..06212e01fe 100644
--- a/docs/advanced_usage/7_stopping_criteria.rst
+++ b/docs/advanced_usage/7_stopping_criteria.md
@@ -1,12 +1,10 @@
-Stopping Criteria
-=================
+# Stopping Criteria

In addition to the standard stopping criteria like number of trials or wallclock time, SMAC also provides
more advanced criteria.

-Termination Cost Threshold
---------------------------
+## Termination Cost Threshold

SMAC can stop the optimization process after a user-defined cost has been reached. In each iteration, the average cost
(using ``average_cost`` from the run history) of the incumbent is compared to the termination cost threshold. If one
@@ -16,20 +14,19 @@
In other words, the process can be stopped even if the incumbent has not been evaluated on all instances, on the
highest fidelity, or on all seeds.

-.. code-block:: python
-
-    scenario = Scenario(
-        ...
-        objectives=["accuracy", "runtime"],
-        termination_cost_threshold=[0.1, np.inf]
-        ...
-    )
+```python
+scenario = Scenario(
+    ...
+    objectives=["accuracy", "runtime"],
+    termination_cost_threshold=[0.1, np.inf]
+    ...
+)
+```

In the code above, the optimization process is stopped if the average accuracy of the incumbent is below 0.1.
The runtime is ignored completely as it is set to infinity. Note here again that SMAC minimizes the objective values.

-Automatically Stopping
-----------------------
+## Automatically Stopping

-Coming in the next version of SMAC. \ No newline at end of file
+Coming soon. 😊
\ No newline at end of file
diff --git a/docs/advanced_usage/8_logging.md b/docs/advanced_usage/8_logging.md
new file mode 100644
index 0000000000..7d24e84e27
--- /dev/null
+++ b/docs/advanced_usage/8_logging.md
@@ -0,0 +1,66 @@
+# Logging
+
+Logging is a crucial part of the optimization, which should be customizable by the user. This page gives you an
+overview of how to customize the logging experience with SMAC.
+
+## Level
+
+The easiest way to change the logging behaviour is to change the level of the global logger. SMAC does this for you
+if you specify the ``logging_level`` in any facade.
+
+```python
+smac = Facade(
+    ...
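+    # Hint: levels follow Python's logging module; 20 corresponds to INFO (see the table below)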
+    logging_level=20,
+    ...
+)
+```
+
+The following table shows the specific levels:
+
+| Level | Name     |
+|-------|----------|
+| 0     | SHOW ALL |
+| 10    | DEBUG    |
+| 20    | INFO     |
+| 30    | WARNING  |
+| 40    | ERROR    |
+| 50    | CRITICAL |
+
+
+## Custom File
+
+Sometimes, the user wants to disable or highlight specific modules. You can do that by passing a custom yaml
+file to the facade instead.
+
+```python
+smac = Facade(
+    ...
+    logging_level="path/to/your/logging.yaml",
+    ...
+)
+```
+
+The following file shows you how to display only error messages from the intensifier
+but keep the level of everything else on INFO:
+
+```yaml
+version: 1
+disable_existing_loggers: false
+formatters:
+  simple:
+    format: '[%(levelname)s][%(filename)s:%(lineno)d] %(message)s'
+handlers:
+  console:
+    class: logging.StreamHandler
+    level: INFO
+    formatter: simple
+    stream: ext://sys.stdout
+loggers:
+  smac.intensifier:
+    level: ERROR
+    handlers: [console]
+root:
+  level: INFO
+  handlers: [console]
+```
diff --git a/docs/advanced_usage/8_logging.rst b/docs/advanced_usage/8_logging.rst
deleted file mode 100644
index f845de9c93..0000000000
--- a/docs/advanced_usage/8_logging.rst
+++ /dev/null
@@ -1,73 +0,0 @@
-Logging
-=======
-
-Logging is a crucial part of the optimization, which should be customizable by the user. This page gives you the
-overview how to customize the logging experience with SMAC.
-
-Level
------
-
-The easiest way to change the logging behaviour is to change the level of the global logger. SMAC does this for you
-if you specify the ``logging_level`` in any facade.
-
-.. code-block:: python
-
-    smac = Facade(
-        ...
-        logging_level=20,
-        ...
-    )
-
-
-The table shows you the specific levels:
-
-.. csv-table::
-    :header: "Name", "Level"
-
-    0, SHOW ALL
-    10, DEBUG
-    20, INFO
-    30, WARNING
-    40, ERROR
-    50, CRITICAL
-
-
-Custom File
------------
-
-Sometimes, the user wants to disable or highlight specify modules. You can do that by passing a custom yaml
-file to the facade instead.
-
-.. code-block:: python
-
-    smac = Facade(
-        ...
-        logging_level="path/to/your/logging.yaml",
-        ...
-    )
-
-
-The following file shows you how to display only error messages from the intensifier
-but keep the level of everything else on INFO:
-
-.. code-block:: yaml
-
-    version: 1
-    disable_existing_loggers: false
-    formatters:
-        simple:
-            format: '[%(levelname)s][%(filename)s:%(lineno)d] %(message)s'
-    handlers:
-        console:
-            class: logging.StreamHandler
-            level: INFO
-            formatter: simple
-            stream: ext://sys.stdout
-    loggers:
-        smac.intensifier:
-            level: ERROR
-            handlers: [console]
-    root:
-        level: INFO
-        handlers: [console]
-
diff --git a/docs/advanced_usage/9_parallelism.rst b/docs/advanced_usage/9_parallelism.md
similarity index 71%
rename from docs/advanced_usage/9_parallelism.rst
rename to docs/advanced_usage/9_parallelism.md
index 9912c782f5..1c3633c92f 100644
--- a/docs/advanced_usage/9_parallelism.rst
+++ b/docs/advanced_usage/9_parallelism.md
@@ -1,42 +1,41 @@
-Parallelism
-===========
+# Parallelism

SMAC supports multiple workers natively via Dask. Just specify ``n_workers`` in the scenario and you are ready to go.

-.. note ::
+!!! note

    Please keep in mind that additional workers are only used to evaluate trials. The main thread still orchestrates
    the optimization process, including training the surrogate model.

-.. warning ::
+!!! warning

    Using a high number of workers when the target function evaluation is fast might be counterproductive due to
    the overhead of communication. 
Consider using only one worker in this case. -.. warning :: +!!! warning When using multiple workers, SMAC is not reproducible anymore. -Running on a Cluster --------------------- +## Running on a Cluster + You can also pass a custom dask client, e.g. to run on a slurm cluster. -See our :ref:`parallelism example`. +See our [parallelism example](../examples/1%20Basics/7_parallelization_cluster.html). -.. warning :: +!!! warning On some clusters you cannot spawn new jobs when running a SLURMCluster inside a job instead of on the login node. No obvious errors might be raised but it can hang silently. -.. warning :: +!!! warning Sometimes you need to modify your launch command which can be done with - ``SLURMCluster.job_class.submit_command``. - -.. code-block:: python + `SLURMCluster.job_class.submit_command`. - cluster.job_cls.submit_command = submit_command - cluster.job_cls.cancel_command = cancel_command +```python +cluster.job_cls.submit_command = submit_command +cluster.job_cls.cancel_command = cancel_command +``` \ No newline at end of file diff --git a/docs/advanced_usage/index.rst b/docs/advanced_usage/index.rst deleted file mode 100644 index 87feb3b8a6..0000000000 --- a/docs/advanced_usage/index.rst +++ /dev/null @@ -1,29 +0,0 @@ -Advanced Usage -============== - -In this chapter, we will discuss some more advanced usage of the library. If you want to customize -SMAC to your needs, we strongly recommend reading all pages. Since SMAC holds a lot of complex dependencies, -we can not guarantee that your customization will work. However, we can give you some hints on how SMAC -reacts to certain things. - - -Navigation ----------- - -.. toctree:: - :maxdepth: 2 - - 1_components - 2_multi_fidelity - 3_multi_objective - 4_instances - 5_ask_and_tell - 5.1_warmstarting - 6_commandline - 7_stopping_criteria - 8_logging - 9_parallelism - 10_continue - 11_reproducibility - 12_optimizations - diff --git a/docs/api_generator.py b/docs/api_generator.py new file mode 100644 index 0000000000..f36290f89f --- /dev/null +++ b/docs/api_generator.py @@ -0,0 +1,46 @@ +"""Generate the code reference pages and navigation. + +# https://mkdocstrings.github.io/recipes/ +""" +from __future__ import annotations + +import logging +from pathlib import Path + +import mkdocs_gen_files + +logger = logging.getLogger(__name__) + +source_path = "smac" + +# Modules whose members should not include inherited attributes or methods +# NOTE: Given the current setup, we can only operate at a module level. +# Ideally we specify options (at least at a module level) and we render +# them into strings using a yaml parser. 
For now this is fine though +NO_INHERITS = ("sklearn.evaluation",) +TAB = " " + +for path in sorted(Path(source_path).rglob("*.py")): + module_path = path.relative_to(source_path).with_suffix("") + doc_path = path.relative_to(source_path).with_suffix(".md") + full_doc_path = Path("api/smac", doc_path) + + parts = tuple(module_path.parts) + + if parts[-1] in ("__main__", "__version__", "__init__"): + continue + + if any(part.startswith("_") for part in parts): + continue + + with mkdocs_gen_files.open(full_doc_path, "w") as fd: + if parts[0] != source_path: + parts = (source_path,) + parts + ident = ".".join(parts) + fd.write(f"::: {ident}") + + if ident.endswith(NO_INHERITS): + fd.write(f"\n{TAB}options:") + fd.write(f"\n{TAB}{TAB}inherited_members: false") + + mkdocs_gen_files.set_edit_path(full_doc_path, path) \ No newline at end of file diff --git a/docs/example_runner.py b/docs/example_runner.py new file mode 100644 index 0000000000..39fd92a707 --- /dev/null +++ b/docs/example_runner.py @@ -0,0 +1,301 @@ +"""Generates the examples pages.""" +from __future__ import annotations + +import logging +import os +import textwrap +from dataclasses import dataclass +from itertools import takewhile +from pathlib import Path +from typing import Any +from typing_extensions import override + +import mkdocs_gen_files +from more_itertools import first_true, peekable + +logger = logging.getLogger("mkdocs") + +RUN_EXAMPLES_ENV_VAR = "SMAC_DOC_RENDER_EXAMPLES" + + +@dataclass +class CodeSegment: + lines: list[str] + session: str + exec: bool + + def code(self, code: list[str]) -> str: + points_start = first_true(code, pred=lambda _l: _l.startswith("# 1.")) + if points_start is not None: + points_start_index = code.index(points_start) + + points = [""] + points.extend([_l.lstrip("#")[1:] for _l in code[points_start_index:]]) + points.append("") + + body = code[:points_start_index] + else: + points = [] + body = code + + # Trim off any excess leading lines which only have whitespace + while body and body[0].strip() == "": + body.pop(0) + + hl_lines = [] + for i, line in enumerate(body): + _l = line.rstrip() + if "" in _l: + hl_lines.append(str(i + 1)) + _l = _l.replace("", "").rstrip() + + for strip in ["# !", "#!", "#"]: + if _l.endswith(strip): + _l = _l[: -len(strip)] + + body[i] = _l + + if any(hl_lines): + hl_lines = " ".join(hl_lines) + hl_string = f'hl_lines="{hl_lines}"' + else: + hl_string = "" + + # We generate two tabs if executing + if self.exec: + indented_body = "\n".join(f" {_l}" for _l in body) + + code_annotations = " ".join( + [ + "{", + ".python", + ".annotate", + hl_string, + "}", + ], + ) + tab1 = "\n".join( + [ + '=== "Code"', + "", + f" ``` {code_annotations}", + indented_body, + " ```", + *[f" {point}" for point in points], + "", + ], + ) + + run_annotations = " ".join( + [ + "{", + ".python", + f"session='{self.session}'", + 'exec="True"', + 'result="python"', + "}", + ], + ) + + tab2 = "\n".join( + [ + '=== "Run"', + "", + f" ``` {run_annotations}", + indented_body, + " ```", + ], + ) + + return "\n".join([tab1, "", tab2]) + + annotations = " ".join(["{", ".python", ".annotate", hl_string, "}"]) + top = f"```{annotations}" + bottom = "```" + + s = [top, *body, bottom, *points] + body = "\n".join(s) + return body + + @override + def __str__(self) -> str: + return self.code(self.lines) + + +@dataclass +class CommentSegment: + lines: list[str] + + @override + def __str__(self) -> str: + return "\n".join(self.lines) + + +@dataclass +class Example: + name: str + filepath: Path + 
description: str + segments: list[CodeSegment | CommentSegment] + + @classmethod + def should_execute(cls, *, name: str, runnable: bool) -> bool: + if not runnable: + return False + + env_var = os.environ.get(RUN_EXAMPLES_ENV_VAR, "all") + if env_var in ("false", "", "0", "no", "off"): + return False + + if env_var == "all": + return True + + examples_to_exec = [ + example.lstrip().rstrip() for example in env_var.lower().split(",") + ] + return name.lower() in examples_to_exec + + @classmethod + def header_flags(cls, line: str) -> dict[str, Any] | None: + prefix = "# Flags:" + if not line.startswith(prefix): + return None + + line = line[len(prefix) :] + flags = [line.strip() for line in line.split(",")] + + results = {} + + results["doc-runnable"] = any(flag.lower() == "doc-runnable" for flag in flags) + return results + + @classmethod + def from_file(cls, path: Path) -> Example: + with path.open() as f: + lines = f.readlines() + + lines = iter(lines) + + # First line is the name of the example to show + name = next(lines).strip().replace('"""', "") + potential_flag_line = next(lines) + flags = cls.header_flags(potential_flag_line) + if flags is None: + # Prepend the potential flag line back to the lines + lines = iter([potential_flag_line, *lines]) + flags = {} + + # Lines leading up to the second triple quote are the description + description = "".join(takewhile(lambda _l: not _l.startswith('"""'), lines)) + + segments: list[CodeSegment | CommentSegment] = [] + + # The rest is interspersed with triple quotes and code blocks + # We need to wrap the code blocks in triple backticks while + # removing the triple quotes for the comment blocks + remaining = peekable(lines) + while remaining.peek(None) is not None: + # If we encounter triple backticks we remove them and just add the lines + # in, up until the point we hit the next set of backticks + if remaining.peek().startswith('"""'): + # Skip the triple quotes + next(remaining) + ls = list(takewhile(lambda _l: not _l.startswith('"""'), remaining)) + comment_segment = CommentSegment([line.rstrip() for line in ls]) + segments.append(comment_segment) + + # Otherwise we wrap the line in triple backticks until we hit the next + # set of triple quotes + else: + ls = list(takewhile(lambda _l: not _l.startswith('"""'), remaining)) + code_segment = CodeSegment( + [line.rstrip() for line in ls], + session=name, + exec=cls.should_execute( + name=name, + runnable=flags.get("doc-runnable", False), + ), + ) + segments.append(code_segment) + + remaining.prepend('"""') # Stick back in so we can find it next itr + + return cls(name, path, description, segments) + + def header(self) -> str: + return f"# {self.name}" + + def description_header(self) -> str: + return "\n".join( + [ + "## Description", + self.description, + ], + ) + + def generate_doc(self) -> str: + return "\n".join( + [ + self.header(), + self.copy_section(), + self.description_header(), + *map(str, self.segments), + ], + ) + + def copy_section(self) -> str: + body = "\n".join( + [ + "```python", + *[ + "\n".join(segment.lines) + for segment in self.segments + if isinstance(segment, CodeSegment) + ], + "```", + ], + ) + indented_body = textwrap.indent(body, " " * 4) + header = ( + f'??? 
quote "Expand to copy' + f' `{self.filepath}` :material-content-copy: (top right)"' + ) + return "\n".join( + [ + header, + "", + indented_body, + "", + ], + ) + + +if os.environ.get(RUN_EXAMPLES_ENV_VAR, "all") in ("false", "", "0", "no", "off"): + logger.warning( + f"Env variable {RUN_EXAMPLES_ENV_VAR} not set - not running examples." + " Use `just docs-full` to run and render examples.", + ) + +for path in sorted(Path("examples").rglob("*.py")): + module_path = path.relative_to("examples").with_suffix("") + doc_path = path.relative_to("examples").with_suffix(".md") + full_doc_path = Path("examples", doc_path) + + parts = tuple(module_path.parts) + filename = parts[-1] + + if filename.startswith("_"): + continue + + heading_fn = Path(path.parent / "heading.txt") + if heading_fn.is_file(): + # This adjusts the navigation section heading to the content in heading.txt + heading = heading_fn.read_text().strip() + full_doc_path = Path("examples", heading, *doc_path.parts[1:]) + + example = Example.from_file(path) + with mkdocs_gen_files.open(full_doc_path, "w") as f: + f.write(example.generate_doc()) + + toc_name = example.name + mkdocs_gen_files.set_edit_path(full_doc_path, full_doc_path) \ No newline at end of file diff --git a/docs/hooks/cleanup_log_output.py b/docs/hooks/cleanup_log_output.py new file mode 100644 index 0000000000..2ce95ce729 --- /dev/null +++ b/docs/hooks/cleanup_log_output.py @@ -0,0 +1,38 @@ +"""The module is a hook which disables warnings and log messages which pollute the +doc build output. + +One possible downside is if one of these modules ends up giving an actual +error, such as OpenML failing to retrieve a dataset. I tried to make sure ERROR +log message are still allowed through. +""" +import logging +import warnings +from typing import Any + +import mkdocs +import mkdocs.plugins +import mkdocs.structure.pages + +log = logging.getLogger("mkdocs") + + +@mkdocs.plugins.event_priority(-50) +def on_startup(**kwargs: Any): + # We get a load of deprecation warnings from SMAC + warnings.filterwarnings("ignore", category=DeprecationWarning) + + # ConvergenceWarning from sklearn + warnings.filterwarnings("ignore", module="sklearn") + + +def on_pre_page( + page: mkdocs.structure.pages.Page, + config: Any, + files: Any, +) -> mkdocs.structure.pages.Page | None: + # NOTE: mkdocs says they're always normalized to be '/' seperated + # which means this should work on windows as well. + + logging.getLogger("smac").setLevel(logging.ERROR) + logging.getLogger("openml").setLevel(logging.ERROR) + return page \ No newline at end of file diff --git a/docs/hooks/debug_which_page_is_being_rendered.py b/docs/hooks/debug_which_page_is_being_rendered.py new file mode 100644 index 0000000000..9c4aec84a7 --- /dev/null +++ b/docs/hooks/debug_which_page_is_being_rendered.py @@ -0,0 +1,34 @@ +"""This module is a hook that when any code is being rendered, it will +print the path to the file being rendered. + +This makes it easier to identify which file is being rendered when an error happens. 
+""" +from __future__ import annotations + +import logging +import os +from typing import TYPE_CHECKING, Any + +import mkdocs +import mkdocs.plugins + +if TYPE_CHECKING: + import mkdocs.structure.pages + +log = logging.getLogger("mkdocs") + +RENDER_EXAMPLES_ENV_VAR = "SMAC_DOC_RENDER_EXAMPLES" +EXEC_DOCS_ENV_VAR = "SMAC_EXEC_DOCS" + +truthy_values = {"yes", "on", "true", "1", "all"} + + +def on_pre_page( + page: mkdocs.structure.pages.Page, + config: Any, + files: Any, +) -> mkdocs.structure.pages.Page | None: + render_examples = os.environ.get(RENDER_EXAMPLES_ENV_VAR, "true") + render_code = os.environ.get(EXEC_DOCS_ENV_VAR, "true") + if render_examples.lower() in truthy_values or render_code.lower() in truthy_values: + log.info(f"{page.file.src_path}") diff --git a/docs/hooks/disable_markdown_exec.py b/docs/hooks/disable_markdown_exec.py new file mode 100644 index 0000000000..ee6ae10bd6 --- /dev/null +++ b/docs/hooks/disable_markdown_exec.py @@ -0,0 +1,45 @@ +"""This disable markdown_exec based on an environment variable. +This speeds up the build of the docs for faster iteration. + +This is done by overwriting the module responsible for compiling and executing the code +by overriding the `exec(...)` global variable that is used to run the code. +We hijack it and print a helpful message about how to run the code cell instead. + +https://github.com/pawamoy/markdown-exec/blob/adff40b2928dbb2d22f27684e085f02d39a07291/src/markdown_exec/formatters/python.py#L42-L70 +""" +from __future__ import annotations + +import logging +import os +from typing import Any + +import mkdocs +import mkdocs.plugins +import mkdocs.structure.pages + +RUN_CODE_BLOCKS_ENV_VAR = "SMAC_EXEC_DOCS" + +logger = logging.getLogger("mkdocs") + + +def _print_msg(compiled_code: Any, code_block_id: int, exec_globals: dict) -> None: + _print = exec_globals["print"] + _print( + f"Env variable {RUN_CODE_BLOCKS_ENV_VAR}=0 - No code to display." + "\nUse `just docs-code` (or `just docs-full` for examples) to run" + " the code block and display output." + ) + +truthy_values = {"yes", "on", "true", "1"} + +@mkdocs.plugins.event_priority(100) +def on_startup(**kwargs: Any): + run_code_blocks = os.environ.get(RUN_CODE_BLOCKS_ENV_VAR, "true") + if run_code_blocks.lower() not in truthy_values: + logger.warning( + f"Disabling markdown-exec due to {RUN_CODE_BLOCKS_ENV_VAR}={run_code_blocks}" + "\n.Use `just docs-full` to run and render examples.", + ) + from markdown_exec.formatters import python + + setattr(python, "exec_python", _print_msg) diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000000..3d94273355 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,37 @@ +Home +# SMAC3 Documentation + +## Introduction + +SMAC is a tool for algorithm configuration to optimize the parameters of arbitrary algorithms, including hyperparameter optimization of Machine Learning algorithms. The main core consists of Bayesian Optimization in combination with an aggressive racing mechanism to efficiently decide which of two configurations performs better. + +SMAC3 is written in Python3 and continuously tested with Python 3.8, 3.9, and 3.10. Its Random Forest is written in C++. In the following, SMAC is representatively mentioned for SMAC3. 
+ + +## Cite Us +If you use SMAC, please cite our [JMLR paper](https://jmlr.org/papers/v23/21-0888.html): + +```bibtex +@article{lindauer-jmlr22a, + author = {Marius Lindauer and Katharina Eggensperger and Matthias Feurer and André Biedenkapp and Difan Deng and Carolin Benjamins and Tim Ruhkopf and René Sass and Frank Hutter}, + title = {SMAC3: A Versatile Bayesian Optimization Package for Hyperparameter Optimization}, + journal = {Journal of Machine Learning Research}, + year = {2022}, + volume = {23}, + number = {54}, + pages = {1--9}, + url = {http://jmlr.org/papers/v23/21-0888.html} +} +``` + +For the original idea, we refer to: + +```text +Hutter, F. and Hoos, H. H. and Leyton-Brown, K. +Sequential Model-Based Optimization for General Algorithm Configuration +In: Proceedings of the conference on Learning and Intelligent Optimization (LION 5) +``` + +## Contact + +SMAC3 is developed by [AutoML.org](https://www.automl.org). If you want to contribute or found an issue, please visit our [GitHub page](https://github.com/automl/SMAC3). Our guidelines for contributing to this package can be found [here](https://github.com/automl/SMAC3/blob/main/CONTRIBUTING.md). diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 736448d784..0000000000 --- a/docs/index.rst +++ /dev/null @@ -1,57 +0,0 @@ -Home -==== - -.. toctree:: - :hidden: - :maxdepth: 2 - - 1_installation - 2_package_overview - 3_getting_started - 4_minimal_example - examples/index - advanced_usage/index - 5_api - 6_references - 7_glossary - 8_faq - 9_license - - -SMAC is a tool for algorithm configuration to optimize the parameters of arbitrary algorithms, including hyperparameter -optimization of Machine Learning algorithms. The main core consists of Bayesian Optimization in combination with an -aggressive racing mechanism to efficiently decide which of two configurations performs better. - -SMAC3 is written in Python3 and continuously tested with Python 3.8, 3.9, and 3.10. Its Random -Forest is written in C++. In the following, SMAC is representatively mentioned for SMAC3. - -If you use SMAC, please cite our `JMLR paper `_: - -.. code-block:: text - - @article{lindauer-jmlr22a, - author = {Marius Lindauer and Katharina Eggensperger and Matthias Feurer and André Biedenkapp and Difan Deng and Carolin Benjamins and Tim Ruhkopf and René Sass and Frank Hutter}, - title = {SMAC3: A Versatile Bayesian Optimization Package for Hyperparameter Optimization}, - journal = {Journal of Machine Learning Research}, - year = {2022}, - volume = {23}, - number = {54}, - pages = {1--9}, - url = {http://jmlr.org/papers/v23/21-0888.html} - } - -For the original idea, we refer to: - -.. code-block:: text - - Hutter, F. and Hoos, H. H. and Leyton-Brown, K. - Sequential Model-Based Optimization for General Algorithm Configuration - In: Proceedings of the conference on Learning and Intelligent OptimizatioN (LION 5) - - -Contact -------- - -SMAC3 is developed by ``_. -If you want to contribute or found an issue please visit our github page ``_. -Our guidelines for contributing to this package can be found `here `_. 
diff --git a/docs/stylesheets/custom.css b/docs/stylesheets/custom.css new file mode 100644 index 0000000000..ae7ed5498e --- /dev/null +++ b/docs/stylesheets/custom.css @@ -0,0 +1,32 @@ +:root { + --md-primary-fg-color: #00b2ff; + --md-accent-fg-color: #FF00B2; +} + +/* Change the color of the slider track */ +input[type="range"]::-webkit-slider-runnable-track { + background: #00b2ff; /* Change this to your desired color */ +} + +input[type="range"]::-moz-range-track { + background: #00b2ff; /* Change this to your desired color */ +} + +input[type="range"]::-ms-track { + background: #00b2ff; /* Change this to your desired color */ + border-color: transparent; + color: transparent; +} + +/* Change the color of the slider thumb */ +input[type="range"]::-webkit-slider-thumb { + background: #00b2ff; /* Change this to your desired color */ +} + +input[type="range"]::-moz-range-thumb { + background: #00b2ff; /* Change this to your desired color */ +} + +input[type="range"]::-ms-thumb { + background: #00b2ff; /* Change this to your desired color */ +} \ No newline at end of file diff --git a/examples/1_basics/1_quadratic_function.py b/examples/1_basics/1_quadratic_function.py index 4d27c7ae6d..4c059dc5f8 100644 --- a/examples/1_basics/1_quadratic_function.py +++ b/examples/1_basics/1_quadratic_function.py @@ -1,11 +1,10 @@ -""" -Quadratic Function -^^^^^^^^^^^^^^^^^^ +"""Quadratic Function +# Flags: doc-Runnable An example of applying SMAC to optimize a quadratic function. We use the black-box facade because it is designed for black-box function optimization. -The black-box facade uses a :term:`Gaussian Process` as its surrogate model. +The black-box facade uses a [Gaussian Process][GP] as its surrogate model. The facade works best on a numerical hyperparameter configuration space and should not be applied to problems with large evaluation budgets (up to 1000 evaluations). """ @@ -14,7 +13,7 @@ from ConfigSpace import Configuration, ConfigurationSpace, Float from matplotlib import pyplot as plt -from smac import HyperparameterOptimizationFacade as HPOFacade +from smac.facade.hyperparameter_optimization_facade import HyperparameterOptimizationFacade as HPOFacade from smac import RunHistory, Scenario __copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover" diff --git a/examples/1_basics/2_svm_cv.py b/examples/1_basics/2_svm_cv.py index 345fcffb07..65d676fe46 100644 --- a/examples/1_basics/2_svm_cv.py +++ b/examples/1_basics/2_svm_cv.py @@ -1,6 +1,5 @@ -""" -Support Vector Machine with Cross-Validation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +"""Support Vector Machine with Cross-Validation +# Flags: doc-Runnable An example of optimizing a simple support vector machine on the IRIS dataset. We use the hyperparameter optimization facade, which uses a random forest as its surrogate model. It is able to diff --git a/examples/1_basics/3_ask_and_tell.py b/examples/1_basics/3_ask_and_tell.py index 6ab8b5ba80..b66a7e19b8 100644 --- a/examples/1_basics/3_ask_and_tell.py +++ b/examples/1_basics/3_ask_and_tell.py @@ -1,6 +1,5 @@ -""" -Ask-and-Tell -^^^^^^^^^^^^ +"""Ask-and-Tell +# Flags: doc-Runnable This examples show how to use the Ask-and-Tell interface. 
""" diff --git a/examples/1_basics/4_callback.py b/examples/1_basics/4_callback.py index 0fd9e9d9d7..252baaabce 100644 --- a/examples/1_basics/4_callback.py +++ b/examples/1_basics/4_callback.py @@ -1,6 +1,5 @@ -""" -Custom Callback -^^^^^^^^^^^^^^^ +"""Custom Callback +# Flags: doc-Runnable Using callbacks is the easieast way to integrate custom code inside the Bayesian optimization loop. In this example, we disable SMAC's default logging option and use the custom callback to log the evaluated trials. diff --git a/examples/1_basics/5_continue.py b/examples/1_basics/5_continue.py index 63cfb3957f..eff67b2328 100644 --- a/examples/1_basics/5_continue.py +++ b/examples/1_basics/5_continue.py @@ -1,16 +1,15 @@ -""" -Continue an Optimization -^^^^^^^^^^^^^^^^^^^^^^^^ +"""Continue an Optimization +# Flags: doc-Runnable SMAC can also be continued from a previous run. To do so, it reads in old files (derived from scenario's name, output_directory and seed) and sets the corresponding components. In this example, an optimization of a simple quadratic function is continued. -First, after creating a scenario with 50 trials, we run SMAC with overwrite=True. This will +First, after creating a scenario with 50 trials, we run SMAC with `overwrite=True`. This will overwrite any previous runs (in case the example was called before). We use a custom callback to artificially stop this first optimization after 10 trials. -Second, we again run the SMAC optimization using the same scenario, but this time with overwrite=False. As +Second, we again run the SMAC optimization using the same scenario, but this time with `overwrite=False`. As there already is a previous run with the same meta data, this run will be continued until the 50 trials are reached. """ diff --git a/examples/1_basics/6_priors.py b/examples/1_basics/6_priors.py index 691460c0b2..4af42444bf 100644 --- a/examples/1_basics/6_priors.py +++ b/examples/1_basics/6_priors.py @@ -1,6 +1,5 @@ -""" -User Priors over the Optimum -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +"""User Priors over the Optimum +# Flags: doc-Runnable Example for optimizing a Multi-Layer Perceptron (MLP) setting priors over the optimum on the hyperparameters. These priors are derived from user knowledge (from previous runs on similar diff --git a/examples/1_basics/7_parallelization_cluster.py b/examples/1_basics/7_parallelization_cluster.py index 2467f7d229..607b521c09 100644 --- a/examples/1_basics/7_parallelization_cluster.py +++ b/examples/1_basics/7_parallelization_cluster.py @@ -1,6 +1,4 @@ -""" -Parallelization-on-Cluster -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +"""Parallelization on Cluster An example of applying SMAC to optimize Branin using parallelization via Dask client on a SLURM cluster. If you do not want to use a cluster but your local machine, set dask_client @@ -22,7 +20,7 @@ Here we optimize the synthetic 2d function Branin. We use the black-box facade because it is designed for black-box function optimization. -The black-box facade uses a :term:`Gaussian Process` as its surrogate model. +The black-box facade uses a [Gaussian Process][GP] as its surrogate model. The facade works best on a numerical hyperparameter configuration space and should not be applied to problems with large evaluation budgets (up to 1000 evaluations). 
""" diff --git a/examples/1_basics/8_warmstart.py b/examples/1_basics/8_warmstart.py index eef95d173a..bc6f29b236 100644 --- a/examples/1_basics/8_warmstart.py +++ b/examples/1_basics/8_warmstart.py @@ -1,10 +1,9 @@ -""" -Warmstarting SMAC -====================================== +"""Warmstarting SMAC +# Flags: doc-Runnable With the ask and tell interface, we can support warmstarting SMAC. We can communicate rich information about the previous trials to SMAC using `TrialInfo` and `TrialValue` instances. -For more details on ask and tell consult `advanced_usage/5_ask_and_tell`. +For more details on ask and tell consult the [info page ask-and-tell](../../../advanced_usage/5_ask_and_tell). """ from __future__ import annotations diff --git a/examples/1_basics/README.rst b/examples/1_basics/README.rst deleted file mode 100644 index ab81e73c59..0000000000 --- a/examples/1_basics/README.rst +++ /dev/null @@ -1,2 +0,0 @@ -Basics ------- \ No newline at end of file diff --git a/examples/1_basics/heading.txt b/examples/1_basics/heading.txt new file mode 100644 index 0000000000..223b890d10 --- /dev/null +++ b/examples/1_basics/heading.txt @@ -0,0 +1 @@ +1 Basics \ No newline at end of file diff --git a/examples/2_multi_fidelity/1_mlp_epochs.py b/examples/2_multi_fidelity/1_mlp_epochs.py index 5cb0aefa05..3ea7f30f41 100644 --- a/examples/2_multi_fidelity/1_mlp_epochs.py +++ b/examples/2_multi_fidelity/1_mlp_epochs.py @@ -1,6 +1,5 @@ -""" -Multi-Layer Perceptron Using Multiple Epochs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +"""Multi-Layer Perceptron Using Multiple Epochs +# Flags: doc-Runnable Example for optimizing a Multi-Layer Perceptron (MLP) using multiple budgets. Since we want to take advantage of multi-fidelity, the ``MultiFidelityFacade`` is a good choice. By default, @@ -13,7 +12,7 @@ that ``budget`` specifies the number of epochs smac wants to allocate. The digits dataset is chosen to optimize the average accuracy on 5-fold cross validation. -.. note:: +!!! note This example uses the ``MultiFidelityFacade`` facade, which is the closest implementation to `BOHB `_. diff --git a/examples/2_multi_fidelity/2_sgd_datasets.py b/examples/2_multi_fidelity/2_sgd_datasets.py index 178ea21c2b..814f53f449 100644 --- a/examples/2_multi_fidelity/2_sgd_datasets.py +++ b/examples/2_multi_fidelity/2_sgd_datasets.py @@ -1,6 +1,5 @@ -""" -Stochastic Gradient Descent On Multiple Datasets -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +"""Stochastic Gradient Descent On Multiple Datasets +# Flags: doc-Runnable Example for optimizing a Multi-Layer Perceptron (MLP) across multiple (dataset) instances. diff --git a/examples/2_multi_fidelity/3_specify_HB_via_total_budget.py b/examples/2_multi_fidelity/3_specify_HB_via_total_budget.py index 7c0ebdcf0d..89cc811414 100644 --- a/examples/2_multi_fidelity/3_specify_HB_via_total_budget.py +++ b/examples/2_multi_fidelity/3_specify_HB_via_total_budget.py @@ -1,6 +1,6 @@ -""" -Specify Number of Trials via a Total Budget in Hyperband -^^^^^^^^^^^^^^^^^^ +"""Specify Number of Trials via a Total Budget in Hyperband +# Flags: doc-Runnable + This example uses a dummy function but illustrates how to setup Hyperband if you want to specify a total optimization budget in terms of fidelity units. 
diff --git a/examples/2_multi_fidelity/README.rst b/examples/2_multi_fidelity/README.rst
deleted file mode 100644
index 77811397bf..0000000000
--- a/examples/2_multi_fidelity/README.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-Multi-Fidelity and Multi-Instances
-----------------------------------
-
diff --git a/examples/2_multi_fidelity/heading.txt b/examples/2_multi_fidelity/heading.txt
new file mode 100644
index 0000000000..a32280f09c
--- /dev/null
+++ b/examples/2_multi_fidelity/heading.txt
@@ -0,0 +1 @@
+2 Multi-Fidelity and Multi-Instances
\ No newline at end of file
diff --git a/examples/3_multi_objective/1_schaffer.py b/examples/3_multi_objective/1_schaffer.py
index 913662508d..9adf5f683e 100644
--- a/examples/3_multi_objective/1_schaffer.py
+++ b/examples/3_multi_objective/1_schaffer.py
@@ -1,6 +1,5 @@
-"""
-2D Schaffer Function with Objective Weights
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+"""2D Schaffer Function with Objective Weights
+# Flags: doc-Runnable
 
 A simple example on how to use multi-objective optimization is shown. The 2D Schaffer function is used.
 In the plot you can see that all points are on the Pareto front. However, since we set the objective
 weights, you can notice that
diff --git a/examples/3_multi_objective/2_parego.py b/examples/3_multi_objective/2_parego.py
index b5294fb98b..8a407218ba 100644
--- a/examples/3_multi_objective/2_parego.py
+++ b/examples/3_multi_objective/2_parego.py
@@ -1,6 +1,5 @@
-"""
-ParEGO
-^^^^^^
+"""ParEGO
+# Flags: doc-Runnable
 
 An example of how to use multi-objective optimization with ParEGO. Both accuracy and run-time are
 going to be optimized on the digits dataset using an MLP, and the configurations are shown in a plot,
 highlighting the best ones in
diff --git a/examples/3_multi_objective/README.rst b/examples/3_multi_objective/README.rst
deleted file mode 100644
index 995ad9408b..0000000000
--- a/examples/3_multi_objective/README.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-Multi-Objective
----------------
-
diff --git a/examples/3_multi_objective/heading.txt b/examples/3_multi_objective/heading.txt
new file mode 100644
index 0000000000..653c325d75
--- /dev/null
+++ b/examples/3_multi_objective/heading.txt
@@ -0,0 +1 @@
+3 Multi-Objective
\ No newline at end of file
diff --git a/examples/4_advanced_optimizer/3_metadata_callback.py b/examples/4_advanced_optimizer/3_metadata_callback.py
index b82670dbc6..681a669a78 100644
--- a/examples/4_advanced_optimizer/3_metadata_callback.py
+++ b/examples/4_advanced_optimizer/3_metadata_callback.py
@@ -1,21 +1,21 @@
-"""
-Callback for logging run metadata
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+"""Callback for logging run metadata
 
 An example for using a callback to log run metadata to a file. Any arguments passed to the callback
 will be logged to a json file at the beginning of the SMAC run (arguments must be json serializable).
 
 Instead of editing the Git-related information (repository, branch, commit) by hand each time they change,
-this information can also be added automatically using GitPython (install via "pip install GitPython").
+this information can also be added automatically using GitPython (install via `pip install GitPython`).
 There is an example for obtaining the information via GitPython below:
 
-    from git import Repo
-    repo = Repo(".", search_parent_directories=True)
-    MetadataCallback(
-        repository=repo.working_tree_dir.split("/")[-1],
-        branch=str(repo.active_branch),
-        commit=str(repo.head.commit),
-        command=" ".join([sys.argv[0][len(repo.working_tree_dir) + 1:]] + sys.argv[1:]),
-    )
+```python
+from git import Repo
+repo = Repo(".", search_parent_directories=True)
+MetadataCallback(
+    repository=repo.working_tree_dir.split("/")[-1],
+    branch=str(repo.active_branch),
+    commit=str(repo.head.commit),
+    command=" ".join([sys.argv[0][len(repo.working_tree_dir) + 1:]] + sys.argv[1:]),
+)
+```
 """
 
 import sys
diff --git a/examples/4_advanced_optimizer/4_intensify_crossvalidation.py b/examples/4_advanced_optimizer/4_intensify_crossvalidation.py
index 679253da18..41267fc0cc 100644
--- a/examples/4_advanced_optimizer/4_intensify_crossvalidation.py
+++ b/examples/4_advanced_optimizer/4_intensify_crossvalidation.py
@@ -1,9 +1,8 @@
-"""
-Speeding up Cross-Validation with Intensification
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+"""Speeding up Cross-Validation with Intensification
+# Flags: doc-Runnable
 
 An example of optimizing a simple support vector machine on the digits dataset. In contrast to the
-[simple example](examples/1_basics/2_svm_cv.py), in which all cross-validation folds are executed
+[simple example](../../1_basics/2_svm_cv), in which all cross-validation folds are executed
 at once, we use the intensification mechanism described in the original
 [SMAC paper](https://link.springer.com/chapter/10.1007/978-3-642-25566-3_40) as also
 demonstrated by [Auto-WEKA](https://dl.acm.org/doi/10.1145/2487575.2487629). This mechanism allows us to
diff --git a/examples/4_advanced_optimizer/1_turbo_optimizer.py b/examples/4_advanced_optimizer/_1_turbo_optimizer.py
similarity index 100%
rename from examples/4_advanced_optimizer/1_turbo_optimizer.py
rename to examples/4_advanced_optimizer/_1_turbo_optimizer.py
diff --git a/examples/4_advanced_optimizer/2_boing_optimizer.py b/examples/4_advanced_optimizer/_2_boing_optimizer.py
similarity index 100%
rename from examples/4_advanced_optimizer/2_boing_optimizer.py
rename to examples/4_advanced_optimizer/_2_boing_optimizer.py
diff --git a/examples/4_advanced_optimizer/heading.txt b/examples/4_advanced_optimizer/heading.txt
new file mode 100644
index 0000000000..080cf4a43b
--- /dev/null
+++ b/examples/4_advanced_optimizer/heading.txt
@@ -0,0 +1 @@
+4 Advanced Topics
\ No newline at end of file
diff --git a/examples/5_commandline/1_call_target_function_script.py b/examples/5_commandline/1_call_target_function_script.py
index 8c28ae0a34..1ec31e893d 100644
--- a/examples/5_commandline/1_call_target_function_script.py
+++ b/examples/5_commandline/1_call_target_function_script.py
@@ -1,31 +1,29 @@
-"""
-Call Target Function From Script
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+"""Call Target Function From Script
+# Flags: doc-Runnable
 
 This simple example shows how to call a script with the following content:
-.. code-block:: bash
-
-    #!/bin/bash
-
-    # Set arguments first
-    for argument in "$@"
-    do
-        key=$(echo $argument | cut -f1 -d=)
-        value=$(echo $argument | cut -f2 -d=)
+```bash
+#!/bin/bash
 
-        if [[ $key == *"--"* ]]; then
-            v="${key/--/}"
-            declare $v="${value}"
-        fi
-    done
+# Set arguments first
+for argument in "$@"
+do
+    key=$(echo $argument | cut -f1 -d=)
+    value=$(echo $argument | cut -f2 -d=)
 
-    # We simply set the cost to our parameter
-    cost=$x0
+    if [[ $key == *"--"* ]]; then
+        v="${key/--/}"
+        declare $v="${value}"
+    fi
+done
 
-    # Return everything
-    echo "cost=$cost"
+# We simply set the cost to our parameter
+cost=$x0
 
+# Return everything
+echo "cost=$cost"
+```
 """
 
 from ConfigSpace import ConfigurationSpace
diff --git a/examples/5_commandline/README.rst b/examples/5_commandline/README.rst
deleted file mode 100644
index 7614fd90d9..0000000000
--- a/examples/5_commandline/README.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Command-Line Interface
----------------------
-
-SMAC can call a target function from a script. This is useful if you want to optimize non-python code.
\ No newline at end of file
diff --git a/examples/5_commandline/heading.txt b/examples/5_commandline/heading.txt
new file mode 100644
index 0000000000..73dc157a8b
--- /dev/null
+++ b/examples/5_commandline/heading.txt
@@ -0,0 +1 @@
+5 Command-Line Interface
\ No newline at end of file
diff --git a/examples/README.rst b/examples/README.md
similarity index 87%
rename from examples/README.rst
rename to examples/README.md
index deaa8fbacc..33bd78ca36 100644
--- a/examples/README.rst
+++ b/examples/README.md
@@ -1,5 +1,4 @@
-Examples
-========
+# Examples
 
 We provide several examples of how to use SMAC with Python.
 Practical use-cases were chosen to show the variety of SMAC.
\ No newline at end of file
diff --git a/mkdocs.yaml b/mkdocs.yaml
new file mode 100644
index 0000000000..8a1ded77f2
--- /dev/null
+++ b/mkdocs.yaml
@@ -0,0 +1,225 @@
+# This project uses mkdocs to generate the documentation.
+# Specifically it uses the mkdocs-material theme, which provides a whole
+# host of nice features and customization
+#
+# mkdocs: https://www.mkdocs.org/getting-started/#getting-started-with-mkdocs
+# mkdocs-material: https://squidfunk.github.io/mkdocs-material/
+#
+# Please refer to these links for more information on how to use mkdocs
+#
+# For serving the docs locally, you can take a look at the `justfile` at
+# the root of this repository; it contains a few commands for generating the docs
+# with different levels of execution.
+#
+# Please refer to individual sections for any additional notes
+site_name: "SMAC3"
+repo_url: https://github.com/automl/SMAC3/
+repo_name: automl/SMAC3
+
+site_dir: docs/site
+
+theme:
+  name: material
+  logo: images/logo.png
+  favicon: images/logo.png
+  icon:
+    repo: fontawesome/brands/github
+  features:
+    - content.code.annotate
+    - content.code.copy
+    - navigation.footer
+    - navigation.sections
+    - toc.follow
+    - toc.integrate
+    - navigation.tabs
+    - navigation.tabs.sticky
+    - header.autohide
+    - search.suggest
+    - search.highlight
+    - search.share
+  font:
+    text: Roboto
+    code: Roboto Mono
+  palette:
+    - scheme: slate
+      media: "(prefers-color-scheme: dark)"
+      primary: custom
+      accent: custom
+      toggle:
+        icon: material/eye-outline
+        name: Switch to light mode
+
+    # Palette toggle for light mode
+    - scheme: default
+      media: "(prefers-color-scheme: light)"
+      primary: custom
+      accent: custom
+      toggle:
+        icon: material/eye
+        name: Switch to dark mode
+
+
+# The `mike` versioning provider
+# https://github.com/jimporter/mike
+#
+# This is what allows us to create versioned docs in GitHub CI
+extra:
+  version:
+    provider: mike
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/automl
+    - icon: fontawesome/brands/twitter
+      link: https://twitter.com/automl_org
+
+# We do have some extra custom css
+# If for whatever reason you think this is breaking something,
+# please feel free to remove it.
+extra_css:
+  - stylesheets/custom.css
+
+watch:
+  - smac
+  - docs
+  - examples
+  - CONTRIBUTING.md
+
+markdown_extensions:
+  - admonition
+  - tables
+  - attr_list
+  - md_in_html
+  - toc:
+      permalink: "#"
+  - pymdownx.highlight:
+      anchor_linenums: true
+  - pymdownx.magiclink:
+      hide_protocol: true
+      repo_url_shortener: true
+      repo_url_shorthand: true
+      user: automl
+      repo: SMAC3
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.details
+  - pymdownx.tabbed:
+      alternate_style: true
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
+  - pymdownx.emoji:
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+
+# These are files that are run when serving the docs.
+hooks:
+  # This prevents logging messages from polluting the doc build
+  - docs/hooks/cleanup_log_output.py
+  # This prevents markdown_exec (plugin) from executing code blocks
+  # dependent on environment variables. These env variables are
+  # automatically set with the `justfile` commands to build docs
+  - docs/hooks/disable_markdown_exec.py
+  # This hook simply prints the page being rendered for an easier time debugging
+  # any issues with code in docs
+  - docs/hooks/debug_which_page_is_being_rendered.py
+
+plugins:
+  - search
+  - autorefs
+  - glightbox
+  - offline:
+      enabled: !ENV [SMAC_DOCS_OFFLINE, false]
+  - markdown-exec
+  - mike:
+      version_selector: true
+      css_dir: css
+      javascript_dir: js
+      canonical_version: latest
+  - gen-files:
+      scripts:
+        - docs/api_generator.py
+        - docs/example_runner.py
+  - literate-nav:
+      nav_file: SUMMARY.md
+  - mkdocstrings:
+      default_handler: python
+      enable_inventory: true
+      handlers:
+        python:
+          paths: [smac]
+          # Extra objects which allow for linking to external docs
+          import:
+            - 'https://docs.python.org/3/objects.inv'
+            - 'https://numpy.org/doc/stable/objects.inv'
+            - 'https://pandas.pydata.org/docs/objects.inv'
+            - 'https://optuna.readthedocs.io/en/stable/objects.inv'
+            - 'https://scikit-learn.org/stable/objects.inv'
+            - 'https://pytorch.org/docs/stable/objects.inv'
+            - 'https://jobqueue.dask.org/en/latest/objects.inv'
+          # Please do not try to change these without having
+          # looked at all of the documentation and seeing if it
+          # causes the API docs to look weird anywhere.
+          options: # https://mkdocstrings.github.io/python/usage/
+            docstring_section_style: spacy
+            docstring_options:
+              ignore_init_summary: true
+              trim_doctest_flags: true
+              returns_multiple_items: false
+            show_docstring_attributes: true
+            show_docstring_description: true
+            show_root_heading: true
+            show_root_toc_entry: true
+            show_object_full_path: false
+            show_root_members_full_path: false
+            signature_crossrefs: true
+            merge_init_into_class: true
+            show_symbol_type_heading: true
+            show_symbol_type_toc: true
+            docstring_style: google
+            inherited_members: true
+            show_if_no_docstring: false
+            show_bases: true
+            show_source: true
+            members_order: "alphabetical"
+            group_by_category: true
+            show_signature: true
+            separate_signature: true
+            show_signature_annotations: true
+            filters:
+              - "!^_[^_]"
+
+nav:
+  - Home: "index.md"
+  - Installation: "1_installation.md"
+  - Package Overview: "2_package_overview.md"
+  - Getting Started: "3_getting_started.md"
+  - Advanced Usage:
+    - "advanced_usage/1_components.md"
+    - "advanced_usage/2_multi_fidelity.md"
+    - "advanced_usage/3_multi_objective.md"
+    - "advanced_usage/4_instances.md"
+    - "advanced_usage/5_ask_and_tell.md"
+    - "advanced_usage/6_commandline.md"
+    - "advanced_usage/7_stopping_criteria.md"
+    - "advanced_usage/8_logging.md"
+    - "advanced_usage/9_parallelism.md"
+    - "advanced_usage/10_continue.md"
+    - "advanced_usage/11_reproducibility.md"
+    - "advanced_usage/12_optimizations.md"
+  # Auto generated with docs/example_runner.py
+  - Examples: "examples/"
+  # Auto generated with docs/api_generator.py
+  - API: "api/"
+  - Info & FAQ:
+    - "6_references.md"
+    - "7_glossary.md"
+    - "8_faq.md"
+  # - Contributing:
+  #   - "contributing/index.md"
+  #   - "contributing/contributing-a-benchmark.md"
+  #   - "contributing/contributing-an-optimizer.md"
+  # - What's New?: "changelog.md"
+
diff --git a/setup.py b/setup.py
index eb01e0ec66..6ac234efe1 100644
--- a/setup.py
+++ b/setup.py
@@ -39,6 +39,22 @@ def read_file(filepath: str) -> str:
         "flake8",
         "pre-commit",
         "pylint",
+        "mkdocs",
+        "mkdocs-material",
+        "mkdocs-autorefs",
+        "mkdocs-gen-files",
+        "mkdocs-literate-nav",
+        "mkdocs-glightbox",
+        "mkdocs-glossary-plugin",
+        "mkdocstrings[python]",
+        "markdown-exec[ansi]",
+ "mike", + "pillow", + "cairosvg", + "black", # This allows mkdocstrings to format signatures in the docs + "pytest", + "pytest-coverage", + "pytest-cases", ], } diff --git a/smac/acquisition/function/__init__.py b/smac/acquisition/function/__init__.py index 5ac88a7213..9a0a4c52cb 100644 --- a/smac/acquisition/function/__init__.py +++ b/smac/acquisition/function/__init__.py @@ -6,7 +6,9 @@ from smac.acquisition.function.integrated_acquisition_function import ( IntegratedAcquisitionFunction, ) -from smac.acquisition.function.prior_acqusition_function import PriorAcquisitionFunction +from smac.acquisition.function.prior_acquisition_function import ( + PriorAcquisitionFunction, +) from smac.acquisition.function.probability_improvement import PI from smac.acquisition.function.thompson import TS diff --git a/smac/acquisition/function/abstract_acquisition_function.py b/smac/acquisition/function/abstract_acquisition_function.py index 519f5b3d0f..c62170299a 100644 --- a/smac/acquisition/function/abstract_acquisition_function.py +++ b/smac/acquisition/function/abstract_acquisition_function.py @@ -50,7 +50,7 @@ def update(self, model: AbstractModel, **kwargs: Any) -> None: This method will be called after fitting the model, but before maximizing the acquisition function. As an examples, EI uses it to update the current fmin. The default implementation only updates the - attributes of the acqusition function which are already present. + attributes of the acquisition function which are already present. Calls `_update` to update the acquisition function attributes. diff --git a/smac/acquisition/function/confidence_bound.py b/smac/acquisition/function/confidence_bound.py index 13d5db3204..e5a264b033 100644 --- a/smac/acquisition/function/confidence_bound.py +++ b/smac/acquisition/function/confidence_bound.py @@ -18,7 +18,7 @@ class LCB(AbstractAcquisitionFunction): r"""Computes the lower confidence bound for a given x over the best so far value as acquisition value. - :math:`LCB(X) = \mu(\mathbf{X}) - \sqrt(\beta_t)\sigma(\mathbf{X})` [SKKS10]_ + :math:`LCB(X) = \mu(\mathbf{X}) - \sqrt(\beta_t)\sigma(\mathbf{X})` [[SKKS10][SKKS10]] with @@ -93,7 +93,7 @@ def _compute(self, X: np.ndarray) -> np.ndarray: assert self._model is not None if self._num_data is None: raise ValueError( - "No current number of data points specified. Call `update` to inform the acqusition function." + "No current number of data points specified. Call `update` to inform the acquisition function." ) if len(X.shape) == 1: diff --git a/smac/acquisition/function/prior_acqusition_function.py b/smac/acquisition/function/prior_acquisition_function.py similarity index 99% rename from smac/acquisition/function/prior_acqusition_function.py rename to smac/acquisition/function/prior_acquisition_function.py index ca180908df..9f0a7d2a3e 100644 --- a/smac/acquisition/function/prior_acqusition_function.py +++ b/smac/acquisition/function/prior_acquisition_function.py @@ -28,7 +28,7 @@ class PriorAcquisitionFunction(AbstractAcquisitionFunction): r"""Weight the acquisition function with a user-defined prior over the optimum. See "piBO: Augmenting Acquisition Functions with User Beliefs for Bayesian Optimization" by Carl - Hvarfner et al. [HSSL22]_ for further details. + Hvarfner et al. [[HSSL22][HSSL22]] for further details. 
 
     Parameters
     ----------
diff --git a/smac/acquisition/maximizer/__init__.py b/smac/acquisition/maximizer/__init__.py
index 5e3756190e..b97bd2bbbe 100644
--- a/smac/acquisition/maximizer/__init__.py
+++ b/smac/acquisition/maximizer/__init__.py
@@ -1,4 +1,4 @@
-from smac.acquisition.maximizer.abstract_acqusition_maximizer import (
+from smac.acquisition.maximizer.abstract_acquisition_maximizer import (
     AbstractAcquisitionMaximizer,
 )
 from smac.acquisition.maximizer.differential_evolution import DifferentialEvolution
diff --git a/smac/acquisition/maximizer/abstract_acqusition_maximizer.py b/smac/acquisition/maximizer/abstract_acquisition_maximizer.py
similarity index 100%
rename from smac/acquisition/maximizer/abstract_acqusition_maximizer.py
rename to smac/acquisition/maximizer/abstract_acquisition_maximizer.py
diff --git a/smac/acquisition/maximizer/local_and_random_search.py b/smac/acquisition/maximizer/local_and_random_search.py
index 71c7f86c47..a409996d3e 100644
--- a/smac/acquisition/maximizer/local_and_random_search.py
+++ b/smac/acquisition/maximizer/local_and_random_search.py
@@ -5,7 +5,7 @@
 from ConfigSpace import Configuration, ConfigurationSpace
 
 from smac.acquisition.function import AbstractAcquisitionFunction
-from smac.acquisition.maximizer.abstract_acqusition_maximizer import (
+from smac.acquisition.maximizer.abstract_acquisition_maximizer import (
     AbstractAcquisitionMaximizer,
 )
 from smac.acquisition.maximizer.local_search import LocalSearch
@@ -184,6 +184,6 @@
         next_configs_by_acq_value.sort(reverse=True, key=lambda x: x[0])
         first_five = [f"{_[0]} ({_[1].origin})" for _ in next_configs_by_acq_value[:5]]
-        logger.debug(f"First 5 acquisition function values of selected configurations:\n{', '.join(first_five)}")
+        logger.debug(f"First 5 acquisition function values of selected configurations: \n{', '.join(first_five)}")
 
         return next_configs_by_acq_value
diff --git a/smac/acquisition/maximizer/local_search.py b/smac/acquisition/maximizer/local_search.py
index ff0d4b3733..bc26d1131f 100644
--- a/smac/acquisition/maximizer/local_search.py
+++ b/smac/acquisition/maximizer/local_search.py
@@ -10,7 +10,7 @@
 from ConfigSpace.exceptions import ForbiddenValueError
 
 from smac.acquisition.function import AbstractAcquisitionFunction
-from smac.acquisition.maximizer.abstract_acqusition_maximizer import (
+from smac.acquisition.maximizer.abstract_acquisition_maximizer import (
     AbstractAcquisitionMaximizer,
 )
 from smac.utils.configspace import (
diff --git a/smac/acquisition/maximizer/random_search.py b/smac/acquisition/maximizer/random_search.py
index c5f87fda06..35115a9e4d 100644
--- a/smac/acquisition/maximizer/random_search.py
+++ b/smac/acquisition/maximizer/random_search.py
@@ -2,7 +2,7 @@
 
 from ConfigSpace import Configuration
 
-from smac.acquisition.maximizer.abstract_acqusition_maximizer import (
+from smac.acquisition.maximizer.abstract_acquisition_maximizer import (
     AbstractAcquisitionMaximizer,
 )
 from smac.utils.logging import get_logger
diff --git a/smac/constants.py b/smac/constants.py
index 7db48ebe84..590f2d1162 100644
--- a/smac/constants.py
+++ b/smac/constants.py
@@ -1,3 +1,4 @@
+"""Constants used in SMAC, e.g. maximum number of cutoffs, very small number, etc."""
 
 __copyright__ = "Copyright 2022, automl.org"
 __license__ = "3-clause BSD"
diff --git a/smac/facade/abstract_facade.py b/smac/facade/abstract_facade.py
index 9a2031099f..fd153cdb8a 100644
--- a/smac/facade/abstract_facade.py
+++ b/smac/facade/abstract_facade.py
@@ -14,7 +14,7 @@ from smac.acquisition.function.abstract_acquisition_function import (
     AbstractAcquisitionFunction,
 )
-from smac.acquisition.maximizer.abstract_acqusition_maximizer import (
+from smac.acquisition.maximizer.abstract_acquisition_maximizer import (
     AbstractAcquisitionMaximizer,
 )
 from smac.callback.callback import Callback
diff --git a/smac/facade/hyperband_facade.py b/smac/facade/hyperband_facade.py
index d8be7bf4df..92ab11c14c 100644
--- a/smac/facade/hyperband_facade.py
+++ b/smac/facade/hyperband_facade.py
@@ -10,7 +10,7 @@ class HyperbandFacade(RandomFacade):
     """
-    Facade to use model-free Hyperband [LJDR18]_ for algorithm configuration.
+    Facade to use model-free Hyperband [[LJDR18][LJDR18]] for algorithm configuration.
 
     Uses Random Aggressive Online Racing (ROAR) to compare configurations, a random
     initial design and the Hyperband intensifier.
diff --git a/smac/facade/old/boing_facade.py b/smac/facade/old/_boing_facade.py
similarity index 100%
rename from smac/facade/old/boing_facade.py
rename to smac/facade/old/_boing_facade.py
diff --git a/smac/intensifier/abstract_intensifier.py b/smac/intensifier/abstract_intensifier.py
index b7a5ae1ca6..e46d6ccd1e 100644
--- a/smac/intensifier/abstract_intensifier.py
+++ b/smac/intensifier/abstract_intensifier.py
@@ -100,6 +100,7 @@ def meta(self) -> dict[str, Any]:
         return {
             "name": self.__class__.__name__,
             "max_incumbents": self._max_incumbents,
+            "max_config_calls": self._max_config_calls,
             "seed": self._seed,
         }
@@ -594,7 +595,7 @@ def update_incumbents(self, config: Configuration) -> None:
                 self._remove_rejected_config(config_id)
                 logger.info(
                     f"Added config {config_hash} and rejected config {removed_incumbent_hash} as incumbent because "
-                    f"it is not better than the incumbents on {len(config_isb_keys)} instances:"
+                    f"it is not better than the incumbents on {len(config_isb_keys)} instances: "
                 )
                 print_config_changes(rh.get_config(removed_incumbent_id), config, logger=logger)
         elif len(previous_incumbents) < len(new_incumbents):
diff --git a/smac/main/config_selector.py b/smac/main/config_selector.py
index 42a72f02bb..9e3fd8f3ca 100644
--- a/smac/main/config_selector.py
+++ b/smac/main/config_selector.py
@@ -10,7 +10,7 @@ from smac.acquisition.function.abstract_acquisition_function import (
     AbstractAcquisitionFunction,
 )
-from smac.acquisition.maximizer.abstract_acqusition_maximizer import (
+from smac.acquisition.maximizer.abstract_acquisition_maximizer import (
     AbstractAcquisitionMaximizer,
 )
 from smac.callback.callback import Callback
diff --git a/smac/main/old/boing.py b/smac/main/old/_boing.py
similarity index 100%
rename from smac/main/old/boing.py
rename to smac/main/old/_boing.py
diff --git a/smac/main/old/turbo.py b/smac/main/old/_turbo.py
similarity index 100%
rename from smac/main/old/turbo.py
rename to smac/main/old/_turbo.py
diff --git a/tests/test_acquisition/test_functions.py b/tests/test_acquisition/test_functions.py
index 53c8b5f3a0..57de282852 100644
--- a/tests/test_acquisition/test_functions.py
+++ b/tests/test_acquisition/test_functions.py
@@ -747,14 +747,14 @@ def test_ts_NxD(model, acq_ts):
 
 
 def test_ts_rng():
-    """Test TS acqusition function with model that only has attribute 'rng'"""
+    """Test TS acquisition function with model that only has attribute 'rng'"""
     model = MockModelRNG()
 
     ts = TS()
     ts.model = model
 
 
 def test_ts_sampler():
-    "Test TS acqusition function with model that only has attribute 'sample_functions'"
+    "Test TS acquisition function with model that only has attribute 'sample_functions'"
     model = MockModelSampler()
 
     ts = TS()
     ts.model = model
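One consequence of the module renames in this diff (`prior_acqusition_function.py` to `prior_acquisition_function.py`, `abstract_acqusition_maximizer.py` to `abstract_acquisition_maximizer.py`) is that downstream code importing the old misspelled paths will break; judging only from the hunks shown here, no compatibility shim for the old names is added (an assumption). The corrected deep imports would look like:

```python
# Correctly spelled module paths after this diff; the old
# "acqusition" spellings no longer exist as importable modules.
from smac.acquisition.function.prior_acquisition_function import (
    PriorAcquisitionFunction,
)
from smac.acquisition.maximizer.abstract_acquisition_maximizer import (
    AbstractAcquisitionMaximizer,
)
```

Code that sticks to the package-level re-exports (e.g. `from smac.acquisition.function import PriorAcquisitionFunction`) is unaffected, since the corresponding `__init__.py` files are updated in the same change.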