diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 33a40614..00000000 --- a/.flake8 +++ /dev/null @@ -1,9 +0,0 @@ -[flake8] -exclude = .git,__pycache__,.vscode -max-line-length=99 -ignore=E302,E305,W503,E203,E731,E402,E266,E712,F401,F821 -indent-size = 4 -per-file-ignores= - qolmat/imputations/imputers.py:F401 - */__init__.py:F401 - examples/test.py:F401 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index b3177c72..39ba5324 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,13 +1,11 @@ -name: Publish Package on PYPI +name: Publish Package on PyPI on: release: types: [published] - jobs: deploy: - runs-on: ubuntu-latest steps: @@ -16,14 +14,19 @@ jobs: uses: actions/setup-python@v4 with: python-version: '3.10' + - name: Install Poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + echo "$HOME/.local/bin" >> $GITHUB_PATH - name: Install dependencies run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine + poetry install - name: Build package - run: python setup.py sdist bdist_wheel + run: | + poetry build - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + env: + PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + run: | + poetry config pypi-token.pypi $PYPI_TOKEN + poetry publish diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9081326e..d737fc7f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,14 +3,13 @@ name: Unit tests on: push: branches: - -dev - -main + - "**" pull_request: types: [opened, synchronize, reopened, ready_for_review] workflow_dispatch: jobs: - build-linux: + check: if: github.event.pull_request.draft == false runs-on: ${{matrix.os}} strategy: @@ -22,24 +21,23 @@ jobs: shell: bash -l {0} steps: - - name: Git clone + - name: Checkout uses: actions/checkout@v3 - - name: Set up venv for ci - uses: conda-incubator/setup-miniconda@v2 + - name: Python + uses: actions/setup-python@v4 with: - python-version: ${{matrix.python-version}} - environment-file: environment.ci.yml - - name: Lint with flake8 - run: | - flake8 - - name: Test with pytest - run: | - make coverage - - name: typing with mypy - run: | - mypy qolmat - echo you should uncomment mypy qolmat and delete this line - - name: Upload coverage reports to Codecov + python-version: ${{ matrix.python-version }} + - name: Poetry + uses: snok/install-poetry@v1 + with: + version: 1.8.3 + - name: Lock + run: poetry lock --no-update + - name: Install + run: poetry install + - name: Checkers + run: make checkers + - name: Codecov uses: codecov/codecov-action@v3 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/test_quick.yml b/.github/workflows/test_quick.yml deleted file mode 100644 index 40f58f5a..00000000 --- a/.github/workflows/test_quick.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: Unit tests fast - -on: - push: - branches-ignore: - - dev - - main - workflow_dispatch: - -jobs: - basic-testing: - runs-on: ${{matrix.os}} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - defaults: - run: - shell: bash -l {0} - - steps: - - name: Git clone - uses: actions/checkout@v3 - - # See caching environments - # https://github.com/conda-incubator/setup-miniconda#caching-environments - - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 - with: - miniforge-variant: Mambaforge - miniforge-version: 
latest - activate-environment: env_qolmat_ci - use-mamba: true - - - name: Get Date - id: get-date - run: echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT - - - name: Cache Conda env - uses: actions/cache@v2 - with: - path: ${{ env.CONDA }}/envs - key: - conda-${{ runner.os }}--${{ runner.arch }}--${{ - steps.get-date.outputs.today }}-${{ - hashFiles('environment.ci.yml') }}-${{ env.CACHE_NUMBER - }} - env: - # Increase this value to reset cache if environment.ci.yml has not changed - CACHE_NUMBER: 0 - id: cache - - - name: Update environment - run: mamba env update -n env_qolmat_ci -f environment.ci.yml - if: steps.cache.outputs.cache-hit != 'true' - - - name: Lint with flake8 - run: | - flake8 - - name: Test with pytest - run: | - make coverage - - name: Test docstrings - run: make doctest - - name: typing with mypy - run: | - mypy qolmat - echo you should uncomment mypy qolmat and delete this line diff --git a/.gitignore b/.gitignore index e385a1ee..970a7e3e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ +*.ipynb +poetry.lock # Byte-compiled / optimized / DLL files +data/ __pycache__/ *.py[cod] *$py.class diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62948350..68c1acf3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,20 +8,8 @@ repos: exclude: (docs/) - id: trailing-whitespace exclude: (docs/) - - repo: https://github.com/psf/black - rev: 22.8.0 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.3.3 hooks: - - id: black - args: - - "-l 99" - # Flake8 - - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.1.1 - hooks: - - id: mypy - args: [--ignore-missing-imports] - additional_dependencies: [types-requests] + - id: ruff + - id: ruff-format diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 84b6f589..d5f1e3bc 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -29,14 +29,11 @@ You can create a virtual environment via `conda`: .. code:: sh - $ conda env create -f environment.dev.yml - $ conda activate env_qolmat_dev - -If you need to use pytorch, enter the command: - -.. code:: sh - - $ pip install -e .[pytorch] + $ pip install poetry + $ poetry config virtualenvs.in-project true + $ poetry lock + $ poetry install + $ poetry shell Once the environment is installed, pre-commit is installed, but need to be activated using the following command: @@ -78,7 +75,7 @@ These tests absolutely have to pass. .. code:: sh - $ mypy qolmat + $ make check-types Unit test ^^^^^^^^^ @@ -88,4 +85,4 @@ The coverage should on new features must be above 95%. .. 
code:: sh - $ pytest -vs --cov-branch --cov=qolmat --pyargs tests --cov-report term-missing + $ make check-coverage diff --git a/Makefile b/Makefile index c08e0d40..e0ca5828 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,34 @@ -coverage: - pytest --cov-branch --cov=qolmat --cov-report=xml tests -doctest: - pytest --doctest-modules --pyargs qolmat +check-coverage: + poetry run pytest --cov-branch --cov=qolmat/ --cov-report=xml tests/ -doc: - make html -C docs +check-poetry: + poetry check --lock + +check-quality: + poetry run ruff check qolmat/ tests/ + +check-security: + poetry run bandit --recursive --configfile=pyproject.toml qolmat/ + +check-tests: + poetry run pytest tests/ + +check-types: + poetry run mypy qolmat/ tests/ + +checkers: check-coverage check-types clean: rm -rf .mypy_cache .pytest_cache .coverage* rm -rf **__pycache__ make clean -C docs + +coverage: + poetry run pytest --cov-branch --cov=qolmat --cov-report=xml tests + +doc: + make html -C docs + +doctest: + poetry run pytest --doctest-modules --pyargs qolmat diff --git a/docs/conf.py b/docs/conf.py index 2429e591..6e080268 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,7 +12,7 @@ import os import sys -import sphinx_gallery + import sphinx_rtd_theme # If extensions (or modules to document with autodoc) are in another directory, @@ -53,7 +53,6 @@ # see https://github.com/numpy/numpydoc/issues/69 numpydoc_show_class_members = False -from distutils.version import LooseVersion # pngmath / imgmath compatibility layer for different sphinx versions # import sphinx diff --git a/environment.ci.yml b/environment.ci.yml deleted file mode 100644 index 86949837..00000000 --- a/environment.ci.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: env_qolmat_ci -channels: - - defaults - - conda-forge -dependencies: - - codecov - - flake8 - - matplotlib - - mypy - - numpy - - numpydoc - - pytest - - pytest-cov - - pytest-mock - - pip - - pip: - - torch - - -e . diff --git a/environment.dev.yml b/environment.dev.yml deleted file mode 100644 index e2dfbed9..00000000 --- a/environment.dev.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: env_qolmat_dev -channels: - - conda-forge - - defaults -dependencies: - - bump2version=1.0.1 - - dcor=0.6 - - ipykernel=6.21.0 - - jupyter=1.0.0 - - jupyterlab=1.2.6 - - jupytext=1.14.4 - - hyperopt=0.2.7 - - numpy=1.24.4 - - packaging=23.1 - - pandas=2.0.1 - - python=3.8 - - pip=23.0.1 - - scipy=1.10.1 - - scikit-learn=1.3.2 - - sphinx=4.3.2 - - sphinx-gallery=0.10.1 - - sphinx_rtd_theme=1.0.0 - - statsmodels=0.14.0 - - twine=3.7.1 - - wheel=0.37.1 - - pip: - - flake8==6.0.0 - - jupytext==1.14.4 - - matplotlib==3.6.2 - - mypy==1.1.1 - - numpydoc==1.5.0 - - pre-commit==2.21.0 - - pytest==7.2.0 - - pytest-cov==4.0.0 - - pytest-mock==3.10.0 - - sphinx_markdown_tables==0.0.17 - - -e . 
diff --git a/environment.doc.yml b/environment.doc.yml deleted file mode 100644 index 30458c93..00000000 --- a/environment.doc.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: env_qolmat_doc -channels: - - conda-forge - - defaults -dependencies: - - numpydoc=1.1.0 - - python=3.8 - - sphinx=4.3.2 - - sphinx-gallery=0.10.1 - - sphinx_rtd_theme=1.0.0 - - typing_extensions=4.0.1 - - pip - - pip: - - sphinx-markdown-tables==0.0.17 diff --git a/examples/RPCA.md b/examples/RPCA.md index 05f8b755..0a4fbe8e 100644 --- a/examples/RPCA.md +++ b/examples/RPCA.md @@ -34,6 +34,10 @@ from qolmat.imputations.rpca import rpca_utils from qolmat.utils.data import generate_artificial_ts ``` +```python +from qolmat.imputations.imputers import ImputerRpcaNoisy, ImputerRpcaPcp +``` + **Generate synthetic data** ```python tags=[] @@ -46,16 +50,33 @@ amp_noise = 0.1 X_true, A_true, E_true = generate_artificial_ts(n_samples, periods, amp_anomalies, ratio_anomalies, amp_noise) signal = X_true + A_true + E_true +signal = 10 + signal * 40 # Adding missing data signal[120:180] = np.nan signal[:20] = np.nan +for i in range(10): + signal[i::365] = np.nan # signal[80:220] = np.nan # mask = np.random.choice(len(signal), round(len(signal) / 20)) # signal[mask] = np.nan ``` +```python +import pandas as pd +df = pd.DataFrame({"signal": signal}) +irn = ImputerRpcaPcp(period=100) +df_imp = irn.fit_transform(df) +``` + +```python +plt.plot(df_imp["signal"]) +plt.plot(df["signal"]) + +plt.xlim(0, 200) +``` + ```python tags=[] fig = plt.figure(figsize=(15, 8)) ax = fig.add_subplot(4, 1, 1) diff --git a/examples/tutorials/plot_tuto_benchmark_TS.py b/examples/tutorials/plot_tuto_benchmark_TS.py index f205d08a..1fdbbddb 100644 --- a/examples/tutorials/plot_tuto_benchmark_TS.py +++ b/examples/tutorials/plot_tuto_benchmark_TS.py @@ -1,5 +1,4 @@ -""" -========================= +"""========================= Benchmark for time series ========================= @@ -14,18 +13,18 @@ # First import some libraries import numpy as np -import pandas as pd np.random.seed(1234) -from matplotlib import pyplot as plt import matplotlib.ticker as plticker +from matplotlib import pyplot as plt tab10 = plt.get_cmap("tab10") +from sklearn.linear_model import LinearRegression + from qolmat.benchmark import comparator, missing_patterns from qolmat.imputations import imputers from qolmat.utils import data, plot -from sklearn.linear_model import LinearRegression # %% # 1. Data diff --git a/examples/tutorials/plot_tuto_categorical.py b/examples/tutorials/plot_tuto_categorical.py index b6e993fb..f0491ac8 100644 --- a/examples/tutorials/plot_tuto_categorical.py +++ b/examples/tutorials/plot_tuto_categorical.py @@ -1,5 +1,4 @@ -""" -============================== +"""============================== Benchmark for categorical data ============================== @@ -8,14 +7,13 @@ It comprehends passengers features as well as if they survived the accident. """ -from qolmat.imputations import preprocessing, imputers +from sklearn.pipeline import Pipeline + +from qolmat.benchmark import comparator, missing_patterns +from qolmat.imputations import imputers, preprocessing from qolmat.imputations.imputers import ImputerRegressor -from qolmat.benchmark import missing_patterns -from qolmat.benchmark import comparator from qolmat.utils import data -from sklearn.pipeline import Pipeline - # %% # 1. 
Titanic dataset # --------------------------------------------------------------- diff --git a/examples/tutorials/plot_tuto_diffusion_models.py b/examples/tutorials/plot_tuto_diffusion_models.py index 317128db..0ff0d80a 100644 --- a/examples/tutorials/plot_tuto_diffusion_models.py +++ b/examples/tutorials/plot_tuto_diffusion_models.py @@ -1,5 +1,4 @@ -""" -=============================================== +"""=============================================== Tutorial for imputers based on diffusion models =============================================== @@ -7,15 +6,14 @@ and :class:`~qolmat.imputations.diffusions.ddpms.TsDDPM` classes. """ -import pandas as pd -import numpy as np import matplotlib.pyplot as plt +import numpy as np +import pandas as pd -from qolmat.utils import data from qolmat.benchmark import comparator, missing_patterns - -from qolmat.imputations.imputers_pytorch import ImputerDiffusion from qolmat.imputations.diffusions.ddpms import TabDDPM, TsDDPM +from qolmat.imputations.imputers_pytorch import ImputerDiffusion +from qolmat.utils import data # %% # 1. Time-series data diff --git a/examples/tutorials/plot_tuto_hole_generator.py b/examples/tutorials/plot_tuto_hole_generator.py index 07594591..07ea6348 100644 --- a/examples/tutorials/plot_tuto_hole_generator.py +++ b/examples/tutorials/plot_tuto_hole_generator.py @@ -1,5 +1,4 @@ -""" -============================================ +"""============================================ Tutorial for hole generation in tabular data ============================================ @@ -17,13 +16,10 @@ """ from typing import List -from io import BytesIO import matplotlib import matplotlib.pyplot as plt import numpy as np import pandas as pd -import requests -import zipfile from qolmat.benchmark import missing_patterns from qolmat.utils import data @@ -90,6 +86,7 @@ def visualise_missing_values(df_init: pd.DataFrame, df_mask: pd.DataFrame): initial dataframe df_mask : pd.DataFrame masked dataframe + """ df_tot = df_init.copy() df_tot[df_init.notna()] = 0 @@ -117,6 +114,7 @@ def get_holes_sizes_column_wise(data: np.ndarray) -> List[List[int]]: ------- List[List[int]] List of hole size for each column. 
+ """ hole_sizes = [] for col in range(data.shape[1]): @@ -153,6 +151,7 @@ def plot_cdf( list of labels colors : List[str] list of colors + """ _, axs = plt.subplots(1, df.shape[1], sharey=True, figsize=(15, 3)) diff --git a/examples/tutorials/plot_tuto_mcar.py b/examples/tutorials/plot_tuto_mcar.py index c43d1217..a9bddb7f 100644 --- a/examples/tutorials/plot_tuto_mcar.py +++ b/examples/tutorials/plot_tuto_mcar.py @@ -1,5 +1,4 @@ -""" -============================================ +"""============================================ Tutorial for Testing the MCAR Case ============================================ @@ -8,10 +7,9 @@ # %% # First import some libraries -from matplotlib import pyplot as plt - import numpy as np import pandas as pd +from matplotlib import pyplot as plt from scipy.stats import norm from qolmat.analysis.holes_characterization import LittleTest diff --git a/examples/tutorials/plot_tuto_mean_median.py b/examples/tutorials/plot_tuto_mean_median.py index 403b4407..33c36db2 100644 --- a/examples/tutorials/plot_tuto_mean_median.py +++ b/examples/tutorials/plot_tuto_mean_median.py @@ -1,5 +1,4 @@ -""" -======================================================================================== +"""======================================================================================== Comparison of basic imputers ======================================================================================== @@ -21,7 +20,6 @@ from qolmat.imputations import imputers from qolmat.utils import data, plot - # %% # 1. Data # --------------------------------------------------------------- @@ -29,11 +27,14 @@ # Originally, the first 81 columns contain extracted features and # the 82nd column contains the critical temperature which is used as the # target variable. -# The data does not contain missing values; so for the purpose of this notebook, +# The data does not contain missing values; +# so for the purpose of this notebook, # we corrupt the data, with the :func:`qolmat.utils.data.add_holes` function. # In this way, each column has missing values. -df = data.add_holes(data.get_data("Superconductor"), ratio_masked=0.2, mean_size=120) +df = data.add_holes( + data.get_data("Superconductor"), ratio_masked=0.2, mean_size=120 +) # %% # The dataset contains 82 columns. For simplicity, @@ -55,7 +56,9 @@ # a missing (resp. observed) value. plt.figure(figsize=(15, 4)) -plt.imshow(df.notna().values.T, aspect="auto", cmap="binary", interpolation="none") +plt.imshow( + df.notna().values.T, aspect="auto", cmap="binary", interpolation="none" +) plt.yticks(range(len(df.columns)), df.columns) plt.xlabel("Samples", fontsize=12) plt.grid(False) @@ -102,7 +105,9 @@ custom_cmap = matplotlib.colors.ListedColormap(colorsList) plt.figure(figsize=(15, 4)) -plt.imshow(df_tot.values.T, aspect="auto", cmap=custom_cmap, interpolation="none") +plt.imshow( + df_tot.values.T, aspect="auto", cmap=custom_cmap, interpolation="none" +) plt.yticks(range(len(df_tot.columns)), df_tot.columns) plt.xlabel("Samples", fontsize=12) plt.grid(False) @@ -147,7 +152,9 @@ # are relatively poor. Other imputation methods are therefore # necessary (see folder `imputations`). 
-dfs_imputed = {name: imp.fit_transform(df) for name, imp in dict_imputers.items()} +dfs_imputed = { + name: imp.fit_transform(df) for name, imp in dict_imputers.items() +} for col in cols_to_impute: fig, ax = plt.subplots(figsize=(10, 3)) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..a0f87501 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,126 @@ +# PACKAGE + +[tool.poetry] +name = "qolmat" +version = "0.1.8" +description = "A Python library for optimal data imputation." +authors = [ + "Julien ROUSSEL ", + "Anh Khoa NGO HO ", + "Hong-Lan BOTTERMAN ", + "Guillaume SAËS ", +] +license = "BSD-3-Clause" +readme = "README.rst" +homepage = "https://github.com/Quantmetry/qolmat" +repository = "https://github.com/Quantmetry/qolmat" +documentation = "https://qolmat.readthedocs.io/en/latest/" +keywords = ["imputation"] +classifiers = [ + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "License :: OSI Approved", + "Topic :: Software Development", + "Topic :: Scientific/Engineering", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX", + "Operating System :: Unix", + "Operating System :: MacOS", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] + +# DEPENDENCIES + +[tool.poetry.dependencies] +python = ">=3.8.1,<3.12" +bump2version = "1.0.1" +dcor = "0.6" +jupyter = "1.0.0" +jupyterlab = "1.2.6" +jupytext = "1.14.4" +hyperopt = "0.2.7" +numpy = "1.24.4" +packaging = "23.1" +pandas = "2.0.1" +scipy = "1.10.1" +scikit-learn = "1.3.2" +sphinx-markdown-tables = { version = "*", optional = true } +statsmodels = "0.14.0" +typed-ast = { version = "*", optional = true } +twine = "3.7.1" +wheel = "0.37.1" +category-encoders = "^2.6.3" +ipykernel = "^6.29.5" +torch = "^2.4.0" + +[tool.poetry.dev-dependencies] +matplotlib = "3.6.2" +pre-commit = "2.21.0" + +[tool.poetry.group.checkers.dependencies] +bandit = "^1.7.9" +mypy = "1.1.1" +ruff = "^0.6.3" +pytest = "7.2.0" +pytest-cov = "4.0.0" +pytest-mock = "3.10.0" + +[tool.poetry.group.ci.dependencies] +codecov = "^2.1.13" + +[tool.poetry.group.docs.dependencies] +numpydoc = "1.1.0" +sphinx = "4.3.2" +sphinx-gallery = "0.10.1" +sphinx_rtd_theme = "1.0.0" + +[tool.poetry.extras] +tests = ["typed-ast"] +docs = ["sphinx-markdown-tables"] + +[tool.poetry.urls] +"Bug Tracker" = "https://github.com/Quantmetry/qolmat" +"Source Code" = "https://github.com/Quantmetry/qolmat" + +[[tool.poetry.source]] +name = "pytorch_cpu" +url = "https://download.pytorch.org/whl/cpu" +priority = "explicit" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + + +# CONFIGURATION +[tool.bandit] +targets = ["qolmat"] + +[tool.mypy] +pretty = true +strict = false +python_version = ">=3.8.1,<3.12" +ignore_missing_imports = true + +[tool.ruff] +line-length = 79 +fix = true +indent-width = 4 +target-version = "py310" +exclude = ["examples/", "docs/"] + +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint] +select = ["C", "D", "E", "F", "I", "Q", "W"] +ignore = ["C901", "D107"] + +[tool.ruff.lint.isort] +known-first-party = ["qolmat"] + +[tool.ruff.lint.per-file-ignores] +"tests/**/*.py" = ["D100", "D103"] +"__init__.py" = ["D104"] diff --git a/qolmat/analysis/holes_characterization.py b/qolmat/analysis/holes_characterization.py index 5669ac7b..aa511052 100644 --- a/qolmat/analysis/holes_characterization.py +++ b/qolmat/analysis/holes_characterization.py @@ -1,3 +1,5 @@ 
+"""Script for characterising the holes.""" + from abc import ABC, abstractmethod from typing import Optional, Union @@ -9,34 +11,37 @@ class McarTest(ABC): - """ - Astract class for MCAR tests. - """ + """Astract class for MCAR tests.""" @abstractmethod def test(self, df: pd.DataFrame) -> float: + """Test function.""" pass class LittleTest(McarTest): - """ - This class implements the Little's test, which is designed to detect the heterogeneity accross - the missing patterns. The null hypothesis is "The missing data mechanism is MCAR". The - shortcoming of this test is that it won't detect the heterogeneity of covariance. + """Little Test class. + + This class implements the Little's test, which is designed to detect the + heterogeneity accross the missing patterns. The null hypothesis is + "The missing data mechanism is MCAR". The shortcoming of this test is + that it won't detect the heterogeneity of covariance. References ---------- - Little. "A Test of Missing Completely at Random for Multivariate Data with Missing Values." - Journal of the American Statistical Association, Volume 83, 1988 - Issue 404 + Little. "A Test of Missing Completely at Random for Multivariate Data with + Missing Values." Journal of the American Statistical Association, + Volume 83, 1988 - Issue 404 Parameters ---------- imputer : Optional[ImputerEM] - Imputer based on the EM algorithm. The 'model' attribute must be equal to 'multinormal'. - If None, the default ImputerEM is taken. + Imputer based on the EM algorithm. The 'model' attribute must be + equal to 'multinormal'. If None, the default ImputerEM is taken. random_state : int, RandomState instance or None, default=None Controls the randomness. Pass an int for reproducible output across multiple function calls. + """ def __init__( @@ -47,15 +52,14 @@ def __init__( super().__init__() if imputer and imputer.model != "multinormal": raise AttributeError( - "The ImputerEM model must be 'multinormal' to use the Little's test" + "The ImputerEM model must be 'multinormal' " + "to use the Little's test" ) self.imputer = imputer self.random_state = random_state def test(self, df: pd.DataFrame) -> float: - """ - Apply the Little's test over a real dataframe. - + """Apply the Little's test over a real dataframe. Parameters ---------- @@ -66,6 +70,7 @@ def test(self, df: pd.DataFrame) -> float: ------- float The p-value of the test. 
+ """ imputer = self.imputer or ImputerEM(random_state=self.random_state) imputer = imputer._fit_element(df) @@ -79,16 +84,22 @@ def test(self, df: pd.DataFrame) -> float: # Iterate over the patterns df_nan = df.notna() - for tup_pattern, df_nan_pattern in df_nan.groupby(df_nan.columns.tolist()): + for tup_pattern, df_nan_pattern in df_nan.groupby( + df_nan.columns.tolist() + ): n_rows_pattern, _ = df_nan_pattern.shape ind_pattern = df_nan_pattern.index df_pattern = df.loc[ind_pattern, list(tup_pattern)] obs_mean = df_pattern.mean().to_numpy() diff_means = obs_mean - ml_means[list(tup_pattern)] - inv_sigma_pattern = np.linalg.inv(ml_cov[:, tup_pattern][tup_pattern, :]) + inv_sigma_pattern = np.linalg.inv( + ml_cov[:, tup_pattern][tup_pattern, :] + ) - d0 += n_rows_pattern * np.dot(np.dot(diff_means, inv_sigma_pattern), diff_means.T) + d0 += n_rows_pattern * np.dot( + np.dot(diff_means, inv_sigma_pattern), diff_means.T + ) degree_f += tup_pattern.count(True) return 1 - float(chi2.cdf(d0, degree_f)) diff --git a/qolmat/benchmark/comparator.py b/qolmat/benchmark/comparator.py index 5a60c6f5..4fed2e9e 100644 --- a/qolmat/benchmark/comparator.py +++ b/qolmat/benchmark/comparator.py @@ -1,3 +1,5 @@ +"""Script for comparator.""" + from typing import Any, Dict, List, Optional import numpy as np @@ -8,23 +10,28 @@ class Comparator: - """ - This class implements a comparator for evaluating different imputation methods. + """Comparator class. + + This class implements a comparator for evaluating different + imputation methods. Parameters ---------- dict_models: Dict[str, any] dictionary of imputation methods selected_columns: List[str]Œ - list of column's names selected (all with at least one null value will be imputed) + list of column's names selected (all with at least one null value will + be imputed) columnwise_evaluation : Optional[bool], optional - whether the metric should be calculated column-wise or not, by default False - dict_config_opti: Optional[Dict[str, Dict[str, Union[str, float, int]]]] = {} - dictionary of search space for each implementation method. By default, the value is set to - {}. + whether the metric should be calculated column-wise or not, + by default False + dict_config_opti: Optional[Dict[str, Dict[str, Union[str, float, int]]]] + dictionary of search space for each implementation method. + By default, the value is set to {}. max_evals: int = 10 number of calls of the optimization algorithm 10. + """ def __init__( @@ -53,24 +60,29 @@ def get_errors( df_imputed: pd.DataFrame, df_mask: pd.DataFrame, ) -> pd.DataFrame: - """Functions evaluating the reconstruction's quality + """Get errors - estimate the reconstruction's quality. 
Parameters ---------- - signal_ref : pd.DataFrame + df_origin : pd.DataFrame reference/orginal signal - signal_imputed : pd.DataFrame + df_imputed : pd.DataFrame imputed signal + df_mask : pd.DataFrame + masked dataframe (NA) Returns ------- pd.DataFrame DataFrame of results obtained via different metrics + """ dict_errors = {} for name_metric in self.metrics: fun_metric = metrics.get_metric(name_metric) - dict_errors[name_metric] = fun_metric(df_origin, df_imputed, df_mask) + dict_errors[name_metric] = fun_metric( + df_origin, df_imputed, df_mask + ) df_errors = pd.concat(dict_errors.values(), keys=dict_errors.keys()) return df_errors @@ -81,23 +93,25 @@ def evaluate_errors_sample( dict_config_opti_imputer: Dict[str, Any] = {}, metric_optim: str = "mse", ) -> pd.Series: - """Evaluate the errors in the cross-validation + """Evaluate the errors in the cross-validation. Parameters ---------- - tested_model : any + imputer : Any imputation model df : pd.DataFrame dataframe to impute dict_config_opti_imputer : Dict search space for tested_model's hyperparameters metric_optim : str - Loss function used when imputers undergo hyperparameter optimization + Loss function used when imputers undergo hyperparameter + optimization Returns ------- pd.Series Series with the errors for each metric and each variable + """ list_errors = [] df_origin = df[self.selected_columns].copy() @@ -117,9 +131,12 @@ def evaluate_errors_sample( subset = self.generator_holes.subset if subset is None: raise ValueError( - "HoleGenerator `subset` should be overwritten in split but it is none!" + "HoleGenerator `subset` should be overwritten in split " + "but it is none!" ) - df_errors = self.get_errors(df_origin[subset], df_imputed[subset], df_mask[subset]) + df_errors = self.get_errors( + df_origin[subset], df_imputed[subset], df_mask[subset] + ) list_errors.append(df_errors) df_errors = pd.DataFrame(list_errors) errors_mean = df_errors.mean(axis=0) @@ -130,20 +147,20 @@ def compare( self, df: pd.DataFrame, ): - """Function to compare different imputation methods on dataframe df + """Compure different imputation methods on dataframe df. Parameters ---------- df : pd.DataFrame - verbose : bool, optional - _description_, by default True + input dataframe (for comparison) + Returns ------- pd.DataFrame - Dataframe with the metrics results, imputers are in columns and indices represent - metrics and variables. - """ + Dataframe with the metrics results, imputers are in columns + and indices represent metrics and variables. + """ dict_errors = {} for name, imputer in self.dict_imputers.items(): @@ -156,7 +173,10 @@ def compare( ) print("done.") except Exception as excp: - print(f"Error while testing {name} of type {type(imputer).__name__}!") + print( + f"Error while testing {name} of type " + f"{type(imputer).__name__}!" 
+ ) raise excp df_errors = pd.DataFrame(dict_errors) diff --git a/qolmat/benchmark/hyperparameters.py b/qolmat/benchmark/hyperparameters.py index eaf6efc4..7aa4a24b 100644 --- a/qolmat/benchmark/hyperparameters.py +++ b/qolmat/benchmark/hyperparameters.py @@ -1,15 +1,15 @@ -import copy -from typing import Any, Callable, Dict, List, Union +"""Script for hyperparameter optimisation.""" -import numpy as np -import pandas as pd +import copy +from typing import Callable, Dict, List # import skopt # from skopt.space import Categorical, Dimension, Integer, Real import hyperopt as ho -from hyperopt.pyll.base import Apply as hoApply -from qolmat.benchmark import metrics +import numpy as np +import pandas as pd +from qolmat.benchmark import metrics from qolmat.benchmark.missing_patterns import _HoleGenerator from qolmat.imputations.imputers import _Imputer from qolmat.utils.utils import HyperValue @@ -22,18 +22,21 @@ def get_objective( metric: str, names_hyperparams: List[str], ) -> Callable: - """ - Define the objective function, which is the average metric computed over the folds provided by + """Define the objective function. + + This is the average metric computed over the folds provided by the hole generator, using a cross-validation. Parameters ---------- imputer: _Imputer - Imputer that should be optimized, it should at least have a fit_transform method and an - imputer_params attribute + Imputer that should be optimized, it should at least have a + fit_transform method and an imputer_params attribute + df : pd.DataFrame + input dataframe generator: _HoleGenerator - Generator creating the masked values in the nested cross validation allowing to measure the - imputer performance + Generator creating the masked values in the nested cross validation + allowing to measure the imputer performance metric: str Metric used as perfomance indicator, common values are `mse` and `mae` names_hyperparams: List[str] @@ -43,6 +46,7 @@ def get_objective( ------- Callable[List[HyperValue], float] Objective function + """ def fun_obf(args: List[HyperValue]) -> float: @@ -58,7 +62,9 @@ def fun_obf(args: List[HyperValue]) -> float: df_imputed = imputer.fit_transform(df_corrupted) subset = generator.subset fun_metric = metrics.get_metric(metric) - errors = fun_metric(df_origin[subset], df_imputed[subset], df_mask[subset]) + errors = fun_metric( + df_origin[subset], df_imputed[subset], df_mask[subset] + ) list_errors.append(errors) mean_errors = np.mean(errors) @@ -76,44 +82,55 @@ def optimize( max_evals: int = 100, verbose: bool = False, ): - """Return the provided imputer with hyperparameters optimized in the provided range in order to - minimize the provided metric. + """Optimisation function. + + Return the provided imputer with hyperparameters optimized in the provided + range in order to minimize the provided metric. 
Parameters ---------- imputer: _Imputer - Imputer that should be optimized, it should at least have a fit_transform method and an - imputer_params attribute + Imputer that should be optimized, it should at least have a + fit_transform method and an imputer_params attribute + df : pd.DataFrame + input dataframe generator: _HoleGenerator - Generator creating the masked values in the nested cross validation allowing to measure the - imputer performance + Generator creating the masked values in the nested cross validation + allowing to measure the imputer performance metric: str Metric used as perfomance indicator, common values are `mse` and `mae` dict_config: Dict[str, HyperValue] Search space for the tested hyperparameters max_evals: int - Maximum number of evaluation of the performance of the algorithm. Each estimation involves - one call to fit_transform per fold returned by the generator. See the n_fold attribute. + Maximum number of evaluation of the performance of the algorithm. + Each estimation involves one call to fit_transform per fold returned + by the generator. See the n_fold attribute. verbose: bool - Verbosity switch, usefull for imputers that can have unstable behavior for some - hyperparameters values + Verbosity switch, usefull for imputers that can have unstable + behavior for some hyperparameters values Returns ------- _Imputer Optimized imputer + """ imputer = copy.deepcopy(imputer) if dict_config == {}: return imputer names_hyperparams = list(dict_config.keys()) values_hyperparams = list(dict_config.values()) - imputer.imputer_params = tuple(set(imputer.imputer_params) | set(dict_config.keys())) + imputer.imputer_params = tuple( + set(imputer.imputer_params) | set(dict_config.keys()) + ) if verbose and hasattr(imputer, "verbose"): setattr(imputer, "verbose", False) fun_obj = get_objective(imputer, df, generator, metric, names_hyperparams) hyperparams = ho.fmin( - fn=fun_obj, space=values_hyperparams, algo=ho.tpe.suggest, max_evals=max_evals + fn=fun_obj, + space=values_hyperparams, + algo=ho.tpe.suggest, + max_evals=max_evals, ) for key, value in hyperparams.items(): diff --git a/qolmat/benchmark/metrics.py b/qolmat/benchmark/metrics.py index f8f87441..b8af8667 100644 --- a/qolmat/benchmark/metrics.py +++ b/qolmat/benchmark/metrics.py @@ -1,15 +1,17 @@ +"""Script for metrics.""" + from functools import partial from typing import Callable, Dict, List +import dcor import numpy as np import pandas as pd import scipy +from numpy.linalg import LinAlgError from sklearn import metrics as skm -import dcor from qolmat.utils import algebra, utils from qolmat.utils.exceptions import NotEnoughSamples -from numpy.linalg import LinAlgError EPS = np.finfo(float).eps @@ -26,7 +28,9 @@ def columnwise_metric( type_cols: str = "all", **kwargs, ) -> pd.Series: - """For each column, compute a metric score based on the true dataframe + """Compute column-wise metrics. + + For each column, compute a metric score based on the true dataframe and the predicted dataframe Parameters @@ -44,17 +48,21 @@ def columnwise_metric( - `all` to apply the metric to all columns - `numerical` to apply the metric to numerical columns only - `categorical` to apply the metric to categorical columns only + **kwargs: dict + additional arguments Returns ------- pd.Series Series of scores for all columns + """ try: pd.testing.assert_index_equal(df1.columns, df2.columns) except AssertionError: raise ValueError( - f"Input dataframes do not have the same columns! 
({df1.columns} != {df2.columns})" + "Input dataframes do not have the same columns! " + f"({df1.columns} != {df2.columns})" ) if type_cols == "all": cols = df1.columns @@ -63,19 +71,23 @@ def columnwise_metric( elif type_cols == "categorical": cols = utils._get_categorical_features(df1) else: - raise ValueError(f"Value {type_cols} is not valid for parameter `type_cols`!") + raise ValueError( + f"Value {type_cols} is not valid for parameter `type_cols`!" + ) values = {} for col in cols: df1_col = df1.loc[df_mask[col], col] df2_col = df2.loc[df_mask[col], col] - assert df1_col.notna().all() - assert df2_col.notna().all() + if df1_col.isna().any() or df2_col.isna().any(): + raise ValueError(f"Column {col} contains NaN.") values[col] = metric(df1_col, df2_col, **kwargs) return pd.Series(values) -def mean_squared_error(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> pd.Series: +def mean_squared_error( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> pd.Series: """Mean squared error between two dataframes. Parameters @@ -90,14 +102,17 @@ def mean_squared_error(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFra Returns ------- pd.Series + """ - return columnwise_metric(df1, df2, df_mask, skm.mean_squared_error, type_cols="numerical") + return columnwise_metric( + df1, df2, df_mask, skm.mean_squared_error, type_cols="numerical" + ) def root_mean_squared_error( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> pd.Series: - """Root mean squared error between two dataframes. + """Compute the root mean squared error between two dataframes. Parameters ---------- @@ -111,14 +126,22 @@ def root_mean_squared_error( Returns ------- pd.Series + """ return columnwise_metric( - df1, df2, df_mask, skm.mean_squared_error, type_cols="numerical", squared=False + df1, + df2, + df_mask, + skm.mean_squared_error, + type_cols="numerical", + squared=False, ) -def mean_absolute_error(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> pd.Series: - """Mean absolute error between two dataframes. +def mean_absolute_error( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> pd.Series: + """Compute the mean absolute error between two dataframes. Parameters ---------- @@ -132,14 +155,17 @@ def mean_absolute_error(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFr Returns ------- pd.Series + """ - return columnwise_metric(df1, df2, df_mask, skm.mean_absolute_error, type_cols="numerical") + return columnwise_metric( + df1, df2, df_mask, skm.mean_absolute_error, type_cols="numerical" + ) def mean_absolute_percentage_error( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> pd.Series: - """Mean absolute percentage error between two dataframes. + """Compute the mean absolute percentage error between two dataframes. Parameters ---------- @@ -153,14 +179,22 @@ def mean_absolute_percentage_error( Returns ------- pd.Series + """ return columnwise_metric( - df1, df2, df_mask, skm.mean_absolute_percentage_error, type_cols="numerical" + df1, + df2, + df_mask, + skm.mean_absolute_percentage_error, + type_cols="numerical", ) -def _weighted_mean_absolute_percentage_error_1D(values1: pd.Series, values2: pd.Series) -> float: - """Weighted mean absolute percentage error between two series. +def _weighted_mean_absolute_percentage_error_1D( + values1: pd.Series, values2: pd.Series +) -> float: + """Compute the weighted mean absolute perc. error between 2 series. 
+ Based on https://en.wikipedia.org/wiki/Mean_absolute_percentage_error Parameters @@ -174,6 +208,7 @@ def _weighted_mean_absolute_percentage_error_1D(values1: pd.Series, values2: pd. ------- float Weighted mean absolute percentage error + """ return (values1 - values2).abs().sum() / values1.abs().sum() @@ -181,7 +216,7 @@ def _weighted_mean_absolute_percentage_error_1D(values1: pd.Series, values2: pd. def weighted_mean_absolute_percentage_error( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> pd.Series: - """Weighted mean absolute percentage error between two dataframes. + """Compute the weighted mean absolute percentage error between 2 df. Parameters ---------- @@ -195,6 +230,7 @@ def weighted_mean_absolute_percentage_error( Returns ------- pd.Series + """ return columnwise_metric( df1, @@ -205,9 +241,10 @@ def weighted_mean_absolute_percentage_error( ) -def accuracy(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> pd.Series: - """ - Matching ratio beetween the two datasets. +def accuracy( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> pd.Series: + """Compute the matching ratio beetween the two datasets. Parameters ---------- @@ -221,6 +258,7 @@ def accuracy(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> pd. Returns ------- pd.Series + """ return columnwise_metric( df1, @@ -232,8 +270,7 @@ def accuracy(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> pd. def accuracy_1D(values1: pd.Series, values2: pd.Series) -> float: - """ - Matching ratio beetween the set of values. + """Compute the matching ratio beetween the set of values. Parameters ---------- @@ -246,6 +283,7 @@ def accuracy_1D(values1: pd.Series, values2: pd.Series) -> float: ------- float accuracy + """ return (values1 == values2).mean() @@ -256,8 +294,9 @@ def dist_wasserstein( df_mask: pd.DataFrame, method: str = "columnwise", ) -> pd.Series: - """Wasserstein distances between columns of 2 dataframes. - Wasserstein distance can only be computed columnwise + """Compute the Wasserstein distances between columns of 2 dataframes. + + Wasserstein distance can only be computed columnwise. Parameters ---------- @@ -267,24 +306,34 @@ def dist_wasserstein( Predicted dataframe df_mask : pd.DataFrame Elements of the dataframes to compute on + method : str, optional + columnwise or not Returns ------- pd.Series wasserstein distances + """ if method == "columnwise": - return columnwise_metric(df1, df2, df_mask, scipy.stats.wasserstein_distance) + return columnwise_metric( + df1, df2, df_mask, scipy.stats.wasserstein_distance + ) else: raise AssertionError( - f"The parameter of the function wasserstein_distance should be one of" - f"the following: [`columnwise`], not `{method}`!" + f"The parameter of the function wasserstein_distance should " + "be one of the following: " + f"[`columnwise`], not `{method}`!" ) def kolmogorov_smirnov_test_1D(df1: pd.Series, df2: pd.Series) -> float: - """Compute KS test statistic of the two-sample Kolmogorov-Smirnov test for goodness of fit. - See more in https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html. + """Compute KS test statistic. + + Compute KS test stat. of the two-sample Kolmogorov-Smirnov test + for goodness of fit. + See more in + https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html. 
Parameters ---------- @@ -297,6 +346,7 @@ def kolmogorov_smirnov_test_1D(df1: pd.Series, df2: pd.Series) -> float: ------- float KS test statistic + """ return scipy.stats.ks_2samp(df1, df2)[0] @@ -304,7 +354,8 @@ def kolmogorov_smirnov_test_1D(df1: pd.Series, df2: pd.Series) -> float: def kolmogorov_smirnov_test( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> pd.Series: - """Kolmogorov Smirnov Test for numerical features. + """Compute the Kolmogorov Smirnov Test for numerical features. + Lower score means better performance. Parameters @@ -320,12 +371,16 @@ def kolmogorov_smirnov_test( ------- pd.Series KS test statistic + """ - return columnwise_metric(df1, df2, df_mask, kolmogorov_smirnov_test_1D, type_cols="numerical") + return columnwise_metric( + df1, df2, df_mask, kolmogorov_smirnov_test_1D, type_cols="numerical" + ) def _total_variance_distance_1D(df1: pd.Series, df2: pd.Series) -> float: - """Compute Total Variance Distance for a categorical feature + """Compute Total Variance Distance for a categorical feature. + It is based on TVComplement in https://github.com/sdv-dev/SDMetrics Parameters @@ -339,6 +394,7 @@ def _total_variance_distance_1D(df1: pd.Series, df2: pd.Series) -> float: ------- float Total variance distance + """ list_categories = list(set(df1.unique()).union(set(df2.unique()))) freqs1 = df1.value_counts() / len(df1) @@ -351,7 +407,8 @@ def _total_variance_distance_1D(df1: pd.Series, df2: pd.Series) -> float: def total_variance_distance( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> pd.Series: - """Total variance distance for categorical features + """Compute the total variance distance for categorical features. + It is based on TVComplement in https://github.com/sdv-dev/SDMetrics Parameters @@ -367,6 +424,7 @@ def total_variance_distance( ------- pd.Series Total variance distance + """ return columnwise_metric( df1, @@ -382,9 +440,13 @@ def _check_same_number_columns(df1: pd.DataFrame, df2: pd.DataFrame): raise Exception("inputs have to have the same number of columns.") -def _get_correlation_pearson_matrix(df: pd.DataFrame, use_p_value: bool = True) -> pd.DataFrame: - """Get matrix of correlation values for numerical features - based on Pearson correlation coefficient or p-value for testing non-correlation. +def _get_correlation_pearson_matrix( + df: pd.DataFrame, use_p_value: bool = True +) -> pd.DataFrame: + """Get matrix of correlation values for numerical features. + + Based on Pearson correlation coefficient or p-value for + testing non-correlation. Parameters ---------- @@ -397,12 +459,15 @@ def _get_correlation_pearson_matrix(df: pd.DataFrame, use_p_value: bool = True) ------- pd.DataFrame Correlation matrix + """ cols = df.columns.tolist() matrix = np.zeros((len(df.columns), len(df.columns))) for idx_1, col_1 in enumerate(cols): for idx_2, col_2 in enumerate(cols): - res = scipy.stats.mstats.pearsonr(df[[col_1]].values, df[[col_2]].values) + res = scipy.stats.mstats.pearsonr( + df[[col_1]].values, df[[col_2]].values + ) if use_p_value: matrix[idx_1, idx_2] = res[1] else: @@ -417,8 +482,11 @@ def mean_difference_correlation_matrix_numerical_features( df_mask: pd.DataFrame, use_p_value: bool = True, ) -> pd.Series: - """Mean absolute of differences between the correlation matrices of df1 and df2. - based on Pearson correlation coefficient or p-value for testing non-correlation. + """Compute the mean absolute of differences. + + Computed between the correlation matrices of df1 and df2. 
+ based on Pearson correlation coefficient or p-value for + testing non-correlation. Parameters ---------- @@ -435,6 +503,7 @@ def mean_difference_correlation_matrix_numerical_features( ------- pd.Series Mean absolute of differences for each feature + """ df1 = df1[df_mask].dropna(axis=0) df2 = df2[df_mask].dropna(axis=0) @@ -442,28 +511,38 @@ def mean_difference_correlation_matrix_numerical_features( _check_same_number_columns(df1, df2) cols_numerical = utils._get_numerical_features(df1) - df_corr1 = _get_correlation_pearson_matrix(df1[cols_numerical], use_p_value=use_p_value) - df_corr2 = _get_correlation_pearson_matrix(df2[cols_numerical], use_p_value=use_p_value) + df_corr1 = _get_correlation_pearson_matrix( + df1[cols_numerical], use_p_value=use_p_value + ) + df_corr2 = _get_correlation_pearson_matrix( + df2[cols_numerical], use_p_value=use_p_value + ) diff_corr = (df_corr1 - df_corr2).abs().mean(axis=1) return pd.Series(diff_corr, index=cols_numerical) -def _get_correlation_chi2_matrix(data: pd.DataFrame, use_p_value: bool = True) -> pd.DataFrame: - """Get matrix of correlation values for categorical features - based on Chi-square test of independence of variables (the test statistic or the p-value). +def _get_correlation_chi2_matrix( + data: pd.DataFrame, use_p_value: bool = True +) -> pd.DataFrame: + """Get matrix of correlation values for categorical features. + + Based on Chi-square test of independence of variables + (the test statistic or the p-value). Parameters ---------- - df : pd.DataFrame + data : pd.DataFrame dataframe use_p_value : bool, optional - use the p-value of the test instead of the test statistic, by default True + use the p-value of the test instead of the test statistic, + by default True Returns ------- pd.DataFrame Correlation matrix + """ cols = data.columns.tolist() matrix = np.zeros((len(data.columns), len(data.columns))) @@ -486,8 +565,11 @@ def mean_difference_correlation_matrix_categorical_features( df_mask: pd.DataFrame, use_p_value: bool = True, ) -> pd.Series: - """Mean absolute of differences between the correlation matrix of df1 and df2 - based on Chi-square test of independence of variables (the test statistic or the p-value) + """Compute the mean absolute of differences. 
+ + Computed between the correlation matrix of df1 and df2 + based on Chi-square test of independence of variables + (the test statistic or the p-value) Parameters ---------- @@ -498,12 +580,14 @@ def mean_difference_correlation_matrix_categorical_features( df_mask : pd.DataFrame Elements of the dataframes to compute on use_p_value : bool, optional - use the p-value of the test instead of the test statistic, by default True + use the p-value of the test instead of the test statistic, + by default True Returns ------- pd.Series Mean absolute of differences for each feature + """ df1 = df1[df_mask].dropna(axis=0) df2 = df2[df_mask].dropna(axis=0) @@ -511,8 +595,12 @@ def mean_difference_correlation_matrix_categorical_features( _check_same_number_columns(df1, df2) cols_categorical = utils._get_categorical_features(df1) - df_corr1 = _get_correlation_chi2_matrix(df1[cols_categorical], use_p_value=use_p_value) - df_corr2 = _get_correlation_chi2_matrix(df2[cols_categorical], use_p_value=use_p_value) + df_corr1 = _get_correlation_chi2_matrix( + df1[cols_categorical], use_p_value=use_p_value + ) + df_corr2 = _get_correlation_chi2_matrix( + df2[cols_categorical], use_p_value=use_p_value + ) diff_corr = (df_corr1 - df_corr2).abs().mean(axis=1) return pd.Series(diff_corr, index=cols_categorical) @@ -524,7 +612,9 @@ def _get_correlation_f_oneway_matrix( cols_numerical: List[str], use_p_value: bool = True, ) -> pd.DataFrame: - """Get matrix of correlation values between categorical and numerical features + """Get matrix of correlation values. + + Computed between categorical and numerical features based on the one-way ANOVA. Parameters @@ -536,12 +626,14 @@ def _get_correlation_f_oneway_matrix( cols_numerical : List[str] list numerical columns use_p_value : bool, optional - use the p-value of the test instead of the test statistic, by default True + use the p-value of the test instead of the test statistic, + by default True Returns ------- pd.DataFrame Correlation matrix + """ matrix = np.zeros((len(cols_categorical), len(cols_numerical))) for idx_cat, col_cat in enumerate(cols_categorical): @@ -561,7 +653,9 @@ def mean_diff_corr_matrix_categorical_vs_numerical_features( df_mask: pd.DataFrame, use_p_value: bool = True, ) -> pd.Series: - """Mean absolute of differences between the correlation matrix of df1 and df2 + """Compute the mean absolute of differences. + + Computation between the correlation matrix of df1 and df2 based on the one-way ANOVA. Parameters @@ -573,12 +667,14 @@ def mean_diff_corr_matrix_categorical_vs_numerical_features( df_mask : pd.DataFrame Elements of the dataframes to compute on use_p_value : bool, optional - use the p-value of the test instead of the test statistic, by default True + use the p-value of the test instead of the test statistic, + by default True Returns ------- pd.Series Mean absolute of differences for each feature + """ df1 = df1[df_mask].dropna(axis=0) df2 = df2[df_mask].dropna(axis=0) @@ -603,7 +699,8 @@ def mean_diff_corr_matrix_categorical_vs_numerical_features( def _sum_manhattan_distances_1D(values: pd.Series) -> float: - """Sum of Manhattan distances computed for one column + """Compute the sum of Manhattan distances computed for one column. 
+ It is based on https://www.geeksforgeeks.org/sum-manhattan-distances-pairs-points/ Parameters @@ -615,6 +712,7 @@ def _sum_manhattan_distances_1D(values: pd.Series) -> float: ------- float Sum of Manhattan distances + """ values = values.sort_values(ascending=True) sums_partial = values.shift().fillna(0.0).cumsum() @@ -624,25 +722,31 @@ def _sum_manhattan_distances_1D(values: pd.Series) -> float: def _sum_manhattan_distances(df1: pd.DataFrame) -> float: - """Sum Manhattan distances between all pairs of rows. + """Compute the sum Manhattan distances between all pairs of rows. + It is based on https://www.geeksforgeeks.org/sum-manhattan-distances-pairs-points/ Parameters ---------- df1 : pd.DataFrame + input dataframe Returns ------- float Sum of Manhattan distances for all pairs of rows. + """ cols = df1.columns.tolist() result = sum([_sum_manhattan_distances_1D(df1[col]) for col in cols]) return result -def sum_energy_distances(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> pd.Series: - """Sum of energy distances between df1 and df2. +def sum_energy_distances( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> pd.Series: + """Compute the sum of energy distances between df1 and df2. + It is based on https://dcor.readthedocs.io/en/latest/theory.html# Parameters @@ -658,8 +762,8 @@ def sum_energy_distances(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataF ------- pd.Series Sum of energy distances between df1 and df2. - """ + """ # Replace nan in dataframe df1 = df1[df_mask].fillna(0.0) df2 = df2[df_mask].fillna(0.0) @@ -670,7 +774,11 @@ def sum_energy_distances(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataF df = pd.concat([df1, df2]) sum_distances_df1_df2 = _sum_manhattan_distances(df) - sum_distance = 2 * sum_distances_df1_df2 - 4 * sum_distances_df1 - 4 * sum_distances_df2 + sum_distance = ( + 2 * sum_distances_df1_df2 + - 4 * sum_distances_df1 + - 4 * sum_distances_df2 + ) return pd.Series(sum_distance, index=["All"]) @@ -681,7 +789,8 @@ def sum_pairwise_distances( df_mask: pd.DataFrame, metric: str = "cityblock", ) -> float: - """Sum of pairwise distances based on a predefined metric. + """Compute the sum of pairwise distances based on a predefined metric. + Metrics are found in this link https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html @@ -700,6 +809,7 @@ def sum_pairwise_distances( ------- float Sum of pairwise distances based on a predefined metric + """ df1 = df1[df_mask.any(axis=1)] df2 = df2[df_mask.any(axis=1)] @@ -717,12 +827,16 @@ def frechet_distance_base( df1: pd.DataFrame, df2: pd.DataFrame, ) -> pd.Series: - """Compute the Fréchet distance between two dataframes df1 and df2 - Frechet_distance = || mu_1 - mu_2 ||_2^2 + Tr(Sigma_1 + Sigma_2 - 2(Sigma_1 . Sigma_2)^(1/2)) - It is normalized, df1 and df2 are first scaled by a factor (std(df1) + std(df2)) / 2 - and then centered around (mean(df1) + mean(df2)) / 2 - Based on: Dowson, D. C., and BV666017 Landau. "The Fréchet distance between multivariate normal - distributions." Journal of multivariate analysis 12.3 (1982): 450-455. + """Compute the Fréchet distance between two dataframes df1 and df2. + + Frechet_distance = || mu_1 - mu_2 ||_2^2 + + Tr(Sigma_1 + Sigma_2 - 2(Sigma_1 . Sigma_2)^(1/2)) + It is normalized, df1 and df2 are first scaled by a factor + (std(df1) + std(df2)) / 2 and then centered around + (mean(df1) + mean(df2)) / 2 + Based on: Dowson, D. C., and BV666017 Landau. 
+ "The Fréchet distance between multivariate normal distributions." + Journal of multivariate analysis 12.3 (1982): 450-455. Parameters ---------- @@ -735,8 +849,8 @@ def frechet_distance_base( ------- pd.Series Frechet distance in a Series object - """ + """ if df1.shape != df2.shape: raise Exception("inputs have to be of same dimensions.") @@ -759,12 +873,13 @@ def frechet_distance( method: str = "single", min_n_rows: int = 10, ) -> pd.Series: - """ - Frechet distance computed using a pattern decomposition. Several variant are implemented: - - the `single` method relies on a single estimation of the means and covariance matrix. It is - relevent for MCAR data. - - the `pattern`method relies on the aggregation of the estimated distance between each - pattern. It is relevent for MAR data. + """Compute Frechet distance computed using a pattern decomposition. + + Several variant are implemented: + - the `single` method relies on a single estimation of the means and + covariance matrix. It is relevent for MCAR data. + - the `pattern`method relies on the aggregation of the estimated distance + between each pattern. It is relevent for MAR data. Parameters ---------- @@ -775,8 +890,8 @@ def frechet_distance( df_mask : pd.DataFrame Mask indicating on which values the distance has to computed on method: str - Method used to compute the distance on multivariate datasets with missing values. - Possible values are `robust` and `pattern`. + Method used to compute the distance on multivariate datasets with + missing values. Possible values are `robust` and `pattern`. min_n_rows: int Minimum number of rows for a KL estimation @@ -784,8 +899,8 @@ def frechet_distance( ------- pd.Series Series of computed metrics - """ + """ if method == "single": return frechet_distance_base(df1, df2) return pattern_based_weighted_mean_metric( @@ -799,9 +914,12 @@ def frechet_distance( def kl_divergence_1D(df1: pd.Series, df2: pd.Series) -> float: - """Estimation of the Kullback-Leibler divergence between the two 1D empirical distributions - given by `df1`and `df2`. The samples are binarized using a uniform spacing with 20 bins from - the smallest to the largest value. Not that this may be a coarse estimation. + """Estimate the the Kullback-Leibler divergence for 1D. + + Computation between the two 1D empirical distributions + given by `df1`and `df2`. The samples are binarized using a uniform spacing + with 20 bins from the smallest to the largest value. Not that this may be + a coarse estimation. Parameters ---------- @@ -814,6 +932,7 @@ def kl_divergence_1D(df1: pd.Series, df2: pd.Series) -> float: ------- float Kullback-Leibler divergence between the two empirical distributions. + """ min_val = min(df1.min(), df2.min()) max_val = max(df1.max(), df2.max()) @@ -824,7 +943,9 @@ def kl_divergence_1D(df1: pd.Series, df2: pd.Series) -> float: def kl_divergence_gaussian(df1: pd.DataFrame, df2: pd.DataFrame) -> float: - """Kullback-Leibler divergence estimation based on a Gaussian approximation of both empirical + """Compute Kullback-Leibler divergence estimation. 
+ + Computation based on a Gaussian approximation of both empirical distributions Parameters @@ -838,16 +959,20 @@ def kl_divergence_gaussian(df1: pd.DataFrame, df2: pd.DataFrame) -> float: ------- pd.Series Series of estimated metrics + """ cov1 = df1.cov().values cov2 = df2.cov().values means1 = np.array(df1.mean()) means2 = np.array(df2.mean()) try: - div_kl = algebra.kl_divergence_gaussian_exact(means1, cov1, means2, cov2) + div_kl = algebra.kl_divergence_gaussian_exact( + means1, cov1, means2, cov2 + ) except LinAlgError: raise ValueError( - "Provided datasets have degenerate colinearities, KL-divergence cannot be computed!" + "Provided datasets have degenerate collinearities, KL-divergence " + "cannot be computed!" ) return div_kl @@ -859,11 +984,12 @@ def kl_divergence( method: str = "columnwise", min_n_rows: int = 10, ) -> pd.Series: - """ - Estimation of the Kullback-Leibler divergence between too empirical distributions. Three - methods are implemented: - - columnwise, relying on a uniform binarization and only taking marginals into account - (https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence), + """Estimate the KL divergence. + + Estimation of the Kullback-Leibler divergence between two empirical + distributions. Three methods are implemented: + - columnwise, relying on a uniform binning and only taking marginals + into account (https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence), - gaussian, relying on a Gaussian approximation, Parameters ---------- @@ -875,8 +1001,8 @@ def kl_divergence( df_mask: pd.DataFrame Mask indicating on what values the divergence should be computed method: str - Method used to compute the divergence on multivariate datasets with missing values. - Possible values are `columnwise` and `gaussian`. + Method used to compute the divergence on multivariate datasets with + missing values. Possible values are `columnwise` and `gaussian`. min_n_rows: int Minimum number of rows for a KL estimation @@ -888,11 +1014,15 @@ def kl_divergence( Raises ------ AssertionError - If the empirical distributions do not have enough samples to estimate a KL divergence. - Consider using a larger dataset of lowering the parameter `min_n_rows`. + If the empirical distributions do not have enough samples to estimate + a KL divergence. Consider using a larger dataset or lowering + the parameter `min_n_rows`. + """ if method == "columnwise": - return columnwise_metric(df1, df2, df_mask, kl_divergence_1D, type_cols="numerical") + return columnwise_metric( + df1, df2, df_mask, kl_divergence_1D, type_cols="numerical" + ) elif method == "gaussian": return pattern_based_weighted_mean_metric( df1, @@ -904,13 +1034,17 @@ def kl_divergence( ) else: raise AssertionError( - f"The parameter of the function wasserstein_distance should be one of" - f"the following: [`columnwise`, `gaussian`], not `{method}`!" + f"The parameter of the function kl_divergence " + "should be one of the following: " + f"[`columnwise`, `gaussian`], not `{method}`!" ) def distance_anticorr(df1: pd.DataFrame, df2: pd.DataFrame) -> float: - """Score based on the distance anticorrelation between two empirical distributions. + """Compute distance anticorr. + + Score based on the distance anticorrelation between + two empirical distributions.
The theoretical basis can be found on dcor documentation: https://dcor.readthedocs.io/en/latest/theory.html @@ -925,6 +1059,7 @@ def distance_anticorr(df1: pd.DataFrame, df2: pd.DataFrame) -> float: ------- float Distance correlation score + """ return (1 - dcor.distance_correlation(df1.values, df2.values)) / 2 @@ -935,7 +1070,7 @@ def distance_anticorr_pattern( df_mask: pd.DataFrame, min_n_rows: int = 10, ) -> pd.Series: - """Correlation distance computed using a pattern decomposition + """Compute correlation distance computed using a pattern decomposition. Parameters ---------- @@ -952,8 +1087,8 @@ def distance_anticorr_pattern( ------- pd.Series Series of computed metrics - """ + """ return pattern_based_weighted_mean_metric( df1, df2, @@ -974,6 +1109,7 @@ def pattern_based_weighted_mean_metric( **kwargs, ) -> pd.Series: """Compute a mean score based on missing patterns. + Note that for each pattern, a score is returned by the function metric. This code is based on https://www.statsmodels.org/ @@ -989,11 +1125,16 @@ def pattern_based_weighted_mean_metric( metric function min_n_rows : int, optional minimum number of row allowed for a pattern without nan, by default 10 + type_cols : str, optional + type of the columns ("all", "numerical", "categorical") + **kwargs : dict + additional arguments Returns ------- pd.Series _description_ + """ if type_cols == "all": cols = df1.columns @@ -1002,7 +1143,9 @@ def pattern_based_weighted_mean_metric( elif type_cols == "categorical": cols = df1.select_dtypes(exclude=["number"]).columns else: - raise ValueError(f"Value {type_cols} is not valid for parameter `type_cols`!") + raise ValueError( + f"Value {type_cols} is not valid for parameter `type_cols`!" + ) if np.any(df_mask & df1.isna()): raise ValueError("The argument df1 has missing values on the mask!") @@ -1016,7 +1159,9 @@ def pattern_based_weighted_mean_metric( df2 = df2[cols].loc[rows_mask] df_mask = df_mask[cols].loc[rows_mask] max_num_row = 0 - for tup_pattern, df_mask_pattern in df_mask.groupby(df_mask.columns.tolist()): + for tup_pattern, df_mask_pattern in df_mask.groupby( + df_mask.columns.tolist() + ): ind_pattern = df_mask_pattern.index df1_pattern = df1.loc[ind_pattern, list(tup_pattern)] max_num_row = max(max_num_row, len(df1_pattern)) @@ -1027,12 +1172,27 @@ def pattern_based_weighted_mean_metric( scores.append(metric(df1_pattern, df2_pattern, **kwargs)) if len(scores) == 0: raise NotEnoughSamples(max_num_row, min_n_rows) - return pd.Series(sum([s * w for s, w in zip(scores, weights)]), index=["All"]) + return pd.Series( + sum([s * w for s, w in zip(scores, weights)]), index=["All"] + ) def get_metric( name: str, ) -> Callable[[pd.DataFrame, pd.DataFrame, pd.DataFrame], pd.Series]: + """Get metric. 
+ + Parameters + ---------- + name : str + name of the metric to compute + + Returns + ------- + Callable[[pd.DataFrame, pd.DataFrame, pd.DataFrame], pd.Series] + metric + + """ dict_metrics: Dict[str, Callable] = { "mse": mean_squared_error, "rmse": root_mean_squared_error, @@ -1043,7 +1203,9 @@ def get_metric( "KL_columnwise": partial(kl_divergence, method="columnwise"), "KL_gaussian": partial(kl_divergence, method="gaussian"), "KS_test": kolmogorov_smirnov_test, - "correlation_diff": mean_difference_correlation_matrix_numerical_features, + "correlation_diff": ( + mean_difference_correlation_matrix_numerical_features + ), "energy": sum_energy_distances, "frechet": partial(frechet_distance, method="single"), "frechet_pattern": partial(frechet_distance, method="pattern"), diff --git a/qolmat/benchmark/missing_patterns.py b/qolmat/benchmark/missing_patterns.py index 65b6d6ea..2f317a4a 100644 --- a/qolmat/benchmark/missing_patterns.py +++ b/qolmat/benchmark/missing_patterns.py @@ -1,19 +1,33 @@ +"""Script for missing patterns.""" + from __future__ import annotations import functools -from typing import Callable, List, Optional, Tuple, Union +import math import warnings +from typing import Callable, List, Optional, Tuple, Union import numpy as np import pandas as pd from sklearn import utils as sku -from sklearn.utils import resample -import math -from qolmat.utils.exceptions import NoMissingValue, SubsetIsAString +from qolmat.utils.exceptions import SubsetIsAString def compute_transition_counts_matrix(states: pd.Series): + """Compute transition counts matrix. + + Parameters + ---------- + states : pd.Series + possible states (masks) + + Returns + ------- + pd.Series | pd.DataFrame + transition counts matrix + + """ if isinstance(states.iloc[0], tuple): n_variables = len(states.iloc[0]) state_nonan = pd.Series([tuple([False] * n_variables)]) @@ -28,18 +42,48 @@ def compute_transition_counts_matrix(states: pd.Series): return df_counts -def compute_transition_matrix(states: pd.Series, ngroups: Optional[List] = None): +def compute_transition_matrix( + states: pd.Series, ngroups: Optional[List] = None +): + """Compute the transition matrix. + + Parameters + ---------- + states : pd.Series + series of possible states (masks) + ngroups : Optional[List], optional + groups, by default None + + Returns + ------- + pd.DataFrame | pd.Series + transition matrix + + """ if ngroups is None: df_counts = compute_transition_counts_matrix(states) else: - list_counts = [compute_transition_counts_matrix(df) for _, df in states.groupby(ngroups)] - df_counts = functools.reduce(lambda a, b: a.add(b, fill_value=0), list_counts) + list_counts = [ + compute_transition_counts_matrix(df) + for _, df in states.groupby(ngroups) + ] + df_counts = functools.reduce( + lambda a, b: a.add(b, fill_value=0), list_counts + ) df_transition = df_counts.div(df_counts.sum(axis=1), axis=0) return df_transition def get_sizes_max(values_isna: pd.Series) -> pd.Series[int]: + """Get max sizes. + + Parameters + ---------- + values_isna : pd.Series + pandas series indicating if value is missing. + + """ ids_hole = (values_isna.diff() != 0).cumsum() sizes_max = values_isna.groupby(ids_hole, group_keys=True).apply( lambda x: (~x) * np.arange(len(x)) @@ -51,14 +95,16 @@ def get_sizes_max(values_isna: pd.Series) -> pd.Series[int]: class _HoleGenerator: - """ - This abstract class implements the generic method to generate masks according to law of missing - values. + """Abstract HoleGenerator class.
+ + This abstract class implements the generic method to generate masks + according to law of missing values. Parameters ---------- n_splits : int - number of dataframes with missing additional missing values to be created + number of dataframes with missing additional missing values to be + created subset : Optional[List[str]] Names of the columns for which holes must be created, by default None ratio_masked : Optional[float] @@ -68,6 +114,7 @@ class _HoleGenerator: Pass an int for reproducible output across multiple function calls. groups: Tuple[str, ...] Column names used to group the data + """ generate_mask: Callable @@ -88,20 +135,22 @@ def __init__( self.groups = groups def fit(self, X: pd.DataFrame) -> _HoleGenerator: - """ - Fits the generator. + """Fit the generator. Parameters ---------- X : pd.DataFrame Initial dataframe with a missing pattern to be imitated. + """ self._check_subset(X) self.dict_ratios = {} missing_per_col = X[self.subset].isna().sum() self.dict_ratios = (missing_per_col / missing_per_col.sum()).to_dict() if self.groups: - self.ngroups = X.groupby(list(self.groups)).ngroup().rename("_ngroup") + self.ngroups = ( + X.groupby(list(self.groups)).ngroup().rename("_ngroup") + ) else: self.ngroups = None @@ -109,6 +158,7 @@ def fit(self, X: pd.DataFrame) -> _HoleGenerator: def split(self, X: pd.DataFrame) -> List[pd.DataFrame]: """Create a list of boolean masks representing the data to mask. + Parameters ---------- X : pd.DataFrame @@ -117,17 +167,19 @@ def split(self, X: pd.DataFrame) -> List[pd.DataFrame]: Returns ------- Dict[str, pd.DataFrame] - the initial dataframe, the dataframe with additional missing entries and the created - mask - """ + the initial dataframe, the dataframe with additional missing + entries and the created mask + """ self.fit(X) list_masks = [] for _ in range(self.n_splits): if self.ngroups is None: mask = self.generate_mask(X) else: - mask = X.groupby(self.ngroups, group_keys=False).apply(self.generate_mask) + mask = X.groupby(self.ngroups, group_keys=False).apply( + self.generate_mask + ) list_masks.append(mask) return list_masks @@ -140,8 +192,10 @@ def _check_subset(self, X: pd.DataFrame): class UniformHoleGenerator(_HoleGenerator): - """This class implements a way to generate holes in a dataframe. - The holes are generated randomly, using the resample method of scikit learn. + """UniformHoleGenerator class. + + This class implements a way to generate holes in a dataframe. + The holes are generated randomly, using the resample method of sklearn. Parameters ---------- @@ -157,6 +211,7 @@ class UniformHoleGenerator(_HoleGenerator): sample_proportional: bool, optional If True, generates holes in target columns with same equal frequency. If False, reproduces the empirical proportions between the variables. + """ def __init__( @@ -177,15 +232,14 @@ def __init__( self.sample_proportional = sample_proportional def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: - """ - Returns a mask for the dataframe at hand. + """Return a mask for the dataframe at hand. Parameters ---------- X : pd.DataFrame Initial dataframe with a missing pattern to be imitated. - """ + """ self.random_state = sku.check_random_state(self.random_state) df_mask = pd.DataFrame(False, index=X.index, columns=X.columns) @@ -206,8 +260,10 @@ def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: class _SamplerHoleGenerator(_HoleGenerator): - """This abstract class implements a generic way to generate holes in a dataframe by sampling 1D - hole size distributions. 
+ """Abstract SamplerHoleGenerator class. + + This abstract class implements a generic way to generate holes in a + dataframe by sampling 1D hole size distributions. Parameters ---------- @@ -222,6 +278,7 @@ class _SamplerHoleGenerator(_HoleGenerator): Pass an int for reproducible output across multiple function calls. groups: Tuple[str, ...] Column names used to group the data + """ sample_sizes: Callable @@ -242,18 +299,27 @@ def __init__( groups=groups, ) - def generate_hole_sizes(self, column: str, n_masked: int, sort: bool = True) -> List[int]: - """Generate a sequence of states "states" of size "size" from - a transition matrix "df_transition" + def generate_hole_sizes( + self, column: str, n_masked: int, sort: bool = True + ) -> List[int]: + """Generate a sequence of states "states" of size "size". + + Generated from a transition matrix "df_transition" Parameters ---------- - size : int - length of the output sequence + column : str + column name + n_masked: int + number of masks + sort: bool, optional + true if sort, by default True Returns ------- - List[float] + List[int] + list of hole sizes + """ sizes_sampled = self.sample_sizes(column, n_masked) sizes_sampled = sizes_sampled[sizes_sampled.cumsum() < n_masked] @@ -265,6 +331,7 @@ def generate_hole_sizes(self, column: str, n_masked: int, sort: bool = True) -> def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: """Create missing data in an arraylike object based on a markov chain. + States of the MC are the different masks of missing values: there are at most pow(2,X.shape[1]) possible states. @@ -277,6 +344,7 @@ def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: ------- mask : pd.DataFrame masked dataframe with additional missing entries + """ mask = pd.DataFrame(False, columns=X.columns, index=X.index) n_masked_col = round(self.ratio_masked * len(X)) @@ -288,14 +356,29 @@ def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: sizes_max = get_sizes_max(states) n_masked_left = n_masked_col - sizes_sampled = self.generate_hole_sizes(column, n_masked_col, sort=True) - assert sum(sizes_sampled) == n_masked_col - sizes_sampled += self.generate_hole_sizes(column, n_masked_col, sort=False) + sizes_sampled = self.generate_hole_sizes( + column, n_masked_col, sort=True + ) + if sum(sizes_sampled) != n_masked_col: + raise ValueError( + "sum of sizes_sampled is different from n_masked_col: " + f"{sum(sizes_sampled)} != {n_masked_col}." + ) + sizes_sampled += self.generate_hole_sizes( + column, n_masked_col, sort=False + ) for sample in sizes_sampled: sample = min(min(sample, sizes_max.max()), n_masked_left) i_hole = self.rng.choice(np.where(sample <= sizes_max)[0]) - assert (~mask[column].iloc[i_hole - sample : i_hole]).all() + if not (~mask[column].iloc[i_hole - sample : i_hole]).all(): + raise ValueError( + "The mask condition is not satisfied for " + f"column={column}, " + f"sample={sample}, " + f"and i_hole={i_hole}." + ) + mask[column].iloc[i_hole - sample : i_hole] = True n_masked_left -= sample @@ -308,12 +391,16 @@ def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: break if list_failed: - warnings.warn(f"No place to introduce sampled holes of size {list_failed}!") + warnings.warn( + f"No place to introduce sampled holes of size {list_failed}!" + ) return mask class GeometricHoleGenerator(_SamplerHoleGenerator): - """This class implements a way to generate holes in a dataframe. + """GeometricHoleGenerator class. + + This class implements a way to generate holes in a dataframe. 
The holes are generated following a Markov 1D process. Parameters @@ -329,6 +416,7 @@ class GeometricHoleGenerator(_SamplerHoleGenerator): Pass an int for reproducible output across multiple function calls. groups: Tuple[str, ...] Column names used to group the data + """ def __init__( @@ -348,14 +436,13 @@ def __init__( ) def fit(self, X: pd.DataFrame) -> GeometricHoleGenerator: - """ - Get the transition matrix from a list of states + """Get the transition matrix from a list of states. Parameters ---------- X : pd.DataFrame - transition matrix (stochastic matrix) current in index, next in columns - 1 is missing + transition matrix (stochastic matrix) current in index, + next in columns 1 is missing Returns @@ -373,16 +460,35 @@ def fit(self, X: pd.DataFrame) -> GeometricHoleGenerator: return self - def sample_sizes(self, column, n_masked): + def sample_sizes(self, column: str, n_masked: int): + """Sample sizes. + + Parameters + ---------- + column : str + column name + n_masked : int + number of masks + + Returns + ------- + pd.Series + sizes sampled + + """ proba_out = self.dict_probas_out[column] mean_size = 1 / proba_out n_holes = 2 * round(n_masked / mean_size) - sizes_sampled = pd.Series(self.rng.geometric(p=proba_out, size=n_holes)) + sizes_sampled = pd.Series( + self.rng.geometric(p=proba_out, size=n_holes) + ) return sizes_sampled class EmpiricalHoleGenerator(_SamplerHoleGenerator): - """This class implements a way to generate holes in a dataframe. + """EmpiricalHoleGenerator class. + + This class implements a way to generate holes in a dataframe. The distribution of holes is learned from the data. The distributions are learned column by column. @@ -399,6 +505,7 @@ class EmpiricalHoleGenerator(_SamplerHoleGenerator): Pass an int for reproducible output across multiple function calls. groups: Tuple[str, ...] Column names used to group the data + """ def __init__( @@ -418,6 +525,19 @@ def __init__( ) def compute_distribution_holes(self, states: pd.Series) -> pd.Series: + """Compute the hole distribution. + + Parameters + ---------- + states : pd.Series + Series of states. + + Returns + ------- + pd.Series + hole distribution + + """ series_id = (states.diff() != 0).cumsum() series_id = series_id[states] distribution_holes = series_id.value_counts().value_counts() @@ -427,7 +547,8 @@ def compute_distribution_holes(self, states: pd.Series) -> pd.Series: def fit(self, X: pd.DataFrame) -> EmpiricalHoleGenerator: """Compute the holes sizes of a dataframe. - Dataframe df has only one column + + Dataframe df has only one column. Parameters ---------- @@ -438,6 +559,7 @@ def fit(self, X: pd.DataFrame) -> EmpiricalHoleGenerator: ------- EmpiricalTimeHoleGenerator The model itself + """ super().fit(X) @@ -445,42 +567,54 @@ def fit(self, X: pd.DataFrame) -> EmpiricalHoleGenerator: for column in self.subset: states = X[column].isna() if self.ngroups is None: - self.dict_distributions_holes[column] = self.compute_distribution_holes(states) + self.dict_distributions_holes[column] = ( + self.compute_distribution_holes(states) + ) else: distributions_holes = states.groupby(self.ngroups).apply( self.compute_distribution_holes ) - distributions_holes = distributions_holes.groupby(by="_size_hole").sum() + distributions_holes = distributions_holes.groupby( + by="_size_hole" + ).sum() self.dict_distributions_holes[column] = distributions_holes return self def sample_sizes(self, column, n_masked): - """Create missing data in an arraylike object based on the holes size distribution. 
+ """Create missing data based on the holes size distribution. Parameters ---------- column : str name of the column to fill with holes - nb_holes : Optional[int], optional - number of holes to create, by default 10 + n_masked :int + number of masks Returns ------- samples_sizes : List[int] + """ distribution_holes = self.dict_distributions_holes[column] distribution_holes /= distribution_holes.sum() - mean_size = (distribution_holes.values * distribution_holes.index.values).sum() + mean_size = ( + distribution_holes.values * distribution_holes.index.values + ).sum() n_samples = 2 * round(n_masked / mean_size) - sizes_sampled = self.rng.choice(distribution_holes.index, n_samples, p=distribution_holes) + sizes_sampled = self.rng.choice( + distribution_holes.index, n_samples, p=distribution_holes + ) return sizes_sampled class MultiMarkovHoleGenerator(_HoleGenerator): - """This class implements a way to generate holes in a dataframe. + """MultiMarkovHoleGenerator class. + + This class implements a way to generate holes in a dataframe. The holes are generated according to a Markov process. - Each line of the dataframe mask (np.nan) represents a state of the Markov chain. + Each line of the dataframe mask (np.nan) represents a state of the + Markov chain. Parameters ---------- @@ -495,6 +629,7 @@ class MultiMarkovHoleGenerator(_HoleGenerator): Pass an int for reproducible output across multiple function calls. groups: Tuple[str, ...] Column names used to group the data + """ def __init__( @@ -514,7 +649,8 @@ def __init__( ) def fit(self, X: pd.DataFrame) -> MultiMarkovHoleGenerator: - """ + """Get the transition matrix. + Get the transition matrix from a list of states transition matrix (stochastic matrix) current in index, next in columns 1 is missing @@ -522,6 +658,7 @@ def fit(self, X: pd.DataFrame) -> MultiMarkovHoleGenerator: Parameters ---------- X : pd.DataFrame + input dataframe Returns ------- @@ -533,28 +670,34 @@ def fit(self, X: pd.DataFrame) -> MultiMarkovHoleGenerator: states = X[self.subset].isna().apply(lambda x: tuple(x), axis=1) self.df_transition = compute_transition_matrix(states, self.ngroups) - self.df_transition.index = pd.MultiIndex.from_tuples(self.df_transition.index) - self.df_transition.columns = pd.MultiIndex.from_tuples(self.df_transition.columns) + self.df_transition.index = pd.MultiIndex.from_tuples( + self.df_transition.index + ) + self.df_transition.columns = pd.MultiIndex.from_tuples( + self.df_transition.columns + ) return self - def generate_multi_realisation(self, n_masked: int) -> List[List[Tuple[bool, ...]]]: - """Generate a sequence of states "states" of size "size" - from a transition matrix "df_transition" + def generate_multi_realisation( + self, n_masked: int + ) -> List[List[Tuple[bool, ...]]]: + """Generate a sequence of states "states" of size "size". + + Generated from a transition matrix "df_transition" Parameters ---------- - df_transition : pd.DataFrame - transition matrix (stochastic matrix) - size : int - length of the output sequence + n_masked : int + number of masks. Returns ------- realisation ; List[int] sequence of states + """ - states = sorted(list(self.df_transition.index)) + states = sorted(self.df_transition.index) state_nona = tuple([False] * len(states[0])) state = state_nona @@ -564,7 +707,9 @@ def generate_multi_realisation(self, n_masked: int) -> List[List[Tuple[bool, ... 
realisation = [] while True: probas = self.df_transition.loc[state, :].values - state = np.random.choice(self.df_transition.columns, 1, p=probas)[0] + state = np.random.choice( + self.df_transition.columns, 1, p=probas + )[0] if state == state_nona: break else: @@ -576,6 +721,7 @@ def generate_multi_realisation(self, n_masked: int) -> List[List[Tuple[bool, ... def generate_mask(self, X: pd.DataFrame) -> List[pd.DataFrame]: """Create missing data in an arraylike object based on a markov chain. + States of the MC are the different masks of missing values: there are at most pow(2,X.shape[1]) possible states. @@ -587,13 +733,15 @@ def generate_mask(self, X: pd.DataFrame) -> List[pd.DataFrame]: Returns ------- Dict[str, pd.DataFrame] - the initial dataframe, the dataframe with additional missing entries and the created - mask - """ + the initial dataframe, the dataframe with additional missing + entries and the created mask + """ self.rng = sku.check_random_state(self.random_state) X_subset = X[self.subset] - mask = pd.DataFrame(False, columns=X_subset.columns, index=X_subset.index) + mask = pd.DataFrame( + False, columns=X_subset.columns, index=X_subset.index + ) values_hasna = X_subset.isna().any(axis=1) @@ -608,7 +756,11 @@ def generate_mask(self, X: pd.DataFrame) -> List[pd.DataFrame]: size_hole = min(size_hole, sizes_max.max()) realisation = realisation[:size_hole] i_hole = self.rng.choice(np.where(size_hole <= sizes_max)[0]) - assert (~mask.iloc[i_hole - size_hole : i_hole]).all().all() + if not (~mask.iloc[i_hole - size_hole : i_hole]).all().all(): + raise ValueError( + f"The mask condition is not satisfied for i_hole={i_hole} " + f"and size_hole={size_hole}." + ) if size_hole != 0: mask.iloc[i_hole - size_hole : i_hole] = mask.iloc[ i_hole - size_hole : i_hole @@ -629,7 +781,9 @@ def generate_mask(self, X: pd.DataFrame) -> List[pd.DataFrame]: class GroupedHoleGenerator(_HoleGenerator): - """This class implements a way to generate holes in a dataframe. + """GroupedHoleGenerator class. + + This class implements a way to generate holes in a dataframe. The holes are generated from groups, specified by the user. Parameters @@ -645,6 +799,7 @@ class GroupedHoleGenerator(_HoleGenerator): Pass an int for reproducible output across multiple function calls. groups : Tuple[str, ...] Names of the columns forming the groups, by default [] + """ def __init__( @@ -667,11 +822,12 @@ def __init__( raise Exception("Argument groups is an empty tuple!") def fit(self, X: pd.DataFrame) -> GroupedHoleGenerator: - """Create the groups based on the column names (groups attribute) + """Create the groups based on the column names (groups attribute). Parameters ---------- X : pd.DataFrame + input dataframe Returns ------- @@ -681,33 +837,41 @@ def fit(self, X: pd.DataFrame) -> GroupedHoleGenerator: Raises ------ if the number of samples/splits is greater than the number of groups. - """ + """ super().fit(X) if self.n_splits > self.ngroups.nunique(): - raise ValueError("n_samples has to be smaller than the number of groups.") + raise ValueError( + "n_samples has to be smaller than the number of groups." + ) return self def split(self, X: pd.DataFrame) -> List[pd.DataFrame]: - """creates masked dataframes + """Create masked dataframes. 
Parameters ---------- X : pd.DataFrame + input dataframe Returns ------- List[pd.DataFrame] list of masks + """ self.fit(X) - group_sizes = X.groupby(self.ngroups, group_keys=False).count().mean(axis=1) + group_sizes = ( + X.groupby(self.ngroups, group_keys=False).count().mean(axis=1) + ) list_masks = [] for _ in range(self.n_splits): - shuffled_group_sizes = group_sizes.sample(frac=1, random_state=self.random_state) + shuffled_group_sizes = group_sizes.sample( + frac=1, random_state=self.random_state + ) ratio_masks = shuffled_group_sizes.cumsum() / len(X) ratio_masks = ratio_masks.reset_index(name="ratio") @@ -715,7 +879,9 @@ def split(self, X: pd.DataFrame) -> List[pd.DataFrame]: closest_ratio_mask = ratio_masks.iloc[ (ratio_masks["ratio"] - self.ratio_masked).abs().argsort()[:1] ] - groups_masked = ratio_masks.iloc[: closest_ratio_mask.index[0], :]["_ngroup"].values + groups_masked = ratio_masks.iloc[: closest_ratio_mask.index[0], :][ + "_ngroup" + ].values if closest_ratio_mask.index[0] == 0: groups_masked = ratio_masks.iloc[:1, :]["_ngroup"].values diff --git a/qolmat/imputations/diffusions/base.py b/qolmat/imputations/diffusions/base.py index 84fe339d..1b6a9abd 100644 --- a/qolmat/imputations/diffusions/base.py +++ b/qolmat/imputations/diffusions/base.py @@ -1,19 +1,24 @@ +"""Script for base classes.""" + +import math from typing import Tuple + import torch -import math class ResidualBlock(torch.nn.Module): - """Residual block based on the work of Gorishniy et al., 2023 + """ResidualBlock. + + Based on the work of Gorishniy et al., 2023 (https://arxiv.org/abs/2106.11959). We follow the implementation found in - https://github.com/Yura52/rtdl/blob/main/rtdl/nn/_backbones.py""" + https://github.com/Yura52/rtdl/blob/main/rtdl/nn/_backbones.py + """ - def __init__(self, dim_input: int, dim_embedding: int = 128, p_dropout: float = 0.0): - """Residual block based on the work of Gorishniy et al., 2023 - (https://arxiv.org/abs/2106.11959). - We follow the implementation found in - https://github.com/Yura52/rtdl/blob/main/rtdl/nn/_backbones.py + def __init__( + self, dim_input: int, dim_embedding: int = 128, p_dropout: float = 0.0 + ): + """Init funciton. Parameters ---------- @@ -23,8 +28,8 @@ def __init__(self, dim_input: int, dim_embedding: int = 128, p_dropout: float = Embedding dimension, by default 128 p_dropout : float, optional Dropout probability, by default 0.1 - """ + """ super().__init__() self.layer_norm = torch.nn.LayerNorm(dim_input) @@ -34,8 +39,10 @@ def __init__(self, dim_input: int, dim_embedding: int = 128, p_dropout: float = self.linear_out = torch.nn.Linear(dim_embedding, dim_input) - def forward(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Return an output of a residual block + def forward( + self, x: torch.Tensor, t: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Return an output of a residual block. Parameters ---------- @@ -48,8 +55,8 @@ def forward(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch ------- Tuple[torch.Tensor, torch.Tensor] Output data at noise step t - """ + """ x_t = self.layer_norm(x + t) x_t_emb = torch.nn.functional.relu(self.linear_in(x_t)) x_t_emb = self.dropout(x_t_emb) @@ -59,12 +66,15 @@ def forward(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch class ResidualBlockTS(torch.nn.Module): - """Residual block based on the work of Gorishniy et al., 2023 + """Residual block time series. 
+ + Residual block based on the work of Gorishniy et al., 2023 (https://arxiv.org/abs/2106.11959). We follow the implementation found in https://github.com/Yura52/rtdl/blob/main/rtdl/nn/_backbones.py This class is for Time-Series data where we add Tranformers to - encode time-based/feature-based context.""" + encode time-based/feature-based context. + """ def __init__( self, @@ -76,12 +86,7 @@ def __init__( nheads_time: int = 8, num_layers_transformer: int = 1, ): - """Residual block based on the work of Gorishniy et al., 2023 - (https://arxiv.org/abs/2106.11959). - We follow the implementation found in - https://github.com/Yura52/rtdl/blob/main/rtdl/nn/_backbones.py - This class is for Time-Series data where we add Tranformers to - encode time-based/feature-based context. + """Init function. Parameters ---------- @@ -99,6 +104,7 @@ def __init__( Number of heads to encode time-based context, by default 8 num_layers_transformer : int, optional Number of transformer layer, by default 1 + """ super().__init__() @@ -118,8 +124,10 @@ def __init__( self.linear_out = torch.nn.Linear(dim_embedding, dim_input) - def forward(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Return an output of a residual block + def forward( + self, x: torch.Tensor, t: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Return an output of a residual block. Parameters ---------- @@ -132,12 +140,15 @@ def forward(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch ------- torch.Tensor Data output, noise predicted + """ batch_size, size_window, dim_emb = x.shape x_emb = self.layer_norm(x) x_emb_time = self.time_layer(x_emb) - t_emb = t.repeat(1, size_window).reshape(batch_size, size_window, dim_emb) + t_emb = t.repeat(1, size_window).reshape( + batch_size, size_window, dim_emb + ) x_t = x + x_emb_time + t_emb x_t = self.linear_out(x_t) @@ -146,11 +157,14 @@ def forward(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch class AutoEncoder(torch.nn.Module): - """Epsilon_theta model of the Algorithm 1 in + """Auto encoder class. + + Epsilon_theta model of the Algorithm 1 in Ho et al., 2020 (https://arxiv.org/abs/2006.11239). This implementation is based on the work of Tashiro et al., 2021 (https://arxiv.org/abs/2107.03502). - Their code: https://github.com/ermongroup/CSDI/blob/main/diff_models.py""" + Their code: https://github.com/ermongroup/CSDI/blob/main/diff_models.py + """ def __init__( self, @@ -161,8 +175,7 @@ def __init__( num_blocks: int = 1, p_dropout: float = 0.0, ): - """Epsilon_theta model in Algorithm 1 in - Ho et al., 2020 (https://arxiv.org/abs/2006.11239) + """Init function. Parameters ---------- @@ -170,12 +183,15 @@ def __init__( Number of steps in forward/reverse processes dim_input : int Input dimension + residual_block: torch.nn.Module + residual blocks dim_embedding : int, optional Embedding dimension, by default 128 num_blocks : int, optional Number of residual blocks, by default 1 p_dropout : float, optional Dropout probability, by default 0.0 + """ super().__init__() @@ -193,10 +209,12 @@ def __init__( self.layer_out_2 = torch.nn.Linear(dim_embedding, dim_input) self.dropout_out = torch.nn.Dropout(p_dropout) - self.residual_layers = torch.nn.ModuleList([residual_block for _ in range(num_blocks)]) + self.residual_layers = torch.nn.ModuleList( + [residual_block for _ in range(num_blocks)] + ) def forward(self, x: torch.Tensor, t: torch.LongTensor) -> torch.Tensor: - """Predict a noise + """Predict a noise. 
Parameters ---------- @@ -209,6 +227,7 @@ def forward(self, x: torch.Tensor, t: torch.LongTensor) -> torch.Tensor: ------- torch.Tensor Data output, noise predicted + """ # Noise step embedding t_emb = torch.as_tensor(self.embedding_noise_step)[t].squeeze() @@ -224,15 +243,20 @@ def forward(self, x: torch.Tensor, t: torch.LongTensor) -> torch.Tensor: x_emb, skip_connection = layer(x_emb, t_emb) skip.append(skip_connection) - out = torch.sum(torch.stack(skip), dim=0) / math.sqrt(len(self.residual_layers)) + out = torch.sum(torch.stack(skip), dim=0) / math.sqrt( + len(self.residual_layers) + ) out = torch.nn.functional.relu(self.layer_out_1(out)) out = self.dropout_out(out) out = self.layer_out_2(out) return out - def _build_embedding(self, num_noise_steps: int, dim: int = 64) -> torch.Tensor: + def _build_embedding( + self, num_noise_steps: int, dim: int = 64 + ) -> torch.Tensor: """Build an embedding for noise step. + More details in section E.1 of Tashiro et al., 2021 (https://arxiv.org/abs/2107.03502) @@ -247,9 +271,14 @@ def _build_embedding(self, num_noise_steps: int, dim: int = 64) -> torch.Tensor: ------- torch.Tensor List of embeddings for noise steps + """ steps = torch.arange(num_noise_steps).unsqueeze(1) # (T,1) - frequencies = 10.0 ** (torch.arange(dim) / (dim - 1) * 4.0).unsqueeze(0) # (1,dim) + frequencies = 10.0 ** (torch.arange(dim) / (dim - 1) * 4.0).unsqueeze( + 0 + ) # (1,dim) table = steps * frequencies # (T,dim) - table = torch.cat([torch.sin(table), torch.cos(table)], dim=1) # (T,dim*2) + table = torch.cat( + [torch.sin(table), torch.cos(table)], dim=1 + ) # (T,dim*2) return table diff --git a/qolmat/imputations/diffusions/ddpms.py b/qolmat/imputations/diffusions/ddpms.py index 231f870e..4f8728e9 100644 --- a/qolmat/imputations/diffusions/ddpms.py +++ b/qolmat/imputations/diffusions/ddpms.py @@ -1,25 +1,31 @@ -from typing import Dict, List, Callable, Tuple, Union -from typing_extensions import Self -import sys -import numpy as np -import pandas as pd +"""Script for DDPM classes.""" + import time from datetime import timedelta -from tqdm import tqdm +from typing import Callable, Dict, List, Tuple, Union +import numpy as np +import pandas as pd import torch -from torch.utils.data import DataLoader, TensorDataset from sklearn import preprocessing from sklearn import utils as sku +from torch.utils.data import DataLoader, TensorDataset +from tqdm import tqdm - -from qolmat.imputations.diffusions.base import AutoEncoder, ResidualBlock, ResidualBlockTS +# from typing_extensions import Self +from qolmat.benchmark import metrics, missing_patterns +from qolmat.imputations.diffusions.base import ( + AutoEncoder, + ResidualBlock, + ResidualBlockTS, +) from qolmat.imputations.diffusions.utils import get_num_params -from qolmat.benchmark import missing_patterns, metrics class TabDDPM: - """Diffusion model for tabular data based on + """Tab DDPM. + + Diffusion model for tabular data based on Denoising Diffusion Probabilistic Models (DDPM) of Ho et al., 2020 (https://arxiv.org/abs/2006.11239), Tashiro et al., 2021 (https://arxiv.org/abs/2107.03502). @@ -42,13 +48,7 @@ def __init__( is_clip: bool = True, random_state: Union[None, int, np.random.RandomState] = None, ): - """Diffusion model for tabular data based on - Denoising Diffusion Probabilistic Models (DDPM) of - Ho et al., 2020 (https://arxiv.org/abs/2006.11239), - Tashiro et al., 2021 (https://arxiv.org/abs/2107.03502). 
- This implementation follows the implementations found in - https://github.com/quickgrid/pytorch-diffusion/tree/main, - https://github.com/ermongroup/CSDI/tree/main + """Init function. Parameters ---------- @@ -70,11 +70,18 @@ def __init__( Dropout probability, by default 0.0 num_sampling : int, optional Number of samples generated for each cell, by default 1 + is_clip : bool, optional + if values have to be clipped, by default True random_state : int, RandomState instance or None, default=None Controls the randomness. Pass an int for reproducible output across multiple function calls. + """ - self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + self.device = ( + torch.device("cuda") + if torch.cuda.is_available() + else torch.device("cpu") + ) # Hyper-parameters for DDPM # Section 2, equation 1, num_noise_steps is T. @@ -92,7 +99,8 @@ def __init__( self.alpha = 1 - self.beta self.alpha_hat = torch.cumprod(self.alpha, dim=0) - # Section 3.2, algorithm 1 formula implementation. Generate values early reuse later. + # Section 3.2, algorithm 1 formula implementation. + # Generate values early reuse later. self.sqrt_alpha_hat = torch.sqrt(self.alpha_hat) self.sqrt_one_minus_alpha_hat = torch.sqrt(1 - self.alpha_hat) @@ -117,10 +125,14 @@ def __init__( seed_torch = self.random_state.randint(2**31 - 1) torch.manual_seed(seed_torch) - def _q_sample(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Section 3.2, algorithm 1 formula implementation. Forward process, defined by `q`. - Found in section 2. `q` gradually adds gaussian noise according to variance schedule. Also, - can be seen on figure 2. + def _q_sample( + self, x: torch.Tensor, t: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Sample q. + + Section 3.2, algorithm 1 formula implementation. Forward process, + defined by `q`. Found in section 2. `q` gradually adds gaussian noise + according to variance schedule. Also, can be seen on figure 2. Ho et al., 2020 (https://arxiv.org/abs/2006.11239) Parameters @@ -134,8 +146,8 @@ def _q_sample(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, tor ------- Tuple[torch.Tensor, torch.Tensor] Noised data at noise step t - """ + """ sqrt_alpha_hat = self.sqrt_alpha_hat[t].view(-1, 1) sqrt_one_minus_alpha_hat = self.sqrt_one_minus_alpha_hat[t].view(-1, 1) @@ -146,16 +158,20 @@ def _set_eps_model(self) -> None: self._eps_model = AutoEncoder( num_noise_steps=self.num_noise_steps, dim_input=self.dim_input, - residual_block=ResidualBlock(self.dim_embedding, self.dim_embedding, self.p_dropout), + residual_block=ResidualBlock( + self.dim_embedding, self.dim_embedding, self.p_dropout + ), dim_embedding=self.dim_embedding, num_blocks=self.num_blocks, p_dropout=self.p_dropout, ).to(self.device) - self.optimiser = torch.optim.Adam(self._eps_model.parameters(), lr=self.lr) + self.optimiser = torch.optim.Adam( + self._eps_model.parameters(), lr=self.lr + ) def _print_valid(self, epoch: int, time_duration: float) -> None: - """Print model performance on validation data + """Print model performance on validation data. 
Parameters ---------- @@ -163,22 +179,31 @@ def _print_valid(self, epoch: int, time_duration: float) -> None: Epoch of the printed performance time_duration : float Duration for training step + """ self.time_durations.append(time_duration) print_step = 1 if int(self.epochs / 10) == 0 else int(self.epochs / 10) if self.print_valid and epoch == 0: - print(f"Num params of {self.__class__.__name__}: {self.num_params}") + print( + f"Num params of {self.__class__.__name__}: {self.num_params}" + ) if self.print_valid and epoch % print_step == 0: string_valid = f"Epoch {epoch}: " for s in self.summary: - string_valid += f" {s}={round(self.summary[s][epoch], self.round)}" + string_valid += ( + f" {s}={round(self.summary[s][epoch], self.round)}" + ) # string_valid += f" | in {round(time_duration, 3)} secs" - remaining_duration = np.mean(self.time_durations) * (self.epochs - epoch) - string_valid += f" | remaining {timedelta(seconds=remaining_duration)}" + remaining_duration = np.mean(self.time_durations) * ( + self.epochs - epoch + ) + string_valid += ( + f" | remaining {timedelta(seconds=remaining_duration)}" + ) print(string_valid) def _impute(self, x: np.ndarray, x_mask_obs: np.ndarray) -> np.ndarray: - """Impute data array + """Impute data array. Parameters ---------- @@ -191,6 +216,7 @@ def _impute(self, x: np.ndarray, x_mask_obs: np.ndarray) -> np.ndarray: ------- np.ndarray Imputed data + """ x_tensor = torch.from_numpy(x).float().to(self.device) x_mask_tensor = torch.from_numpy(x_mask_obs).float().to(self.device) @@ -207,37 +233,55 @@ def _impute(self, x: np.ndarray, x_mask_obs: np.ndarray) -> np.ndarray: for i in reversed(range(1, self.num_noise_steps)): t = ( - torch.ones((x_batch.size(dim=0), 1), dtype=torch.long, device=self.device) + torch.ones( + (x_batch.size(dim=0), 1), + dtype=torch.long, + device=self.device, + ) * i ) if len(x_batch.size()) == 3: - # Data are splited into chunks (i.e., Time-series data), a window of rows + # Data are splited into chunks + # (i.e., Time-series data), + # a window of rows # is processed. sqrt_alpha_t = self.sqrt_alpha[t].view(-1, 1, 1) beta_t = self.beta[t].view(-1, 1, 1) - sqrt_one_minus_alpha_hat_t = self.sqrt_one_minus_alpha_hat[t].view( - -1, 1, 1 + sqrt_one_minus_alpha_hat_t = ( + self.sqrt_one_minus_alpha_hat[t].view(-1, 1, 1) ) epsilon_t = self.std_beta[t].view(-1, 1, 1) else: # Each row of data is separately processed. 
sqrt_alpha_t = self.sqrt_alpha[t].view(-1, 1) beta_t = self.beta[t].view(-1, 1) - sqrt_one_minus_alpha_hat_t = self.sqrt_one_minus_alpha_hat[t].view(-1, 1) + sqrt_one_minus_alpha_hat_t = ( + self.sqrt_one_minus_alpha_hat[t].view(-1, 1) + ) epsilon_t = self.std_beta[t].view(-1, 1) - random_noise = torch.randn_like(noise) if i > 1 else torch.zeros_like(noise) + random_noise = ( + torch.randn_like(noise) + if i > 1 + else torch.zeros_like(noise) + ) noise = ( (1 / sqrt_alpha_t) * ( noise - - ((beta_t / sqrt_one_minus_alpha_hat_t) * self._eps_model(noise, t)) + - ( + (beta_t / sqrt_one_minus_alpha_hat_t) + * self._eps_model(noise, t) + ) ) ) + (epsilon_t * random_noise) - noise = mask_x_batch * x_batch + (1.0 - mask_x_batch) * noise + noise = ( + mask_x_batch * x_batch + (1.0 - mask_x_batch) * noise + ) - # Generate data output, this activation function depends on normalizer_x + # Generate data output, this activation function depends on + # normalizer_x x_out = noise.detach().cpu().numpy() outputs.append(x_out) @@ -252,7 +296,7 @@ def _eval( x_mask_obs_df: pd.DataFrame, x_indices: List, ) -> Dict: - """Evaluate the model + """Evaluate the model. Parameters ---------- @@ -271,8 +315,8 @@ def _eval( ------- Dict Scores - """ + """ list_x_imputed = [] for i in tqdm(range(self.num_sampling), disable=True, leave=False): x_imputed = self._impute(x, x_mask_obs) @@ -289,7 +333,9 @@ def _eval( x_final.loc[x_out.index] = x_out.loc[x_out.index] x_mask_imputed_df = ~x_mask_obs_df - columns_with_True = x_mask_imputed_df.columns[(x_mask_imputed_df == True).any()] + columns_with_True = x_mask_imputed_df.columns[ + (x_mask_imputed_df).any() + ] scores = {} for metric in self.metrics_valid: scores[metric.__name__] = metric( @@ -300,9 +346,12 @@ def _eval( return scores def _process_data( - self, x: pd.DataFrame, mask: pd.DataFrame = None, is_training: bool = False + self, + x: pd.DataFrame, + mask: pd.DataFrame = None, + is_training: bool = False, ) -> Tuple[np.ndarray, np.ndarray, List]: - """Pre-process data + """Pre-process data. Parameters ---------- @@ -317,10 +366,13 @@ def _process_data( ------- Tuple[np.ndarray, np.ndarray] Data and mask pre-processed + """ if is_training: self.normalizer_x.fit(x.values) - x_windows_processed = self.normalizer_x.transform(x.fillna(x.mean()).values) + x_windows_processed = self.normalizer_x.transform( + x.fillna(x.mean()).values + ) x_windows_mask_processed = ~x.isna().to_numpy() if mask is not None: x_windows_mask_processed = mask.to_numpy() @@ -332,7 +384,9 @@ def _process_reversely_data( ): x_normalized = self.normalizer_x.inverse_transform(x_imputed) x_normalized = x_normalized[: x_input.shape[0]] - x_out = pd.DataFrame(x_normalized, columns=self.columns, index=x_input.index) + x_out = pd.DataFrame( + x_normalized, columns=self.columns, index=x_input.index + ) x_final = x_input.copy() x_final.loc[x_out.index] = x_out.loc[x_out.index] @@ -352,8 +406,8 @@ def fit( ), round: int = 10, cols_imputed: Tuple[str, ...] = (), - ) -> Self: - """Fit data + ) -> "TabDDPM": + """Fit data. 
Parameters ---------- @@ -368,8 +422,8 @@ def fit( x_valid : pd.DataFrame, optional Dataframe for validation, by default None metrics_valid : Tuple[Callable, ...], optional - Set of validation metrics, by default ( metrics.mean_absolute_error, - metrics.dist_wasserstein ) + Set of validation metrics, by default (metrics.mean_absolute_error, + metrics.dist_wasserstein) round : int, optional Number of decimal places to round to, for better displaying model performance, by default 10 @@ -380,10 +434,12 @@ def fit( ------ ValueError Batch size is larger than data size + Returns ------- Self Return Self + """ self.dim_input = len(x.columns) self.epochs = epochs @@ -398,23 +454,29 @@ def fit( if len(self.cols_imputed) != 0: self.cols_idx_not_imputed = [ - idx for idx, col in enumerate(self.columns) if col not in self.cols_imputed + idx + for idx, col in enumerate(self.columns) + if col not in self.cols_imputed ] - self.interval_x = {col: [x[col].min(), x[col].max()] for col in self.columns} + self.interval_x = { + col: [x[col].min(), x[col].max()] for col in self.columns + } # x_mask: 1 for observed values, 0 for nan x_processed, x_mask, _ = self._process_data(x, is_training=True) if self.batch_size > x_processed.shape[0]: raise ValueError( - f"Batch size {self.batch_size} larger than size of pre-processed x" - + f" size={x_processed.shape[0]}. Please reduce batch_size." - + " In the case of TabDDPMTS, you can also reduce freq_str." + f"Batch size {self.batch_size} larger than size of " + "pre-processed x " + f"size={x_processed.shape[0]}. Please reduce batch_size. " + "In the case of TabDDPMTS, you can also reduce freq_str." ) if x_valid is not None: - # We reuse the UniformHoleGenerator to generate artificial holes (with one mask) + # We reuse the UniformHoleGenerator to generate artificial holes + # (with one mask) # in validation dataset x_valid_mask = missing_patterns.UniformHoleGenerator( n_splits=1, ratio_masked=self.ratio_nan @@ -425,7 +487,9 @@ def fit( x_processed_valid, x_processed_valid_obs_mask, x_processed_valid_indices, - ) = self._process_data(x_valid, x_valid_obs_mask, is_training=False) + ) = self._process_data( + x_valid, x_valid_obs_mask, is_training=False + ) x_tensor = torch.from_numpy(x_processed).float().to(self.device) x_mask_tensor = torch.from_numpy(x_mask).float().to(self.device) @@ -447,7 +511,10 @@ def fit( time_start = time.time() self._eps_model.train() for id_batch, (x_batch, mask_x_batch) in enumerate(dataloader): - mask_obs_rand = torch.FloatTensor(mask_x_batch.size()).uniform_() > self.ratio_nan + mask_obs_rand = ( + torch.FloatTensor(mask_x_batch.size()).uniform_() + > self.ratio_nan + ) for col in self.cols_idx_not_imputed: mask_obs_rand[:, col] = 0.0 mask_x_batch = mask_x_batch * mask_obs_rand.to(self.device) @@ -461,7 +528,9 @@ def fit( ) x_batch_t, noise = self._q_sample(x=x_batch, t=t) predicted_noise = self._eps_model(x=x_batch_t, t=t) - loss = (self.loss_func(predicted_noise, noise) * mask_x_batch).mean() + loss = ( + self.loss_func(predicted_noise, noise) * mask_x_batch + ).mean() loss.backward() self.optimiser.step() loss_epoch += loss.item() @@ -487,7 +556,7 @@ def fit( return self def predict(self, x: pd.DataFrame) -> pd.DataFrame: - """Predict/impute data + """Predict/impute data. 
Parameters ---------- @@ -498,10 +567,13 @@ def predict(self, x: pd.DataFrame) -> pd.DataFrame: ------- pd.DataFrame Imputed data + """ self._eps_model.eval() - x_processed, x_mask, x_indices = self._process_data(x, is_training=False) + x_processed, x_mask, x_indices = self._process_data( + x, is_training=False + ) list_x_imputed = [] for i in tqdm(range(self.num_sampling), leave=False): @@ -519,7 +591,9 @@ def predict(self, x: pd.DataFrame) -> pd.DataFrame: class TsDDPM(TabDDPM): - """Diffusion model for time-series data based on + """Time series DDPM. + + Diffusion model for time-series data based on Denoising Diffusion Probabilistic Models (DDPMs) of Ho et al., 2020 (https://arxiv.org/abs/2006.11239), Tashiro et al., 2021 (https://arxiv.org/abs/2107.03502). @@ -546,12 +620,7 @@ def __init__( is_rolling: bool = False, random_state: Union[None, int, np.random.RandomState] = None, ): - """Diffusion model for time-series data based on the works of - Ho et al., 2020 (https://arxiv.org/abs/2006.11239), - Tashiro et al., 2021 (https://arxiv.org/abs/2107.03502). - This implementation follows the implementations found in - https://github.com/quickgrid/pytorch-diffusion/tree/main, - https://github.com/ermongroup/CSDI/tree/main + """Init function. Parameters ---------- @@ -582,10 +651,12 @@ def __init__( num_sampling : int, optional Number of samples generated for each cell, by default 1 is_rolling : bool, optional - Use pandas.DataFrame.rolling for preprocessing data, by default False + Use pandas.DataFrame.rolling for preprocessing data, + by default False random_state : int, RandomState instance or None, default=None Controls the randomness. Pass an int for reproducible output across multiple function calls. + """ super().__init__( num_noise_steps, @@ -606,10 +677,14 @@ def __init__( self.num_layers_transformer = num_layers_transformer self.is_rolling = is_rolling - def _q_sample(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Section 3.2, algorithm 1 formula implementation. Forward process, defined by `q`. - Found in section 2. `q` gradually adds gaussian noise according to variance schedule. Also, - can be seen on figure 2. + def _q_sample( + self, x: torch.Tensor, t: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Sample q. + + Section 3.2, algorithm 1 formula implementation. Forward process, + defined by `q`. Found in section 2. `q` gradually adds gaussian noise + according to variance schedule. Also, can be seen on figure 2. 
Parameters ---------- @@ -622,10 +697,12 @@ def _q_sample(self, x: torch.Tensor, t: torch.Tensor) -> Tuple[torch.Tensor, tor ------- Tuple[torch.Tensor, torch.Tensor] Noised data at noise step t - """ + """ sqrt_alpha_hat = self.sqrt_alpha_hat[t].view(-1, 1, 1) - sqrt_one_minus_alpha_hat = self.sqrt_one_minus_alpha_hat[t].view(-1, 1, 1) + sqrt_one_minus_alpha_hat = self.sqrt_one_minus_alpha_hat[t].view( + -1, 1, 1 + ) epsilon = torch.randn_like(x, device=self.device) return sqrt_alpha_hat * x + sqrt_one_minus_alpha_hat * epsilon, epsilon @@ -648,12 +725,17 @@ def _set_eps_model(self): p_dropout=self.p_dropout, ).to(self.device) - self.optimiser = torch.optim.Adam(self._eps_model.parameters(), lr=self.lr) + self.optimiser = torch.optim.Adam( + self._eps_model.parameters(), lr=self.lr + ) def _process_data( - self, x: pd.DataFrame, mask: pd.DataFrame = None, is_training: bool = False + self, + x: pd.DataFrame, + mask: pd.DataFrame = None, + is_training: bool = False, ) -> Tuple[np.ndarray, np.ndarray, List]: - """Pre-process data + """Pre-process data. Parameters ---------- @@ -668,30 +750,45 @@ def _process_data( ------- Tuple[np.ndarray, np.ndarray] Data and mask pre-processed + """ if is_training: self.normalizer_x.fit(x.values) x_windows: List = [] x_windows_indices: List = [] - columns_index = [col for col in x.index.names if col != self.index_datetime] + columns_index = [ + col for col in x.index.names if col != self.index_datetime + ] if is_training: if self.is_rolling: if self.print_valid: print( - "Preprocessing data with sliding window (pandas.DataFrame.rolling)" - + " can require more times than usual. Please be patient!" + "Preprocessing data with sliding window " + "(pandas.DataFrame.rolling) " + "can require more times than usual. " + "Please be patient!" 
) if len(columns_index) == 0: x_windows = x.rolling(window=self.freq_str) else: - columns_index_ = columns_index[0] if len(columns_index) == 1 else columns_index - for x_group in tqdm(x.groupby(by=columns_index_), disable=True, leave=False): + columns_index_ = ( + columns_index[0] + if len(columns_index) == 1 + else columns_index + ) + for x_group in tqdm( + x.groupby(by=columns_index_), disable=True, leave=False + ): x_windows += list( - x_group[1].droplevel(columns_index).rolling(window=self.freq_str) + x_group[1] + .droplevel(columns_index) + .rolling(window=self.freq_str) ) else: - for x_w in x.resample(rule=self.freq_str, level=self.index_datetime): + for x_w in x.resample( + rule=self.freq_str, level=self.index_datetime + ): x_windows.append(x_w[1]) else: if self.is_rolling: @@ -703,23 +800,43 @@ def _process_data( x_windows.append(x_rolling) x_windows_indices.append(x_rolling.index) else: - columns_index_ = columns_index[0] if len(columns_index) == 1 else columns_index - for x_group in tqdm(x.groupby(by=columns_index_), disable=True, leave=False): - x_group_index = [x_group[0]] if len(columns_index) == 1 else x_group[0] + columns_index_ = ( + columns_index[0] + if len(columns_index) == 1 + else columns_index + ) + for x_group in tqdm( + x.groupby(by=columns_index_), disable=True, leave=False + ): + x_group_index = ( + [x_group[0]] + if len(columns_index) == 1 + else x_group[0] + ) x_group_value = x_group[1].droplevel(columns_index) - indices_nan = x_group_value.loc[x_group_value.isna().any(axis=1), :].index - x_group_rolling = x_group_value.rolling(window=self.freq_str) + indices_nan = x_group_value.loc[ + x_group_value.isna().any(axis=1), : + ].index + x_group_rolling = x_group_value.rolling( + window=self.freq_str + ) for x_rolling in x_group_rolling: if x_rolling.index[-1] in indices_nan: x_windows.append(x_rolling) x_rolling_ = x_rolling.copy() for idx, col in enumerate(columns_index): x_rolling_[col] = x_group_index[idx] - x_rolling_ = x_rolling_.set_index(columns_index, append=True) - x_rolling_ = x_rolling_.reorder_levels(x.index.names) + x_rolling_ = x_rolling_.set_index( + columns_index, append=True + ) + x_rolling_ = x_rolling_.reorder_levels( + x.index.names + ) x_windows_indices.append(x_rolling_.index) else: - for x_w in x.resample(rule=self.freq_str, level=self.index_datetime): + for x_w in x.resample( + rule=self.freq_str, level=self.index_datetime + ): x_windows.append(x_w[1]) x_windows_indices.append(x_w[1].index) @@ -736,7 +853,12 @@ def _process_data( if x_w_shape[0] < self.size_window: npad = [(0, self.size_window - x_w_shape[0]), (0, 0)] x_w_norm = np.pad(x_w_norm, pad_width=npad, mode="wrap") - x_w_mask = np.pad(x_w_mask, pad_width=npad, mode="constant", constant_values=1) + x_w_mask = np.pad( + x_w_mask, + pad_width=npad, + mode="constant", + constant_values=1, + ) x_windows_processed.append(x_w_norm) x_windows_mask_processed.append(x_w_mask) @@ -750,10 +872,19 @@ def _process_data( x_m_shape = x_m.shape if x_m_shape[0] < self.size_window: npad = [(0, self.size_window - x_m_shape[0]), (0, 0)] - x_m_mask = np.pad(x_m_mask, pad_width=npad, mode="constant", constant_values=1) + x_m_mask = np.pad( + x_m_mask, + pad_width=npad, + mode="constant", + constant_values=1, + ) x_windows_mask_processed.append(x_m_mask) - return np.array(x_windows_processed), np.array(x_windows_mask_processed), x_windows_indices + return ( + np.array(x_windows_processed), + np.array(x_windows_mask_processed), + x_windows_indices, + ) def _process_reversely_data( self, x_imputed: 
np.ndarray, x_input: pd.DataFrame, x_indices: List @@ -766,9 +897,13 @@ def _process_reversely_data( x_indices_nan_only.append(x_indices_batch[imputed_index]) if len(np.shape(x_indices_nan_only)) == 1: - x_out_index = pd.Index(x_indices_nan_only, name=x_input.index.names[0]) + x_out_index = pd.Index( + x_indices_nan_only, name=x_input.index.names[0] + ) else: - x_out_index = pd.MultiIndex.from_tuples(x_indices_nan_only, names=x_input.index.names) + x_out_index = pd.MultiIndex.from_tuples( + x_indices_nan_only, names=x_input.index.names + ) x_normalized = self.normalizer_x.inverse_transform(x_imputed_nan_only) x_out = pd.DataFrame( x_normalized, @@ -796,8 +931,8 @@ def fit( cols_imputed: Tuple[str, ...] = (), index_datetime: str = "", freq_str: str = "1D", - ) -> Self: - """Fit data + ) -> "TsDDPM": + """Fit data. Parameters ---------- @@ -812,8 +947,8 @@ def fit( x_valid : pd.DataFrame, optional Dataframe for validation, by default None metrics_valid : Tuple[Callable, ...], optional - Set of validation metrics, by default ( metrics.mean_absolute_error, - metrics.dist_wasserstein ) + Set of validation metrics, by default (metrics.mean_absolute_error, + metrics.dist_wasserstein) round : int, optional Number of decimal places to round to, by default 10 cols_imputed : Tuple[str, ...], optional @@ -822,19 +957,23 @@ def fit( Name of datetime-like index freq_str : str Frequency string of DateOffset of Pandas + Raises ------ ValueError Batch size is larger than data size + Returns ------- Self Return Self + """ if index_datetime == "": raise ValueError( - "Please set the params index_datetime (the name of datatime-like index column)." - + f" Suggestions: {x.index.names}" + "Please set the params index_datetime " + "(the name of datatime-like index column). " + f" Suggestions: {x.index.names}" ) self.index_datetime = index_datetime self.freq_str = freq_str diff --git a/qolmat/imputations/diffusions/utils.py b/qolmat/imputations/diffusions/utils.py index c67a2f5f..eb24fb4f 100644 --- a/qolmat/imputations/diffusions/utils.py +++ b/qolmat/imputations/diffusions/utils.py @@ -1,9 +1,11 @@ +"""Utils for diffusion imputers.""" + import numpy as np import torch def get_num_params(model: torch.nn.Module) -> int: - """Get the total number of parameters of a model + """Get the total number of parameters of a model. Parameters ---------- @@ -14,6 +16,7 @@ def get_num_params(model: torch.nn.Module) -> int: ------- float the total number of parameters + """ model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py index 463add50..eba85062 100644 --- a/qolmat/imputations/em_sampler.py +++ b/qolmat/imputations/em_sampler.py @@ -1,6 +1,8 @@ +"""Script for EM imputation.""" + +import warnings from abc import abstractmethod from typing import Dict, List, Literal, Tuple, Union -import warnings import numpy as np from numpy.typing import NDArray @@ -8,15 +10,17 @@ from scipy import optimize as spo from sklearn import utils as sku from sklearn.base import BaseEstimator, TransformerMixin -from typing_extensions import Self +# from typing_extensions import Self from qolmat.utils import utils def _conjugate_gradient(A: NDArray, X: NDArray, mask: NDArray) -> NDArray: - """ - Minimize Tr(X.T AX) wrt X where X is constrained to the initial value outside the given mask - To this aim, we compute in parallel a gradient algorithm for each row. + """Compute conjugate gradient. 
+ + Minimize Tr(X.T AX) wrt X where X is constrained to the initial value + outside the given mask To this aim, we compute in parallel a gradient + algorithm for each row. Parameters ---------- @@ -25,12 +29,14 @@ def _conjugate_gradient(A: NDArray, X: NDArray, mask: NDArray) -> NDArray: X : NDArray Array containing the values to optimize mask : NDArray - Boolean array indicating if a value of X is a variable of the optimization + Boolean array indicating if a value of X is a variable of + the optimization Returns ------- NDArray Minimized array. + """ rows_imputed = mask.any(axis=1) X_temp = X[rows_imputed, :].copy() @@ -44,7 +50,7 @@ def _conjugate_gradient(A: NDArray, X: NDArray, mask: NDArray) -> NDArray: alphan = np.zeros(n_rows) betan = np.zeros(n_rows) for n in range(n_iter + 2): - # if np.max(np.sum(rn**2)) < tolerance : # Condition de sortie " usuelle " + # if np.max(np.sum(rn**2)) < tolerance : # X_temp[mask_isna] = xn[mask_isna] # return X_temp.transpose() Apn = pn @ A @@ -53,14 +59,18 @@ def _conjugate_gradient(A: NDArray, X: NDArray, mask: NDArray) -> NDArray: denominator = np.sum(pn * Apn, axis=1) not_converged = denominator != 0 # we stop updating if convergence is reached for this row - alphan[not_converged] = numerator[not_converged] / denominator[not_converged] + alphan[not_converged] = ( + numerator[not_converged] / denominator[not_converged] + ) xn, rnp1 = xn + pn * alphan[:, None], rn - Apn * alphan[:, None] numerator = np.sum(rnp1**2, axis=1) denominator = np.sum(rn**2, axis=1) not_converged = denominator != 0 # we stop updating if convergence is reached for this row - betan[not_converged] = numerator[not_converged] / denominator[not_converged] + betan[not_converged] = ( + numerator[not_converged] / denominator[not_converged] + ) pn, rn = rnp1 + pn * betan[:, None], rnp1 @@ -71,8 +81,12 @@ def _conjugate_gradient(A: NDArray, X: NDArray, mask: NDArray) -> NDArray: return X_final -def max_diff_Linf(list_params: List[NDArray], n_steps: int, order: int = 1) -> float: - """Computes the maximal L infinity norm between the `n_steps` last elements spaced by order. +def max_diff_Linf( + list_params: List[NDArray], n_steps: int, order: int = 1 +) -> float: + """Compute the maximal L infinity norm. + + Computed between the `n_steps` last elements spaced by order. Used to compute the stop criterion. Parameters @@ -88,6 +102,7 @@ def max_diff_Linf(list_params: List[NDArray], n_steps: int, order: int = 1) -> f ------- float Minimal norm of differences + """ params = np.stack(list_params[-n_steps - order : -order]) params_shift = np.stack(list_params[-n_steps:]) @@ -96,8 +111,9 @@ def max_diff_Linf(list_params: List[NDArray], n_steps: int, order: int = 1) -> f class EM(BaseEstimator, TransformerMixin): - """ - Generic abstract class for missing values imputation through EM optimization and + """Abstract class for EM imputatoin. + + It uses imputation through EM optimization and a projected MCMC sampling process. Parameters @@ -110,30 +126,35 @@ class EM(BaseEstimator, TransformerMixin): Number of iterations for the Gibbs sampling method (+ noise addition), necessary for convergence, by default 50. n_samples : int, optional - Number of data samples used to estimate the parameters of the distribution. Default, 10 + Number of data samples used to estimate the parameters of the + distribution. Default, 10 ampli : float, optional Whether to sample the posterior (1) or to maximise likelihood (0), by default 1. 
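For reference, the quadratic problem `_conjugate_gradient` solves row by row (minimise Tr(X.T A X) with the entries outside `mask` held fixed) has a closed-form solution that can be used as a sanity check. A minimal sketch for a single row, assuming a symmetric positive-definite matrix `A`; the values are toy data, not qolmat code:

import numpy as np
from scipy import optimize as spo

rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4))
A = A @ A.T + np.eye(4)                        # symmetric positive definite
x = rng.normal(size=4)
mask = np.array([True, False, True, False])    # entries to optimise (missing)

# Closed-form minimiser of x.T @ A @ x over the masked entries
A_mm = A[np.ix_(mask, mask)]
A_mo = A[np.ix_(mask, ~mask)]
x_closed = -np.linalg.solve(A_mm, A_mo @ x[~mask])

# Numerical check with a generic optimiser
def objective(x_m):
    z = x.copy()
    z[mask] = x_m
    return z @ A @ z

res = spo.minimize(objective, x[mask])
print(np.allclose(res.x, x_closed, atol=1e-4))  # True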
random_state : int, optional - The seed of the pseudo random number generator to use, for reproductibility. + The seed of the pseudo random number generator to use, + for reproductibility. dt : float, optional - Process integration time step, a large value increases the sample bias and can make - the algorithm unstable, but compensates for a smaller n_iter_ou. By default, 2e-2. + Process integration time step, a large value increases the sample bias + and can make the algorithm unstable, but compensates for a + smaller n_iter_ou. By default, 2e-2. tolerance : float, optional - Threshold below which a L infinity norm difference indicates the convergence of the - parameters - stagnation_threshold : float, optional - Threshold below which a stagnation of the L infinity norm difference indicates the + Threshold below which a L infinity norm difference indicates the convergence of the parameters + stagnation_threshold : float, optional + Threshold below which a stagnation of the L infinity norm difference + indicates the convergence of the parameters stagnation_loglik : float, optional - Threshold below which an absolute difference of the log likelihood indicates the - convergence of the parameters + Threshold below which an absolute difference of the log likelihood + indicates the convergence of the parameters min_std: float, optional - Threshold below which the initial data matrix is considered ill-conditioned + Threshold below which the initial data matrix is considered + ill-conditioned period : int, optional Integer used to fold the temporal data periodically verbose : bool, optional Verbosity level, if False the warnings are silenced + """ def __init__( @@ -153,7 +174,10 @@ def __init__( verbose: bool = False, ): if method not in ["mle", "sample"]: - raise ValueError(f"`method` must be 'mle' or 'sample', provided value is '{method}'") + raise ValueError( + "`method` must be 'mle' or 'sample', " + f"provided value is '{method}'." + ) self.method = method self.max_iter_em = max_iter_em @@ -180,38 +204,73 @@ def _check_convergence(self) -> bool: @abstractmethod def reset_learned_parameters(self): + """Reset learned parameters.""" pass @abstractmethod def update_parameters(self, X: NDArray): + """Update parameters.""" pass @abstractmethod def combine_parameters(self): + """Combine parameters.""" pass def fit_parameters(self, X: NDArray): + """Fit parameters. + + Parameters + ---------- + X: NDArray + Array to compute the parameters. + + """ self.reset_learned_parameters() self.update_parameters(X) self.combine_parameters() def fit_parameters_with_missingness(self, X: NDArray): - """ - First estimation of the model parameters based on data with missing values. + """Fit the first estimation of the model parameters. + + It is based on data with missing values. Parameters ---------- X : NDArray Data matrix with missingness + """ X_imp = self.init_imputation(X) self.fit_parameters(X_imp) def update_criteria_stop(self, X: NDArray): + """Update the stopping criteria based on X. + + Parameters + ---------- + X : NDArray + array used to compute log likelihood. + + """ self.loglik = self.get_loglikelihood(X) @abstractmethod def get_loglikelihood(self, X: NDArray) -> float: + """Compute the loglikelihood of an array. + + Parameters + ---------- + X : NDArray + Input array. + + Returns + ------- + float + log-likelihood. + + """ return 0 @abstractmethod @@ -219,10 +278,24 @@ def gradient_X_loglik( self, X: NDArray, ) -> NDArray: + """Compute the gradient of the log-likelihood with respect to X.
+ + Parameters + ---------- + X : NDArray + input array + + Returns + ------- + NDArray + gradient + + """ return np.empty # type: ignore #noqa def get_gamma(self, n_cols: int) -> NDArray: - """ + """Get gamma. + Normalization matrix in the sampling process. Parameters @@ -234,6 +307,7 @@ def get_gamma(self, n_cols: int) -> NDArray: ------- NDArray Gamma matrix + """ # return np.ones((1, n_cols)) return np.eye(n_cols) @@ -246,13 +320,14 @@ def _maximize_likelihood(self, X: NDArray, mask_na: NDArray) -> NDArray: X : NDArray Input numpy array without missingness mask_na : NDArray - Boolean dataframe indicating which coefficients should be resampled, and are therefore - the variables of the optimization + Boolean dataframe indicating which coefficients should be + resampled, and are therefore the variables of the optimization Returns ------- NDArray DataFrame with imputed values. + """ def fun_obj(x): @@ -267,7 +342,8 @@ def fun_jac(x): grad_x = grad_x[mask_na] return grad_x - # the method BFGS is much slower, probabily not adapted to the high-dimension setting + # the method BFGS is much slower, probabily not adapted + # to the high-dimension setting res = spo.minimize(fun_obj, X[mask_na], jac=fun_jac, method="CG") x = res.x @@ -281,27 +357,31 @@ def _sample_ou( mask_na: NDArray, estimate_params: bool = True, ) -> NDArray: - """ - Samples the Gaussian distribution under the constraint that not na values must remain + """Sample the Gaussian distribution. + + Under the constraint that not na values must remain unchanged, using a projected Ornstein-Uhlenbeck process. - The sampled distribution tends to the target distribution in the limit dt -> 0 and - n_iter_ou x dt -> infty. + The sampled distribution tends to the target distribution + in the limit dt -> 0 and n_iter_ou x dt -> infty. Parameters ---------- - df : NDArray - Inital dataframe to be imputed, which should have been already imputed using a simple - method. This first imputation will be used as an initial guess. + X : NDArray + Inital dataframe to be imputed, which should have been already + imputed using a simple method. This first imputation will be used + as an initial guess. mask_na : NDArray - Boolean dataframe indicating which coefficients should be resampled. + Boolean dataframe indicating which coefficients should be + resampled. estimate_params : bool - Indicates if the parameters of the distribution should be estimated while the data are - sampled. + Indicates if the parameters of the distribution should be estimated + while the data are sampled. Returns ------- NDArray Sampled data matrix + """ X_copy = X.copy() n_rows, n_cols = X_copy.shape @@ -314,7 +394,10 @@ def _sample_ou( for i in range(self.n_iter_ou): noise = self.ampli * self.rng.normal(0, 1, size=(n_rows, n_cols)) grad_X = -self.gradient_X_loglik(X_copy) - X_copy += -self.dt * grad_X @ gamma + np.sqrt(2 * self.dt) * noise @ sqrt_gamma + X_copy += ( + -self.dt * grad_X @ gamma + + np.sqrt(2 * self.dt) * noise @ sqrt_gamma + ) X_copy[~mask_na] = X_init[~mask_na] if estimate_params: self.update_parameters(X_copy) @@ -322,6 +405,14 @@ def _sample_ou( return X_copy def fit_X(self, X: NDArray) -> None: + """Ft X array. + + Parameters + ---------- + X : NDArray + Input array. + + """ mask_na = np.isnan(X) # first imputation @@ -351,14 +442,14 @@ def fit_X(self, X: NDArray) -> None: self.dict_criteria_stop = {key: [] for key in self.dict_criteria_stop} self.X = X - def fit(self, X: NDArray) -> Self: - """ - Fit the statistical distribution with the input X array. 
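The `_sample_ou` loop above is a Langevin (Ornstein-Uhlenbeck) step projected onto the observed entries: follow the gradient of the log-likelihood plus Gaussian noise, then reset the values that are not missing. A minimal sketch of that sampler for a multivariate Gaussian target, with `gamma` taken as the identity; values are toy data, not qolmat code:

import numpy as np

rng = np.random.default_rng(0)
mu = np.array([0.0, 1.0, -1.0])
cov = np.array([[1.0, 0.5, 0.0], [0.5, 1.0, 0.3], [0.0, 0.3, 1.0]])
cov_inv = np.linalg.inv(cov)

X = np.array([[0.0, np.nan, np.nan]])   # one row, two missing entries
mask_na = np.isnan(X)
X[mask_na] = 0.0                        # naive initial guess
X_init = X.copy()

dt, n_iter_ou, ampli = 2e-2, 500, 1.0
for _ in range(n_iter_ou):
    grad_loglik = -(X - mu) @ cov_inv      # gradient of the Gaussian log-density
    noise = ampli * rng.normal(size=X.shape)
    X = X + dt * grad_loglik + np.sqrt(2 * dt) * noise
    X[~mask_na] = X_init[~mask_na]         # projection: keep observed values

print(X)   # one sample of the missing entries given the observed one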
+ def fit(self, X: NDArray) -> "EM": + """Fit the statistical distribution with the input X array. Parameters ---------- X : NDArray Numpy array to be imputed + """ X = X.copy() self.shape_original = X.shape @@ -394,8 +485,7 @@ def fit(self, X: NDArray) -> Self: return self def transform(self, X: NDArray) -> NDArray: - """ - Transform the input X array by imputing the missing values. + """Transform the input X array by imputing the missing values. Parameters ---------- @@ -406,6 +496,7 @@ def transform(self, X: NDArray) -> NDArray: ------- NDArray Final array after EM sampling. + """ mask_na = np.isnan(X) X = X.copy() @@ -432,8 +523,7 @@ def transform(self, X: NDArray) -> NDArray: return X def pretreatment(self, X, mask_na) -> Tuple[NDArray, NDArray]: - """ - Pretreats the data before imputation by EM, making it more robust. + """Pretreat the data before imputation by EM, making it more robust. Parameters ---------- @@ -448,13 +538,15 @@ def pretreatment(self, X, mask_na) -> Tuple[NDArray, NDArray]: A tuple containing: - X the pretreatd data matrix - mask_na the updated mask + """ return X, mask_na def _check_conditionning(self, X: NDArray): - """ - Check that the data matrix X is not ill-conditioned. Running the EM algorithm on data with - colinear columns leads to numerical instability and unconsistent results. + """Check that the data matrix X is not ill-conditioned. + + Running the EM algorithm on data with colinear columns leads to + numerical instability and unconsistent results. Parameters ---------- @@ -465,6 +557,7 @@ def _check_conditionning(self, X: NDArray): ------ IllConditioned Data matrix is ill-conditioned due to colinear columns. + """ n_samples, n_cols = X.shape # if n_rows == 1 the function np.cov returns a float @@ -476,17 +569,20 @@ def _check_conditionning(self, X: NDArray): min_sv = min(np.sqrt(sv)) if min_sv < self.min_std: warnings.warn( - f"The covariance matrix is ill-conditioned, indicating high-colinearity: the " - f"smallest singular value of the data matrix is smaller than the threshold " - f"min_std ({min_sv} < {self.min_std}). Consider removing columns of decreasing " - f"the threshold." + "The covariance matrix is ill-conditioned, " + "indicating high-colinearity: the " + "smallest singular value of the data matrix is smaller " + "than the threshold " + f"min_std ({min_sv} < {self.min_std}). " + "Consider removing columns of decreasing the threshold." ) class MultiNormalEM(EM): - """ - Imputation of missing values using a multivariate Gaussian model through EM optimization and - using a projected Ornstein-Uhlenbeck process. + """Multinormal EM imputer. + + Imputation of missing values using a multivariate Gaussian model through + EM optimization and using a projected Ornstein-Uhlenbeck process. Parameters ---------- @@ -498,28 +594,32 @@ class MultiNormalEM(EM): Number of iterations for the Gibbs sampling method (+ noise addition), necessary for convergence, by default 50. n_samples : int, optional - Number of data samples used to estimate the parameters of the distribution. Default, 10 + Number of data samples used to estimate the parameters of the + distribution. Default, 10 ampli : float, optional Whether to sample the posterior (1) or to maximise likelihood (0), by default 1. random_state : int, optional - The seed of the pseudo random number generator to use, for reproductibility. + The seed of the pseudo random number generator to use, + for reproductibility. 
dt : float - Process integration time step, a large value increases the sample bias and can make - the algorithm unstable, but compensates for a smaller n_iter_ou. By default, 2e-2. + Process integration time step, a large value increases the sample bias + and can make the algorithm unstable, but compensates for a + smaller n_iter_ou. By default, 2e-2. tolerance : float, optional - Threshold below which a L infinity norm difference indicates the convergence of the - parameters + Threshold below which a L infinity norm difference indicates the + convergence of the parameters stagnation_threshold : float, optional - Threshold below which a L infinity norm difference indicates the convergence of the - parameters - stagnation_loglik : float, optional - Threshold below which an absolute difference of the log likelihood indicates the + Threshold below which a L infinity norm difference indicates the convergence of the parameters + stagnation_loglik : float, optional + Threshold below which an absolute difference of the log likelihood + indicates the convergence of the parameters period : int, optional Integer used to fold the temporal data periodically verbose : bool, optional Verbosity level, if False the warnings are silenced + """ def __init__( @@ -554,9 +654,11 @@ def __init__( self.dict_criteria_stop = {"logliks": [], "means": [], "covs": []} def get_loglikelihood(self, X: NDArray) -> float: - """ - Value of the log-likelihood up to a constant for the provided X, using the attributes - `means` and `cov_inv` for the multivariate normal distribution. + """Get the log-likelihood. + + Value of the log-likelihood up to a constant for the provided X, + using the attributes `means` and `cov_inv` for the multivariate + normal distribution. Parameters ---------- @@ -567,13 +669,15 @@ def get_loglikelihood(self, X: NDArray) -> float: ------- float Computed value + """ Xc = X - self.means return -((Xc @ self.cov_inv) * Xc).sum().sum() / 2 def gradient_X_loglik(self, X: NDArray) -> NDArray: - """ - Gradient of the log-likelihood for the provided X, using the attributes + """Compute the gradient of the log-likelihood for the provided X. + + It uses the attributes `means` and `cov_inv` for the multivariate normal distribution. Parameters @@ -584,15 +688,19 @@ def gradient_X_loglik(self, X: NDArray) -> NDArray: Returns ------- NDArray - The gradient of the log-likelihood with respect to the input variable `X`. + The gradient of the log-likelihood with respect to the input + variable `X`. + """ grad_X = -(X - self.means) @ self.cov_inv return grad_X def get_gamma(self, n_cols: int) -> NDArray: - """ - If the covariance matrix is not full-rank, defines the projection matrix keeping the - sampling process in the relevant subspace. + """Get gamma. + + If the covariance matrix is not full-rank, defines the + projection matrix keeping the sampling process in the relevant + subspace. Parameters ---------- @@ -603,6 +711,7 @@ def get_gamma(self, n_cols: int) -> NDArray: ------- NDArray Gamma matrix + """ U, diag, Vt = spl.svd(self.cov) diag_trunc = np.where(diag < self.min_std**2, 0, diag) @@ -614,13 +723,13 @@ def get_gamma(self, n_cols: int) -> NDArray: return gamma def update_criteria_stop(self, X: NDArray): - """ - Updates the variables which will be used to compute the stop critera + """Update the variables to compute the stopping critera. 
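The two methods above are the centred Gaussian log-likelihood up to an additive constant and its gradient. A short numerical check of those formulas against `scipy.stats.multivariate_normal`, with toy values (not qolmat code):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(1)
means = np.array([0.0, 2.0])
cov = np.array([[2.0, 0.3], [0.3, 1.0]])
cov_inv = np.linalg.inv(cov)
X = rng.normal(size=(5, 2))

Xc = X - means
loglik = -((Xc @ cov_inv) * Xc).sum() / 2   # quadratic term only
grad = -(X - means) @ cov_inv               # d loglik / d X

# The quadratic term differs from the full log-density only by a constant
full = multivariate_normal(means, cov).logpdf(X).sum()
expected_const = 5 * (-0.5 * np.log(np.linalg.det(2 * np.pi * cov)))
print(np.isclose(full - loglik, expected_const))  # True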
Parameters ---------- X : NDArray Input matrix with variables in column + """ self.loglik = self.get_loglikelihood(X) self.dict_criteria_stop["means"].append(self.means) @@ -628,20 +737,18 @@ def update_criteria_stop(self, X: NDArray): self.dict_criteria_stop["logliks"].append(self.loglik) def reset_learned_parameters(self): - """ - Resets all lists of estimated parameters before starting a new estimation. - """ + """Reset lists of parameters before starting a new estimation.""" self.list_means = [] self.list_cov = [] def update_parameters(self, X): - """ - Retains statistics relative to the current sample, in prevision of combining them. + """Retain statistics relative to the current sample. Parameters ---------- X : NDArray Input matrix with variables in column + """ n_rows, n_cols = X.shape means = np.mean(X, axis=0) @@ -654,9 +761,9 @@ def update_parameters(self, X): self.list_cov.append(cov) def combine_parameters(self): - """ - Combine all statistics computed for each sample in the update step, using the MANOVA - formula. + """Combine all statistics computed for each sample in the update step. + + If uses the MANOVA formula. """ list_means = self.list_means[-self.n_samples :] list_cov = self.list_cov[-self.n_samples :] @@ -674,20 +781,21 @@ def combine_parameters(self): self.cov_inv = np.linalg.pinv(self.cov) def fit_parameters_with_missingness(self, X: NDArray): - """ - First estimation of the model parameters based on data with missing values. + """Fit the first estimation of the model parameters. + + It is based on data with missing values. Parameters ---------- X : NDArray Data matrix with missingness + """ self.means, self.cov = utils.nan_mean_cov(X) self.cov_inv = np.linalg.pinv(self.cov) def set_parameters(self, means: NDArray, cov: NDArray): - """ - Sets the model parameters from a user value. + """Set the model parameters from a user value. Parameters ---------- @@ -695,27 +803,28 @@ def set_parameters(self, means: NDArray, cov: NDArray): Specified value for the mean vector cov : NDArray Specified value for the covariance matrix + """ self.means = means self.cov = cov self.cov_inv = np.linalg.pinv(self.cov) def _maximize_likelihood(self, X: NDArray, mask_na: NDArray) -> NDArray: - """ - Get the argmax of a posterior distribution. + """Get the argmax of a posterior distribution. Parameters ---------- X : NDArray Input DataFrame without missingness mask_na : NDArray - Boolean dataframe indicating which coefficients should be resampled, and are therefore - the variables of the optimization + Boolean dataframe indicating which coefficients should be + resampled, and are therefore the variables of the optimization Returns ------- NDArray DataFrame with imputed values. + """ X_center = X - self.means X_imputed = _conjugate_gradient(self.cov_inv, X_center, mask_na) @@ -723,8 +832,7 @@ def _maximize_likelihood(self, X: NDArray, mask_na: NDArray) -> NDArray: return X_imputed def init_imputation(self, X: NDArray) -> NDArray: - """ - First simple imputation before iterating. + """First simple imputation before iterating. Parameters ---------- @@ -735,24 +843,29 @@ def init_imputation(self, X: NDArray) -> NDArray: ------- NDArray Imputed matrix + """ return utils.impute_nans(X, method="median") def _check_convergence(self) -> bool: - """ - Check if the EM algorithm has converged. Three criteria: - 1) if the differences between the estimates of the parameters (mean and covariance) is - less than a threshold (min_diff_reached - tolerance). 
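`combine_parameters` merges the per-sample estimates with a MANOVA-style formula; the exact weighting is not visible in this hunk. A plausible sketch using the law of total covariance (average within-sample covariance plus covariance of the sample means), for illustration only and not necessarily identical to the library's formula:

import numpy as np

rng = np.random.default_rng(2)
samples = [rng.normal(size=(50, 3)) for _ in range(4)]   # e.g. 4 Gibbs samples

list_means = [s.mean(axis=0) for s in samples]
list_cov = [np.cov(s, rowvar=False) for s in samples]

stack_means = np.stack(list_means)
means = stack_means.mean(axis=0)                  # combined mean
within = np.mean(np.stack(list_cov), axis=0)      # average within-sample covariance
between = np.cov(stack_means, rowvar=False)       # covariance of the sample means
cov = within + between                            # total covariance
print(means.shape, cov.shape)                     # (3,) (3, 3)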
- 2) if the difference of the consecutive differences of the estimates is less than a - threshold, i.e. stagnates over the last 5 interactions (min_diff_stable - - stagnation_threshold). + """Check if the EM algorithm has converged. + + Three criteria: + 1) if the differences between the estimates of the parameters + (mean and covariance) is less than a threshold + (min_diff_reached - tolerance). + 2) if the difference of the consecutive differences of the estimates + is less than a threshold, i.e. stagnates over the last 5 interactions + (min_diff_stable - stagnation_threshold). 3) if the likelihood of the data no longer increases, - i.e. stagnates over the last 5 iterations (max_loglik - stagnation_loglik). + i.e. stagnates over the last 5 iterations + (max_loglik - stagnation_loglik). Returns ------- bool True/False if the algorithm has converged + """ list_means = self.dict_criteria_stop["means"] list_covs = self.dict_criteria_stop["covs"] @@ -764,7 +877,10 @@ def _check_convergence(self) -> bool: min_diff_means1 = max_diff_Linf(list_means, n_steps=1) min_diff_covs1 = max_diff_Linf(list_covs, n_steps=1) - min_diff_reached = min_diff_means1 < self.tolerance and min_diff_covs1 < self.tolerance + min_diff_reached = ( + min_diff_means1 < self.tolerance + and min_diff_covs1 < self.tolerance + ) if min_diff_reached: return True @@ -789,9 +905,11 @@ def _check_convergence(self) -> bool: class VARpEM(EM): - """ - Imputation of missing values using a vector autoregressive model through EM optimization and - using a projected Ornstein-Uhlenbeck process. Equations and notations and from the following + """VAR(p) EM imputer. + + Imputation of missing values using a vector autoregressive model through + EM optimization and using a projected Ornstein-Uhlenbeck process. + Equations and notations and from the following reference, matrices are transposed for consistency: Lütkepohl (2005) New Introduction to Multiple Time Series Analysis @@ -810,19 +928,21 @@ class VARpEM(EM): Whether to sample the posterior (1) or to maximise likelihood (0), by default 1. random_state : int, optional - The seed of the pseudo random number generator to use, for reproductibility. + The seed of the pseudo random number generator to use, + for reproductibility. dt : float - Process integration time step, a large value increases the sample bias and can make - the algorithm unstable, but compensates for a smaller n_iter_ou. By default, 2e-2. + Process integration time step, a large value increases the sample bias + and can make the algorithm unstable, but compensates for + a smaller n_iter_ou. By default, 2e-2. 
tolerance : float, optional - Threshold below which a L infinity norm difference indicates the convergence of the - parameters + Threshold below which a L infinity norm difference indicates + the convergence of the parameters stagnation_threshold : float, optional - Threshold below which a L infinity norm difference indicates the convergence of the - parameters - stagnation_loglik : float, optional - Threshold below which an absolute difference of the log likelihood indicates the + Threshold below which a L infinity norm difference indicates the convergence of the parameters + stagnation_loglik : float, optional + Threshold below which an absolute difference of the log likelihood + indicates the convergence of the parameters period : int, optional Integer used to fold the temporal data periodically verbose: bool @@ -831,18 +951,19 @@ class VARpEM(EM): Attributes ---------- X_intermediate : list - List of pd.DataFrame giving the results of the EM process as function of the - iteration number. + List of pd.DataFrame giving the results of the EM process as function + of the iteration number. Examples -------- >>> import numpy as np >>> from qolmat.imputations.em_sampler import VARpEM >>> imputer = VARpEM(method="sample", random_state=11) - >>> X = np.array([[1, 1, 1, 1], - ... [np.nan, np.nan, 3, 2], - ... [1, 2, 2, 1], [2, 2, 2, 2]]) + >>> X = np.array( + ... [[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]] + ... ) >>> imputer.fit_transform(X) # doctest: +SKIP + """ def __init__( @@ -882,9 +1003,10 @@ def __init__( self.p_to_fit = True def get_loglikelihood(self, X: NDArray) -> float: - """ - Value of the log-likelihood up to a constant for the provided X, using the attributes - `nu`, `B` and `S` for the VAR(p) distribution. + """Get the log-likelihood. + + Value of the log-likelihood up to a constant for the provided X, + using the attributes `nu`, `B` and `S` for the VAR(p) distribution. Parameters ---------- @@ -895,15 +1017,17 @@ def get_loglikelihood(self, X: NDArray) -> float: ------- float Computed value + """ Z, Y = utils.create_lag_matrices(X, self.p) U = Y - Z @ self.B return -(U @ self.S_inv * U).sum().sum() / 2 def gradient_X_loglik(self, X: NDArray) -> NDArray: - """ - Gradient of the log-likelihood for the provided X, using the attributes - `means` and `cov_inv` for the VAR(p) distribution. + """Compute the gradient of the log-likelihood for the provided X. + + It uses the attributes `means` and `cov_inv` + for the VAR(p) distribution. Parameters ---------- @@ -913,7 +1037,9 @@ def gradient_X_loglik(self, X: NDArray) -> NDArray: Returns ------- NDArray - The gradient of the log-likelihood with respect to the input variable `X`. + The gradient of the log-likelihood with respect + to the input variable `X`. + """ n_rows, n_cols = X.shape Z, Y = utils.create_lag_matrices(X, p=self.p) @@ -928,9 +1054,11 @@ def gradient_X_loglik(self, X: NDArray) -> NDArray: return grad_1 + grad_2 def get_gamma(self, n_cols: int) -> NDArray: - """ - If the noise matrix is not full-rank, defines the projection matrix keeping the - sampling process in the relevant subspace. Rescales the process to avoid instabilities. + """Compue gamma. + + If the noise matrix is not full-rank, defines the projection matrix + keeping the sampling process in the relevant subspace. + Rescales the process to avoid instabilities. 
Parameters ---------- @@ -941,6 +1069,7 @@ def get_gamma(self, n_cols: int) -> NDArray: ------- NDArray Gamma matrix + """ U, diag, Vt = spl.svd(self.S) diag_trunc = np.where(diag < self.min_std**2, 0, diag) @@ -952,13 +1081,13 @@ def get_gamma(self, n_cols: int) -> NDArray: return gamma def update_criteria_stop(self, X: NDArray): - """ - Updates the variables which will be used to compute the stop critera + """Update the variable to compute the stopping critera. Parameters ---------- X : NDArray Input matrix with variables in column + """ self.loglik = self.get_loglikelihood(X) self.dict_criteria_stop["S"].append(self.list_S[-1]) @@ -966,9 +1095,7 @@ def update_criteria_stop(self, X: NDArray): self.dict_criteria_stop["logliks"].append(self.loglik) def reset_learned_parameters(self): - """ - Resets all lists of estimated parameters before starting a new estimation. - """ + """Reset lists of parameters before starting a new estimation.""" self.list_ZZ = [] self.list_ZY = [] self.list_B = [] @@ -976,15 +1103,14 @@ def reset_learned_parameters(self): self.list_YY = [] def update_parameters(self, X: NDArray) -> None: - """ - Retains statistics relative to the current sample, in prevision of combining them. + """Retain statistics relative to the current sample. Parameters ---------- X : NDArray Input matrix with variables in column - """ + """ Z, Y = utils.create_lag_matrices(X, self.p) n_obs = len(Z) ZZ = Z.T @ Z / n_obs @@ -1002,9 +1128,10 @@ def update_parameters(self, X: NDArray) -> None: self.list_YY.append(YY) def combine_parameters(self) -> None: - """ - Combine all statistics computed for each sample in the update step. The estimation of `nu` - and `B` corresponds to the MLE, whereas `S` is approximated. + """Combine statistics computed for each sample in the update step. + + The estimation of `nu` and `B` corresponds to the MLE, + whereas `S` is approximated. """ list_ZZ = self.list_ZZ[-self.n_samples :] list_ZY = self.list_ZY[-self.n_samples :] @@ -1018,28 +1145,32 @@ def combine_parameters(self) -> None: self.B = self.ZZ_inv @ self.ZY stack_YY = np.stack(list_YY) self.YY = np.mean(stack_YY, axis=0) - self.S = self.YY - self.ZY.T @ self.B - self.B.T @ self.ZY + self.B.T @ self.ZZ @ self.B + self.S = ( + self.YY + - self.ZY.T @ self.B + - self.B.T @ self.ZY + + self.B.T @ self.ZZ @ self.B + ) self.S[np.abs(self.S) < 1e-12] = 0 self.S_inv = np.linalg.pinv(self.S, rcond=1e-10) def set_parameters(self, B: NDArray, S: NDArray): - """ - Sets the model parameters from a user value. + """Set the model parameters from a user value. Parameters ---------- - means : NDArray + B : NDArray Specified value for the autoregression matrix S : NDArray Specified value for the noise covariance matrix + """ self.B = B self.S = S self.S_inv = np.linalg.pinv(self.S) def init_imputation(self, X: NDArray) -> NDArray: - """ - First simple imputation before iterating. + """First simple imputation before iterating. Parameters ---------- @@ -1050,14 +1181,16 @@ def init_imputation(self, X: NDArray) -> NDArray: ------- NDArray Imputed matrix + """ return utils.linear_interpolation(X) def pretreatment(self, X, mask_na) -> Tuple[NDArray, NDArray]: - """ - Pretreats the data before imputation by EM, making it more robust. In the case of the - VAR(p) model we freeze the naive imputation on the first observations if all variables are - missing to avoid explosive imputations. + """Pretreat the data before imputation by EM, making it more robust. 
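The `update_parameters` / `combine_parameters` pair above estimates the VAR(p) coefficients from lag matrices through the normal equations, B = (Z'Z)^-1 Z'Y, and expands the residual covariance S algebraically. A minimal sketch with a hand-rolled lag-matrix construction; this is a simplified stand-in for `utils.create_lag_matrices` and may differ in details such as the intercept column:

import numpy as np

rng = np.random.default_rng(3)
n, d, p = 200, 2, 1
X = rng.normal(size=(n, d)).cumsum(axis=0)     # toy multivariate series

# Lag matrices: Y holds X_t, Z holds [1, X_{t-1}, ..., X_{t-p}]
Y = X[p:]
Z = np.hstack([np.ones((n - p, 1))] + [X[p - k : n - k] for k in range(1, p + 1)])

ZZ = Z.T @ Z / len(Z)
ZY = Z.T @ Y / len(Z)
YY = Y.T @ Y / len(Z)

B = np.linalg.pinv(ZZ) @ ZY                     # stacked [nu; A_1; ...; A_p]
S = YY - ZY.T @ B - B.T @ ZY + B.T @ ZZ @ B     # residual covariance, expanded
U = Y - Z @ B
print(np.allclose(S, U.T @ U / len(Z)))         # True: same quantity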
+ + In the case of the + VAR(p) model we freeze the naive imputation on the first observations + if all variables are missing to avoid explosive imputations. Parameters ---------- @@ -1072,6 +1205,7 @@ def pretreatment(self, X, mask_na) -> Tuple[NDArray, NDArray]: A tuple containing: - X the pretreatd data matrix - mask_na the updated mask + """ if self.p == 0: return X, mask_na @@ -1081,22 +1215,25 @@ def pretreatment(self, X, mask_na) -> Tuple[NDArray, NDArray]: return X, mask_na def _check_convergence(self) -> bool: - """ - Check if the EM algorithm has converged. Three criteria: - 1) if the differences between the estimates of the parameters (mean and covariance) is - less than a threshold (min_diff_reached - tolerance). - OR 2) if the difference of the consecutive differences of the estimates is less than a - threshold, i.e. stagnates over the last 5 interactions (min_diff_stable - - stagnation_threshold). + """Check if the EM algorithm has converged. + + Three criteria: + 1) if the differences between the estimates of the parameters + (mean and covariance) is less than a threshold + (min_diff_reached - tolerance). + OR 2) if the difference of the consecutive differences of the + estimates is less than a threshold, i.e. stagnates over the + last 5 interactions (min_diff_stable - stagnation_threshold). OR 3) if the likelihood of the data no longer increases, - i.e. stagnates over the last 5 iterations (max_loglik - stagnation_loglik). + i.e. stagnates over the last 5 iterations + (max_loglik - stagnation_loglik). Returns ------- bool True/False if the algorithm has converged - """ + """ list_B = self.dict_criteria_stop["B"] list_S = self.dict_criteria_stop["S"] list_logliks = self.dict_criteria_stop["logliks"] @@ -1107,7 +1244,9 @@ def _check_convergence(self) -> bool: min_diff_B1 = max_diff_Linf(list_B, n_steps=1) min_diff_S1 = max_diff_Linf(list_S, n_steps=1) - min_diff_reached = min_diff_B1 < self.tolerance and min_diff_S1 < self.tolerance + min_diff_reached = ( + min_diff_B1 < self.tolerance and min_diff_S1 < self.tolerance + ) if min_diff_reached: return True @@ -1118,7 +1257,8 @@ def _check_convergence(self) -> bool: min_diff_B5 = max_diff_Linf(list_B, n_steps=5) min_diff_S5 = max_diff_Linf(list_S, n_steps=5) min_diff_stable = ( - min_diff_B5 < self.stagnation_threshold and min_diff_S5 < self.stagnation_threshold + min_diff_B5 < self.stagnation_threshold + and min_diff_S5 < self.stagnation_threshold ) max_loglik5_ord1 = max_diff_Linf(list_logliks, n_steps=5, order=1) diff --git a/qolmat/imputations/imputers.py b/qolmat/imputations/imputers.py index a2550700..170a1659 100644 --- a/qolmat/imputations/imputers.py +++ b/qolmat/imputations/imputers.py @@ -1,54 +1,48 @@ +"""Script for the imputers.""" + import copy -from functools import partial import warnings -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union -from typing_extensions import Self from abc import abstractmethod +from functools import partial +from typing import Any, Callable, Dict, Literal, Optional, Tuple, Union import numpy as np -from numpy.typing import NDArray -from scipy import sparse import pandas as pd import sklearn as skl +from numpy.typing import NDArray from sklearn import utils as sku -from sklearn.impute import SimpleImputer from sklearn.base import BaseEstimator -from sklearn.experimental import enable_iterative_imputer +from sklearn.experimental import enable_iterative_imputer # noqa from sklearn.impute import IterativeImputer, KNNImputer from sklearn.impute._base import 
_BaseImputer -from sklearn.utils.validation import ( - _check_feature_names_in, - _num_samples, - check_array, - check_is_fitted, -) from statsmodels.tsa import seasonal as tsa_seasonal -from qolmat.imputations import em_sampler -from qolmat.imputations.rpca import rpca, rpca_noisy, rpca_pcp -from qolmat.imputations import softimpute +# from typing_extensions import Self +from qolmat.imputations import em_sampler, softimpute +from qolmat.imputations.rpca import rpca_noisy, rpca_pcp from qolmat.utils import utils -from qolmat.utils.exceptions import NotDataFrame, TypeNotHandled -from qolmat.utils.utils import HyperValue +from qolmat.utils.exceptions import NotDataFrame class _Imputer(_BaseImputer): - """ - Base class for all imputers. + """Base class for all imputers. Parameters ---------- columnwise : bool, optional - If True, the imputer will be computed for each column, else it will be computed on the - whole dataframe, by default False + If True, the imputer will be computed for each column, else it will be + computed on the whole dataframe, by default False shrink : bool, optional - Indicates if the elementwise imputation method returns a single value, by default False + Indicates if the elementwise imputation method returns a single value, + by default False random_state : Union[None, int, np.random.RandomState], optional Controls the randomness of the fit_transform, by default None imputer_params: Tuple[str, ...] - List of parameters of the imputer, which can be specified globally or columnwise + List of parameters of the imputer, which can be specified globally or + columnwise groups: Tuple[str, ...] List of column names to group by, by default [] + """ def __init__( @@ -67,9 +61,11 @@ def __init__( self.missing_values = np.nan def get_hyperparams(self, col: Optional[str] = None): - """ - Filter hyperparameters based on the specified column, the dictionary keys in the form - name_params/column are only relevent for the specified column and are filtered accordingly. + """Filter hyperparameters based on the specified column. + + The dictionary keys in the form + name_params/column are only relevent for the specified column and + are filtered accordingly. Parameters ---------- @@ -96,8 +92,7 @@ def get_hyperparams(self, col: Optional[str] = None): return hyperparams def _check_dataframe(self, X: NDArray): - """ - Checks that the input X is a dataframe, otherwise raises an error. + """Check that the input X is a dataframe, otherwise raises an error. Parameters ---------- @@ -108,32 +103,37 @@ def _check_dataframe(self, X: NDArray): ------ ValueError Input has to be a pandas.DataFrame. + """ if not isinstance(X, (pd.DataFrame)): raise NotDataFrame(type(X)) def _more_tags(self): - """ - This method indicates that this class allows inputs with categorical data and nans. It - modifies the behaviour of the functions checking data. - """ - return {"X_types": ["2darray", "categorical", "string"], "allow_nan": True} + """Indicate this class allows inputs with categorical data and nans. - def fit(self, X: pd.DataFrame, y=None) -> Self: + It modifies the behaviour of the functions checking data. """ - Fit the imputer on X. + return { + "X_types": ["2darray", "categorical", "string"], + "allow_nan": True, + } + + def fit(self, X: pd.DataFrame, y: pd.DataFrame = None) -> "_Imputer": + """Fit the imputer on X. Parameters ---------- X : pd.DataFrame Data matrix on which the Imputer must be fitted. + y : pd.DataFrame + None. Returns ------- self : Self Returns self. 
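`get_hyperparams` filters keys written as `name_param/column` so that a column-specific value overrides the global one for that column only. A plausible reimplementation of that convention, for illustration; it is not the library's exact code and the helper name is made up:

def filter_hyperparams(params: dict, col: str) -> dict:
    # Global keys first, then column-specific "name/col" overrides win.
    out = {key: value for key, value in params.items() if "/" not in key}
    for key, value in params.items():
        if "/" in key:
            name, target = key.split("/", 1)
            if target == col:
                out[name] = value
    return out

params = {"method": "linear", "order": 1, "order/var1": 3}
print(filter_hyperparams(params, "var1"))  # {'method': 'linear', 'order': 3}
print(filter_hyperparams(params, "var2"))  # {'method': 'linear', 'order': 1}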
- """ + """ df = utils._validate_input(X) self.n_features_in_ = len(df.columns) @@ -143,11 +143,15 @@ def fit(self, X: pd.DataFrame, y=None) -> Self: self.columns_ = tuple(df.columns) self._rng = sku.check_random_state(self.random_state) - if hasattr(self, "estimator") and hasattr(self.estimator, "random_state"): + if hasattr(self, "estimator") and hasattr( + self.estimator, "random_state" + ): self.estimator.random_state = self._rng if self.groups: - self.ngroups_ = df.groupby(list(self.groups)).ngroup().rename("_ngroup") + self.ngroups_ = ( + df.groupby(list(self.groups)).ngroup().rename("_ngroup") + ) else: self.ngroups_ = pd.Series(0, index=df.index).rename("_ngroup") @@ -161,12 +165,14 @@ def fit(self, X: pd.DataFrame, y=None) -> Self: return self def transform(self, X: pd.DataFrame) -> pd.DataFrame: - """ - Returns a dataframe with same shape as `X`, unchanged values, where all nans are replaced - by non-nan values. Depending on the imputer parameters, the dataframe can be imputed with + """Transform/impute a dataframe. + + It retruns a dataframe with same shape as `X`, + unchanged values, where all nans are replaced by non-nan values. + Depending on the imputer parameters, the dataframe can be imputed with columnwise and/or groupwise methods. - Also works for numpy arrays, returning numpy arrays, but the use of pandas dataframe is - advised. + Also works for numpy arrays, returning numpy arrays, but the use of + pandas dataframe is advised. Parameters ---------- @@ -177,12 +183,13 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: ------- pd.DataFrame Imputed dataframe. - """ + """ df = utils._validate_input(X) if tuple(df.columns) != self.columns_: raise ValueError( - """The number of features is different from the counterpart in fit. + """The number of features is different + from the counterpart in fit. Reshape your data""" ) @@ -198,7 +205,9 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: if self.columnwise: df_imputed = df.copy() for col in cols_with_nans: - df_imputed[col] = self._transform_allgroups(df[[col]], col=col) + df_imputed[col] = self._transform_allgroups( + df[[col]], col=col + ) else: df_imputed = self._transform_allgroups(df) @@ -207,29 +216,35 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: return df_imputed - def fit_transform(self, X: pd.DataFrame, y=None) -> pd.DataFrame: - """ - Returns a dataframe with same shape as `X`, unchanged values, where all nans are replaced - by non-nan values. - Depending on the imputer parameters, the dataframe can be imputed with columnwise and/or - groupwise methods. + def fit_transform( + self, X: pd.DataFrame, y: pd.DataFrame = None + ) -> pd.DataFrame: + """Return a imputed dataframe. + + The retruned df has same shape as `X`, with unchanged values, + but all nans are replaced by non-nan values. + Depending on the imputer parameters, the dataframe can be imputed + with columnwise and/or groupwise methods. Parameters ---------- X : pd.DataFrame Dataframe to impute. + y : pd.DataFrame + None Returns ------- pd.DataFrame Imputed dataframe. + """ self.fit(X) return self.transform(X) def _fit_transform_fallback(self, df: pd.DataFrame) -> pd.DataFrame: - """ - Impute `df` by the median of each column if it still contains missing values. + """Impute `df` with each column's median if missing values remain. + This can introduce data leakage for forward imputers if unchecked. 
Parameters @@ -241,6 +256,7 @@ def _fit_transform_fallback(self, df: pd.DataFrame) -> pd.DataFrame: ------- pd.DataFrame Dataframe df imputed by the median of each column. + """ self._check_dataframe(df) cols_with_nan = df.columns[df.isna().any()] @@ -250,9 +266,12 @@ def _fit_transform_fallback(self, df: pd.DataFrame) -> pd.DataFrame: df[col] = df[col].fillna(df[col].mode()[0]) return df - def _fit_allgroups(self, df: pd.DataFrame, col: str = "__all__") -> Self: - """ - Fits the Imputer either on a column, for a columnwise setting, on or all columns. + def _fit_allgroups( + self, df: pd.DataFrame, col: str = "__all__" + ) -> "_Imputer": + """Fit the imputer. + + Either on a column, for a columnwise setting, on or all columns. Parameters ---------- @@ -270,8 +289,8 @@ def _fit_allgroups(self, df: pd.DataFrame, col: str = "__all__") -> Self: ------ ValueError Input has to be a pandas.DataFrame. - """ + """ self._check_dataframe(df) fun_on_col = partial(self._fit_element, col=col) if self.groups: @@ -283,16 +302,14 @@ def _fit_allgroups(self, df: pd.DataFrame, col: str = "__all__") -> Self: return self def _setup_fit(self) -> None: - """ - Setup step of the fit function, before looping over the columns. - """ - self._dict_fitting: Dict[str, Any] = dict() + """Set up step of the fit function, before looping over the columns.""" + self._dict_fitting: Dict[str, Any] = {} return - def _apply_groupwise(self, fun: Callable, df: pd.DataFrame, **kwargs) -> Any: - """ - Applies the function `fun`in a groupwise manner to the dataframe `df`. - + def _apply_groupwise( + self, fun: Callable, df: pd.DataFrame, **kwargs + ) -> Any: + """Apply the function `fun`in a groupwise manner to the dataframe `df`. Parameters ---------- @@ -300,11 +317,14 @@ def _apply_groupwise(self, fun: Callable, df: pd.DataFrame, **kwargs) -> Any: Function applied groupwise to the dataframe with arguments kwargs df : pd.DataFrame Dataframe on which the function is applied + **kwargs: dict + Additional arguments Returns ------- Any Depends on the function signature + """ self._check_dataframe(df) fun_on_col = partial(fun, **kwargs) @@ -317,11 +337,15 @@ def _apply_groupwise(self, fun: Callable, df: pd.DataFrame, **kwargs) -> Any: else: return fun_on_col(df) - def _transform_allgroups(self, df: pd.DataFrame, col: str = "__all__") -> pd.DataFrame: - """ - Impute `df` by applying the specialized method `transform_element` on each group, if - groups have been given. If the method leaves nan, `fit_transform_fallback` is called in - order to return a dataframe without nan. + def _transform_allgroups( + self, df: pd.DataFrame, col: str = "__all__" + ) -> pd.DataFrame: + """Impute `df`. + + It doe sit by applying the specialized method `transform_element` + on each group, if groups have been given. If the method leaves nan, + `fit_transform_fallback` is called in order to return a dataframe + without nan. Parameters ---------- @@ -339,10 +363,13 @@ def _transform_allgroups(self, df: pd.DataFrame, col: str = "__all__") -> pd.Dat ------ NotDataFrame Input has to be a pandas.DataFrame. 
+ """ self._check_dataframe(df) df = df.copy() - imputation_values = self._apply_groupwise(self._transform_element, df, col=col) + imputation_values = self._apply_groupwise( + self._transform_element, df, col=col + ) df = df.fillna(imputation_values) # fill na by applying imputation method without groups @@ -353,10 +380,13 @@ def _transform_allgroups(self, df: pd.DataFrame, col: str = "__all__") -> pd.Dat return df @abstractmethod - def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) -> Any: - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and - self.columnwise. + def _fit_element( + self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 + ) -> Any: + """Fit the imputer on `df`. + + It does it at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -376,6 +406,7 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) return self @@ -384,9 +415,10 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -406,14 +438,14 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) return df class ImputerOracle(_Imputer): - """ - Perfect imputer, requires to know real values. + """Perfect imputer, requires to know real values. Used as a reference to evaluate imputation metrics. @@ -423,6 +455,7 @@ class ImputerOracle(_Imputer): Dataframe containing real values. groups: Tuple[str, ...] List of column names to group by, by default [] + """ def __init__( @@ -431,38 +464,45 @@ def __init__( super().__init__() def set_solution(self, df: pd.DataFrame): - """Sets the true values to be returned by the oracle. + """Set the true values to be returned by the oracle. Parameters ---------- - X : pd.DataFrame + df : pd.DataFrame True dataset with mask + """ self.df_solution = df def transform(self, X: pd.DataFrame) -> pd.DataFrame: - """Impute df with corresponding known values + """Impute df with corresponding known values. Parameters ---------- - df : pd.DataFrame + X : pd.DataFrame dataframe to impute + Returns ------- pd.DataFrame dataframe imputed with premasked values + """ df = utils._validate_input(X) if tuple(df.columns) != self.columns_: raise ValueError( - """The number of features is different from the counterpart in fit. + """The number of features is different from + the counterpart in fit. Reshape your data""" ) if hasattr(self, "df_solution"): df_imputed = df.fillna(self.df_solution) else: - warnings.warn("OracleImputer not initialized! Returning imputation with zeros") + warnings.warn( + "OracleImputer not initialized! " + "Returning imputation with zeros" + ) df_imputed = df.fillna(0) if isinstance(X, (np.ndarray)): @@ -471,8 +511,10 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: class ImputerSimple(_Imputer): - """ - Impute each column by its mean, its median or its mode (if its categorical). + """Simple imputer. 
+ + Impute each column by its mean, its median or its mode + (if its categorical). Parameters ---------- @@ -485,27 +527,37 @@ class ImputerSimple(_Imputer): >>> import pandas as pd >>> from qolmat.imputations import imputers >>> imputer = imputers.ImputerSimple() - >>> df = pd.DataFrame(data=[[1, 1, 1, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [1, 2, 2, 5], - ... [2, 2, 2, 2]], - ... columns=["var1", "var2", "var3", "var4"]) + >>> df = pd.DataFrame( + ... data=[ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [1, 2, 2, 5], + ... [2, 2, 2, 2], + ... ], + ... columns=["var1", "var2", "var3", "var4"], + ... ) >>> imputer.fit_transform(df) var1 var2 var3 var4 0 1.0 1.0 1.0 1.0 1 1.0 2.0 2.0 2.0 2 1.0 2.0 2.0 5.0 3 2.0 2.0 2.0 2.0 + """ - def __init__(self, groups: Tuple[str, ...] = (), strategy="median") -> None: + def __init__( + self, groups: Tuple[str, ...] = (), strategy="median" + ) -> None: super().__init__(groups=groups, columnwise=True, shrink=False) self.strategy = strategy - def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) -> Any: - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and - self.columnwise. + def _fit_element( + self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 + ) -> Any: + """Fit the imputer on `df`. + + It does it at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -525,6 +577,7 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) ------ NotDataFrame Input has to be a pandas.DataFrame. + """ if pd.api.types.is_numeric_dtype(df[col]): model = skl.impute.SimpleImputer(strategy=self.strategy) @@ -535,8 +588,9 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending on self.groups + """Transform the dataframe `df`. + + It does it at the group and/or column level depending onself.groups and self.columnwise. Parameters @@ -557,6 +611,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ model = self._dict_fitting[col][ngroup] X_imputed = model.fit_transform(df) @@ -564,8 +619,7 @@ def _transform_element( class ImputerShuffle(_Imputer): - """ - Impute using random samples from the considered column. + """Impute using random samples from the considered column. Parameters ---------- @@ -580,17 +634,22 @@ class ImputerShuffle(_Imputer): >>> import pandas as pd >>> from qolmat.imputations import imputers >>> imputer = imputers.ImputerShuffle(random_state=42) - >>> df = pd.DataFrame(data=[[1, 1, 1, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [1, 2, 2, 5], - ... [2, 2, 2, 2]], - ... columns=["var1", "var2", "var3", "var4"]) + >>> df = pd.DataFrame( + ... data=[ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [1, 2, 2, 5], + ... [2, 2, 2, 2], + ... ], + ... columns=["var1", "var2", "var3", "var4"], + ... ) >>> imputer.fit_transform(df) var1 var2 var3 var4 0 1.0 1.0 1.0 1.0 1 2.0 1.0 2.0 2.0 2 1.0 2.0 2.0 5.0 3 2.0 2.0 2.0 2.0 + """ def __init__( @@ -598,14 +657,17 @@ def __init__( groups: Tuple[str, ...] 
= (), random_state: Union[None, int, np.random.RandomState] = None, ) -> None: - super().__init__(groups=groups, columnwise=True, random_state=random_state) + super().__init__( + groups=groups, columnwise=True, random_state=random_state + ) def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -625,6 +687,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) n_missing = df.isna().sum().sum() @@ -640,9 +703,10 @@ def _transform_element( class ImputerLOCF(_Imputer): - """ - Impute by the last available value of the column. Relevent for time series. + """LOCF imputer. + It imputes by the last available value of the column. + Relevant for time series. If the first observations are missing, it is imputed by a NOCB Parameters @@ -656,17 +720,22 @@ class ImputerLOCF(_Imputer): >>> import pandas as pd >>> from qolmat.imputations import imputers >>> imputer = imputers.ImputerLOCF() - >>> df = pd.DataFrame(data=[[1, 1, 1, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [1, 2, 2, 5], - ... [2, 2, 2, 2]], - ... columns=["var1", "var2", "var3", "var4"]) + >>> df = pd.DataFrame( + ... data=[ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [1, 2, 2, 5], + ... [2, 2, 2, 2], + ... ], + ... columns=["var1", "var2", "var3", "var4"], + ... ) >>> imputer.fit_transform(df) var1 var2 var3 var4 0 1.0 1.0 1.0 1.0 1 1.0 1.0 1.0 1.0 2 1.0 2.0 2.0 5.0 3 2.0 2.0 2.0 2.0 + """ def __init__( @@ -678,9 +747,10 @@ def __init__( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -700,6 +770,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) df_out = df.copy() @@ -709,7 +780,8 @@ def _transform_element( class ImputerNOCB(_Imputer): - """ + """NOCB imputer. + Impute by the next available value of the column. Relevent for time series. If the last observation is missing, it is imputed by a LOCF. @@ -724,17 +796,22 @@ class ImputerNOCB(_Imputer): >>> import pandas as pd >>> from qolmat.imputations import imputers >>> imputer = imputers.ImputerNOCB() - >>> df = pd.DataFrame(data=[[1, 1, 1, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [1, 2, 2, 5], - ... [2, 2, 2, 2]], - ... columns=["var1", "var2", "var3", "var4"]) + >>> df = pd.DataFrame( + ... data=[ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [1, 2, 2, 5], + ... [2, 2, 2, 2], + ... ], + ... columns=["var1", "var2", "var3", "var4"], + ... ) >>> imputer.fit_transform(df) var1 var2 var3 var4 0 1.0 1.0 1.0 1.0 1 1.0 2.0 2.0 5.0 2 1.0 2.0 2.0 5.0 3 2.0 2.0 2.0 2.0 + """ def __init__( @@ -746,9 +823,10 @@ def __init__( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. 
+ """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -768,6 +846,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) df_out = df.copy() @@ -777,10 +856,11 @@ def _transform_element( class ImputerInterpolation(_Imputer): - """ - This class implements a way to impute time series using some interpolation strategies - suppoted by pd.Series.interpolate, such as "linear", "slinear", "quadratic", ... - By default, linear interpolation. + """Interpolation imputer. + + This class implements a way to impute time series using some interpolation + strategies suppoted by pd.Series.interpolate, such as "linear", "slinear", + "quadratic", ... By default, linear interpolation. As for pd.Series.interpolate, if "method" is "spline" or "polynomial", an "order" has to be passed. @@ -789,14 +869,15 @@ class ImputerInterpolation(_Imputer): groups: Tuple[str, ...] List of column names to group by, by default [] method : Optional[str] = "linear" - name of the method for interpolation: "linear", "cubic", "spline", "slinear", ... - see pd.Series.interpolate for more example. + name of the method for interpolation: "linear", "cubic", "spline", + "slinear", ... see pd.Series.interpolate for more example. By default, the value is set to "linear". order : Optional[int] order for the spline interpolation col_time : Optional[str] - Name of the column representing the time index to use for the interpolation. If None, the - index is used assuming it is one-dimensional. + Name of the column representing the time index to use for the + interpolation. If None, the index is used assuming it + is one-dimensional. Examples -------- @@ -804,17 +885,22 @@ class ImputerInterpolation(_Imputer): >>> import pandas as pd >>> from qolmat.imputations import imputers >>> imputer = imputers.ImputerInterpolation(method="spline", order=2) - >>> df = pd.DataFrame(data=[[1, 1, 1, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [1, 2, 2, 5], - ... [2, 2, 2, 2]], - ... columns=["var1", "var2", "var3", "var4"]) + >>> df = pd.DataFrame( + ... data=[ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [1, 2, 2, 5], + ... [2, 2, 2, 2], + ... ], + ... columns=["var1", "var2", "var3", "var4"], + ... ) >>> imputer.fit_transform(df) var1 var2 var3 var4 0 1.000000 1.000000 1.000000 1.000000 1 0.666667 1.666667 1.666667 4.666667 2 1.000000 2.000000 2.000000 5.000000 3 2.000000 2.000000 2.000000 2.000000 + """ def __init__( @@ -824,7 +910,9 @@ def __init__( order: Optional[int] = None, col_time: Optional[str] = None, ) -> None: - super().__init__(imputer_params=("method", "order"), groups=groups, columnwise=True) + super().__init__( + imputer_params=("method", "order"), groups=groups, columnwise=True + ) self.method = method self.order = order self.col_time = col_time @@ -832,9 +920,10 @@ def __init__( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -854,6 +943,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. 
+ """ self._check_dataframe(df) hyperparams = self.get_hyperparams(col=col) @@ -869,10 +959,11 @@ def _transform_element( class ImputerResiduals(_Imputer): - """ + """Residual imputer. + This class implements an imputation method based on a STL decomposition. - The series are de-seasonalised, de-trended, residuals are imputed, then residuals are - re-seasonalised and re-trended. + The series are de-seasonalised, de-trended, residuals are imputed, + then residuals are re-seasonalised and re-trended. Parameters ---------- @@ -883,7 +974,8 @@ class ImputerResiduals(_Imputer): the index of x does not have a frequency. Overrides default periodicity of x if x is a pandas object with a timeseries index. model_tsa : Optional[str] - Type of seasonal component "additive" or "multiplicative". Abbreviations are accepted. + Type of seasonal component "additive" or "multiplicative". + Abbreviations are accepted. By default, the value is set to "additive" extrapolate_trend : int or 'freq', optional If set to > 0, the trend resulting from the convolution is @@ -900,15 +992,20 @@ class ImputerResiduals(_Imputer): >>> import pandas as pd >>> from qolmat.imputations.imputers import ImputerResiduals >>> np.random.seed(100) - >>> df = pd.DataFrame(index=pd.date_range('2015-01-01','2020-01-01')) + >>> df = pd.DataFrame(index=pd.date_range("2015-01-01", "2020-01-01")) >>> mean = 5 >>> offset = 10 - >>> df['y'] = np.cos(df.index.dayofyear/365*2*np.pi - np.pi)*mean + offset + >>> df["y"] = ( + ... np.cos(df.index.dayofyear / 365 * 2 * np.pi - np.pi) * mean + ... + offset + ... ) >>> trend = 5 - >>> df['y'] = df['y'] + trend*np.arange(0,df.shape[0])/df.shape[0] + >>> df["y"] = df["y"] + trend * np.arange(0, df.shape[0]) / df.shape[0] >>> noise_mean = 0 >>> noise_var = 2 - >>> df['y'] = df['y'] + np.random.normal(noise_mean, noise_var, df.shape[0]) + >>> df["y"] = df["y"] + np.random.normal( + ... noise_mean, noise_var, df.shape[0] + ... ) >>> mask = np.random.choice([True, False], size=df.shape) >>> df = df.mask(mask) >>> imputor = ImputerResiduals(period=365, model_tsa="additive") @@ -927,6 +1024,7 @@ class ImputerResiduals(_Imputer): 2020-01-01 12.780517 [1827 rows x 1 columns] + """ def __init__( @@ -955,9 +1053,10 @@ def __init__( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -977,13 +1076,16 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. 
+ """ self._check_dataframe(df) hyperparams = self.get_hyperparams(col=col) name = df.columns[0] values = df[df.columns[0]] values_interp = ( - values.interpolate(method=hyperparams["method_interpolation"]).ffill().bfill() + values.interpolate(method=hyperparams["method_interpolation"]) + .ffill() + .bfill() ) result = tsa_seasonal.seasonal_decompose( values_interp, @@ -996,15 +1098,18 @@ def _transform_element( residuals[values.isna()] = np.nan residuals = ( - residuals.interpolate(method=hyperparams["method_interpolation"]).ffill().bfill() + residuals.interpolate(method=hyperparams["method_interpolation"]) + .ffill() + .bfill() + ) + df_result = pd.DataFrame( + {name: result.seasonal + result.trend + residuals} ) - df_result = pd.DataFrame({name: result.seasonal + result.trend + residuals}) return df_result class ImputerKNN(_Imputer): - """ - This class implements an imputation by the k-nearest neighbors. + """K-nearest neighbors imputer. Parameters ---------- @@ -1029,17 +1134,22 @@ class ImputerKNN(_Imputer): >>> import pandas as pd >>> from qolmat.imputations import imputers >>> imputer = imputers.ImputerKNN(n_neighbors=2) - >>> df = pd.DataFrame(data=[[1, 1, 1, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [1, 2, 2, 5], - ... [2, 2, 2, 2]], - ... columns=["var1", "var2", "var3", "var4"]) + >>> df = pd.DataFrame( + ... data=[ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [1, 2, 2, 5], + ... [2, 2, 2, 2], + ... ], + ... columns=["var1", "var2", "var3", "var4"], + ... ) >>> imputer.fit_transform(df) var1 var2 var3 var4 0 1.000000 1.000000 1.000000 1.000000 1 1.333333 1.666667 1.666667 2.666667 2 1.000000 2.000000 2.000000 5.000000 3 2.000000 2.000000 2.000000 2.000000 + """ def __init__( @@ -1049,15 +1159,20 @@ def __init__( weights: str = "distance", ) -> None: super().__init__( - imputer_params=("n_neighbors", "weights"), groups=groups, columnwise=False + imputer_params=("n_neighbors", "weights"), + groups=groups, + columnwise=False, ) self.n_neighbors = n_neighbors self.weights = weights - def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) -> KNNImputer: - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and - self.columnwise. + def _fit_element( + self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 + ) -> KNNImputer: + """Fit the imputer on `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -1077,9 +1192,13 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) - assert col == "__all__" + if col != "__all__": + raise ValueError( + f"col must be '__all__', but '{col}' has been passed." + ) hyperparameters = self.get_hyperparams() model = KNNImputer(metric="nan_euclidean", **hyperparameters) model = model.fit(df) @@ -1088,9 +1207,10 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -1110,31 +1230,37 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame.
+ """ self._check_dataframe(df) - assert col == "__all__" + if col != "__all__": + raise ValueError( + f"col must be '__all__', but '{col}' has been passed." + ) model = self._dict_fitting["__all__"][ngroup] X_imputed = model.fit_transform(df) return pd.DataFrame(data=X_imputed, columns=df.columns, index=df.index) class ImputerMICE(_Imputer): - """ - Wrapper of the class sklearn.impute.IterativeImputer in our framework. This imputer relies - on a estimator which is iteratively + """MICE imputer. + + Wrapper of the class sklearn.impute.IterativeImputer in our framework. + This imputer relies on an estimator which is applied iteratively. Parameters ---------- groups : Tuple[str, ...], optional - _description_, by default () + specific groups for groupby, by default () estimator : Optional[BaseEstimator], optional - _description_, by default None + estimator to use, by default None random_state : Union[None, int, np.random.RandomState], optional - _description_, by default None + random state, by default None sample_posterior : bool, optional - _description_, by default False + whether to sample from the predictive posterior, by default False max_iter : int, optional - _description_, by default 100 + maximum number of iterations, by default 100 + """ def __init__( @@ -1158,9 +1284,10 @@ def __init__( def _fit_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> IterativeImputer: - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Fit the imputer on `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -1180,9 +1307,13 @@ def _fit_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) - assert col == "__all__" + if col != "__all__": + raise ValueError( + f"col must be '__all__', but '{col}' has been passed." + ) hyperparameters = self.get_hyperparams() model = IterativeImputer(estimator=self.estimator, **hyperparameters) model = model.fit(df) @@ -1192,8 +1323,9 @@ def _fit_element( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending on self.groups + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups and self.columnwise. Parameters @@ -1214,20 +1346,24 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. - """ + """ self._check_dataframe(df) - assert col == "__all__" + if col != "__all__": + raise ValueError( + f"col must be '__all__', but '{col}' has been passed." + ) model = self._dict_fitting["__all__"][ngroup] X_imputed = model.fit_transform(df) return pd.DataFrame(data=X_imputed, columns=df.columns, index=df.index) class ImputerRegressor(_Imputer): - """ + """Regressor imputer. + This class implements a regression imputer in the multivariate case. - It imputes each column using a single fit-predict for a given estimator, based on the colunms - which have no missing values. + It imputes each column using a single fit-predict for a given estimator, + based on the columns which have no missing values.
Parameters ---------- @@ -1238,8 +1374,8 @@ class ImputerRegressor(_Imputer): handler_nan : str Can be `fit, `row` or `column`: - if `fit`, the estimator is assumed to be robust to missing values - - if `row` all non complete rows will be removed from the train dataset, and will not be - used for the inferance, + - if `row` all non complete rows will be removed from the + train dataset, and will not be used for the inference, - if `column` all non complete columns will be ignored. By default, `row` random_state : Union[None, int, np.random.RandomState], optional @@ -1252,17 +1388,22 @@ class ImputerRegressor(_Imputer): >>> from qolmat.imputations import imputers >>> from sklearn.ensemble import ExtraTreesRegressor >>> imputer = imputers.ImputerRegressor(estimator=ExtraTreesRegressor()) - >>> df = pd.DataFrame(data=[[1, 1, 1, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [1, 2, 2, 5], - ... [2, 2, 2, 2]], - ... columns=["var1", "var2", "var3", "var4"]) + >>> df = pd.DataFrame( + ... data=[ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [1, 2, 2, 5], + ... [2, 2, 2, 2], + ... ], + ... columns=["var1", "var2", "var3", "var4"], + ... ) >>> imputer.fit_transform(df) var1 var2 var3 var4 0 1.0 1.0 1.0 1.0 1 1.0 2.0 2.0 2.0 2 1.0 2.0 2.0 5.0 3 2.0 2.0 2.0 2.0 + """ def __init__( @@ -1288,7 +1429,29 @@ def _predict_estimator(self, estimator, X) -> pd.Series: pred = estimator.predict(X) return pd.Series(pred, index=X.index) - def get_Xy_valid(self, df: pd.DataFrame, col: str) -> Tuple[pd.DataFrame, pd.Series]: + def get_Xy_valid( + self, df: pd.DataFrame, col: str + ) -> Tuple[pd.DataFrame, pd.Series]: + """Get a valid couple (X,y). + + Parameters + ---------- + df : pd.DataFrame + Input dataframe + col : str + column name. + + Returns + ------- + Tuple[pd.DataFrame, pd.Series] + Valid X and y. + + Raises + ------ + ValueError + _description_ + + """ X = df.drop(columns=col, errors="ignore") if self.handler_nan == "none": pass @@ -1298,7 +1461,8 @@ def get_Xy_valid(self, df: pd.DataFrame, col: str) -> Tuple[pd.DataFrame, pd.Ser X = X.dropna(how="any", axis=1) else: raise ValueError( - f"Value '{self.handler_nan}' is not correct for argument `handler_nan'" + f"Value '{self.handler_nan}' is not correct " + "for argument `handler_nan'." ) # X = pd.get_dummies(X, prefix_sep="=") y = df.loc[X.index, col] @@ -1307,8 +1471,9 @@ def get_Xy_valid(self, df: pd.DataFrame, col: str) -> Tuple[pd.DataFrame, pd.Ser def _fit_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> Optional[BaseEstimator]: - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and + """Fit the imputer on `df`. + + It does it at the group and/or column level depending onself.groups and self.columnwise. Parameters @@ -1329,13 +1494,18 @@ def _fit_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) - assert col == "__all__" + if col != "__all__": + raise ValueError( + f"col must be '__all__', but '{col}' has been passed." 
+ ) cols_with_nans = df.columns[df.isna().any()] - dict_estimators: Dict[str, BaseEstimator] = dict() + dict_estimators: Dict[str, BaseEstimator] = {} for col in cols_with_nans: - # Selects only the valid values in the Train Set according to the chosen method + # Selects only the valid values in the Train Set according + # to the chosen method X, y = self.get_Xy_valid(df, col) # Selects only non-NaN values for the Test Set @@ -1343,7 +1513,8 @@ def _fit_element( X = X[~is_na] y = y[~is_na] - # Train the model according to an ML or DL method and after predict the imputation + # Train the model according to an ML or DL method and + # after predict the imputation if not X.empty: estimator = copy.deepcopy(self.estimator) dict_estimators[col] = self._fit_estimator(estimator, X, y) @@ -1354,9 +1525,10 @@ def _fit_element( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -1376,9 +1548,13 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) - assert col == "__all__" + if col != "__all__": + raise ValueError( + f"col must be '__all__', but '{col}' has been passed." + ) df_imputed = df.copy() cols_with_nans = df.columns[df.isna().any()] @@ -1402,10 +1578,12 @@ def _transform_element( class ImputerRpcaPcp(_Imputer): - """ - This class implements the Robust Principal Component Analysis imputation with Principal - Component Pursuit. The imputation minimizes a loss function combining a low-rank criterium on - the dataframe and a L1 penalization on the residuals. + """PCP RPCA imputer. + + This class implements the Robust Principal Component Analysis imputation + with Principal Component Pursuit. The imputation minimizes a loss function + combining a low-rank criterium on the dataframe and a L1 penalization on + the residuals. Parameters ---------- @@ -1414,9 +1592,11 @@ class ImputerRpcaPcp(_Imputer): columnwise : bool For the RPCA method to be applied columnwise (with reshaping of each column into an array) - or to be applied directly on the dataframe. By default, the value is set to False. + or to be applied directly on the dataframe. + By default, the value is set to False. random_state : Union[None, int, np.random.RandomState], optional Controls the randomness of the fit_transform, by default None + """ def __init__( @@ -1452,13 +1632,13 @@ def __init__( self.verbose = verbose def get_model(self, **hyperparams) -> rpca_pcp.RpcaPcp: - """ - Get the underlying model of the imputer based on its attributes. + """Get the underlying model of the imputer based on its attributes. Returns ------- rpca.RPCA RPCA model to be used in the fit and transform methods. + """ hyperparams = { key: hyperparams[key] @@ -1469,16 +1649,19 @@ def get_model(self, **hyperparams) -> rpca_pcp.RpcaPcp: "tolerance", ] } - model = rpca_pcp.RpcaPcp(random_state=self._rng, verbose=self.verbose, **hyperparams) + model = rpca_pcp.RpcaPcp( + random_state=self._rng, verbose=self.verbose, **hyperparams + ) return model def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. 
+ """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -1498,6 +1681,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) hyperparams = self.get_hyperparams() @@ -1515,22 +1699,21 @@ def _transform_element( D_scale = (D - means) / stds M, A = model.decompose(D_scale, Omega) M = M * stds + means - A = A * stds + means M_final = utils.get_shape_original(M, X.shape) - A_final = utils.get_shape_original(A, X.shape) - X_imputed = M_final + A_final - df_imputed = pd.DataFrame(X_imputed, index=df.index, columns=df.columns) + df_imputed = pd.DataFrame(M_final, index=df.index, columns=df.columns) df_imputed = df.where(~df.isna(), df_imputed) return df_imputed class ImputerRpcaNoisy(_Imputer): - """ - This class implements the Robust Principal Component Analysis imputation with added noise. - The imputation minimizes a loss function combining a low-rank criterium on the dataframe and + """Noisy RPCA imputer. + + This class implements the Robust Principal Component Analysis imputation + with added noise. The imputation minimizes a loss function combining + a low-rank criterion on the dataframe and a L1 penalization on the residuals. Parameters @@ -1540,9 +1723,11 @@ class ImputerRpcaNoisy(_Imputer): columnwise : bool For the RPCA method to be applied columnwise (with reshaping of each column into an array) - or to be applied directly on the dataframe. By default, the value is set to False. + or to be applied directly on the dataframe. + By default, the value is set to False. random_state : Union[None, int, np.random.RandomState], optional Controls the randomness of the fit_transform, by default None + """ def __init__( @@ -1593,15 +1778,14 @@ def __init__( self.verbose = verbose def get_model(self, **hyperparams) -> rpca_noisy.RpcaNoisy: - """ - Get the underlying model of the imputer based on its attributes. + """Get the underlying model of the imputer based on its attributes. Returns ------- rpca.RPCA RPCA model to be used in the fit and transform methods. - """ + """ hyperparams = { key: hyperparams[key] for key in [ @@ -1615,15 +1799,18 @@ def get_model(self, **hyperparams) -> rpca_noisy.RpcaNoisy: "norm", ] } - model = rpca_noisy.RpcaNoisy(random_state=self._rng, verbose=self.verbose, **hyperparams) + model = rpca_noisy.RpcaNoisy( + random_state=self._rng, verbose=self.verbose, **hyperparams + ) return model def _fit_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> Tuple[NDArray, NDArray, NDArray]: - """ - Fits the imputer on `df`, at the group and/or column level depending on self.groups and - self.columnwise. + """Fit the imputer on `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -1646,6 +1833,7 @@ def _fit_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) hyperparams = self.get_hyperparams() @@ -1667,9 +1855,10 @@ def _fit_element( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise.
Parameters ---------- @@ -1689,6 +1878,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) hyperparams = self.get_hyperparams() @@ -1705,7 +1895,6 @@ def _transform_element( D_scale = (D - means) / stds M, A = model.decompose_on_basis(D_scale, Omega, Q) M = M * stds + means - A = A * stds + means M_final = utils.get_shape_original(M, X.shape) @@ -1716,13 +1905,15 @@ def _transform_element( class ImputerSoftImpute(_Imputer): - """ - This class implements the Soft Impute method: + """SoftImpute imputer. - Hastie, Trevor, et al. Matrix completion and low-rank SVD via fast alternating least squares. - The Journal of Machine Learning Research 16.1 (2015): 3367-3402. + This class implements the Soft Impute method: + Hastie, Trevor, et al. Matrix completion and low-rank SVD via fast + alternating least squares. The Journal of Machine Learning Research 16.1 + (2015): 3367-3402. - This imputation technique is less robust than the RPCA, although it can provide faster. + This imputation technique is less robust than the RPCA, + although it can be faster. Parameters ---------- @@ -1731,9 +1922,11 @@ class ImputerSoftImpute(_Imputer): columnwise : bool For the RPCA method to be applied columnwise (with reshaping of each column into an array) - or to be applied directly on the dataframe. By default, the value is set to False. + or to be applied directly on the dataframe. + By default, the value is set to False. random_state : Union[None, int, np.random.RandomState], optional Controls the randomness of the fit_transform, by default None + """ def __init__( @@ -1769,13 +1962,13 @@ def __init__( self.verbose = verbose def get_model(self, **hyperparams) -> softimpute.SoftImpute: - """ - Get the underlying model of the imputer based on its attributes. + """Get the underlying model of the imputer based on its attributes. Returns ------- softimpute.SoftImpute Soft Impute model to be used in the transform method. + """ hyperparams = { key: hyperparams[key] @@ -1785,7 +1978,9 @@ def get_model(self, **hyperparams) -> softimpute.SoftImpute: "tolerance", ] } - model = softimpute.SoftImpute(random_state=self._rng, verbose=self.verbose, **hyperparams) + model = softimpute.SoftImpute( + random_state=self._rng, verbose=self.verbose, **hyperparams + ) return model @@ -1793,8 +1988,8 @@ def get_model(self, **hyperparams) -> softimpute.SoftImpute: # self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 # ) -> softimpute.SoftImpute: # """ - # Fits the imputer on `df`, at the group and/or column level depending on - # self.groups and self.columnwise. + # Fits the imputer on `df`, at the group and/or column level depending + # on self.groups and self.columnwise. # Parameters # ---------- @@ -1825,9 +2020,10 @@ def get_model(self, **hyperparams) -> softimpute.SoftImpute: def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -1847,6 +2043,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame.
+ """ self._check_dataframe(df) hyperparams = self.get_hyperparams() @@ -1863,7 +2060,9 @@ def _transform_element( A_final = utils.get_shape_original(A, X.shape) X_imputed = M_final + A_final - df_imputed = pd.DataFrame(X_imputed, index=df.index, columns=df.columns) + df_imputed = pd.DataFrame( + X_imputed, index=df.index, columns=df.columns + ) df_imputed = df.where(~df.isna(), df_imputed) return df_imputed @@ -1871,33 +2070,42 @@ def _transform_element( def _more_tags(self): return { "_xfail_checks": { - "check_fit2d_1sample": "This test shouldn't be running at all!", - "check_fit2d_1feature": "This test shouldn't be running at all!", + "check_fit2d_1sample": ( + "This test shouldn't be running at all!" + ), + "check_fit2d_1feature": ( + "This test shouldn't be running at all!" + ), }, } class ImputerEM(_Imputer): - """ - This class implements an imputation method based on joint modelling and an inference using a - Expectation-Minimization algorithm. + """EM imputer. + + This class implements an imputation method based on joint modelling and + an inference using an Expectation-Maximization algorithm. Parameters ---------- groups: Tuple[str, ...] List of column names to group by, by default [] method : {'multinormal', 'VAR'}, default='multinormal' - Method defining the hypothesis made on the data distribution. Possible values: - - 'multinormal' : the data points a independent and uniformly distributed following a - multinormal distribution + Method defining the hypothesis made on the data distribution. + Possible values: + - 'multinormal' : the data points are independent and identically + distributed following a multinormal distribution - 'VAR' : the data is a time series modeled by a VAR(p) process columnwise : bool - If False, correlations between variables will be used, which is advised. - If True, each column is imputed independently. For the multinormal case each - value will be imputed by the mean up to a noise with fixed noise, for the VAR1 case the - imputation will be a noisy temporal interpolation. + If False, correlations between variables will be used, + which is advised. + If True, each column is imputed independently. For the multinormal case + each value will be imputed by the mean up to a fixed noise, + for the VAR1 case the imputation will be a noisy temporal + interpolation. random_state : Union[None, int, np.random.RandomState], optional Controls the randomness of the fit_transform, by default None + """ def __init__( @@ -1954,6 +2162,7 @@ def get_model(self, **hyperparams) -> em_sampler.EM: ------- em_sampler.EM EM model to be used in the fit and transform methods. + """ if self.model == "multinormal": hyperparams.pop("p") @@ -1980,9 +2189,10 @@ def get_model(self, **hyperparams) -> em_sampler.EM: def _fit_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> em_sampler.EM: - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Fit the imputer on `df`. + + It does it at the group and/or column level depending on self.groups + and self.columnwise. Parameters ---------- @@ -2002,6 +2212,7 @@ def _fit_element( ------ NotDataFrame Input has to be a pandas.DataFrame.
+ """ self._check_dataframe(df) hyperparams = self.get_hyperparams() @@ -2012,9 +2223,10 @@ def _fit_element( def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does it at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -2034,6 +2246,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) @@ -2044,6 +2257,8 @@ def _transform_element( X = df.values.astype(float) X_imputed = model.transform(X) - df_transformed = pd.DataFrame(X_imputed, columns=df.columns, index=df.index) + df_transformed = pd.DataFrame( + X_imputed, columns=df.columns, index=df.index + ) return df_transformed diff --git a/qolmat/imputations/imputers_pytorch.py b/qolmat/imputations/imputers_pytorch.py index 1cf7d5d3..aff2b32f 100644 --- a/qolmat/imputations/imputers_pytorch.py +++ b/qolmat/imputations/imputers_pytorch.py @@ -1,15 +1,20 @@ -import pandas as pd -import numpy as np +"""Script for pytroch imputers.""" + +from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from typing import Any, Callable, List, Optional, Tuple, Union, Dict -from typing_extensions import Self +import numpy as np +import pandas as pd from numpy.typing import NDArray -from sklearn.preprocessing import StandardScaler from sklearn.base import BaseEstimator +from sklearn.preprocessing import StandardScaler -from qolmat.imputations.imputers import _Imputer, ImputerRegressor -from qolmat.utils.exceptions import EstimatorNotDefined, PyTorchExtraNotInstalled +# from typing_extensions import Self from qolmat.benchmark import metrics +from qolmat.imputations.imputers import ImputerRegressor, _Imputer +from qolmat.utils.exceptions import ( + EstimatorNotDefined, + PyTorchExtraNotInstalled, +) try: import torch @@ -20,8 +25,10 @@ class ImputerRegressorPyTorch(ImputerRegressor): - """ - This class inherits from the class ImputerRegressor and allows for PyTorch regressors. + """Imputer regressor based on PyTorch. + + This class inherits from the class ImputerRegressor + and allows for PyTorch regressors. Parameters ---------- @@ -32,8 +39,8 @@ class ImputerRegressorPyTorch(ImputerRegressor): handler_nan : str Can be `fit, `row` or `column`: - if `fit`, the estimator is assumed to be fitted on parcelar data, - - if `row` all non complete rows will be removed from the train dataset, and will not be - used for the inferance, + - if `row` all non complete rows will be removed from the train + dataset, and will not be used for the inference, - if `column`all non complete columns will be ignored. By default, `row` epochs: int @@ -42,6 +49,7 @@ class ImputerRegressorPyTorch(ImputerRegressor): Learning rate hen fitting the autoencoder, by default 0.001 loss_fn: Callable Loss used when fitting the autoencoder, by default nn.L1Loss() + """ def __init__( @@ -63,12 +71,15 @@ def __init__( self.loss_fn = loss_fn self.estimator = estimator - def _fit_estimator(self, estimator: nn.Sequential, X: pd.DataFrame, y: pd.DataFrame) -> Any: - """ - Fit the PyTorch estimator using the provided input and target data. + def _fit_estimator( + self, estimator: nn.Sequential, X: pd.DataFrame, y: pd.DataFrame + ) -> Any: + """Fit the PyTorch estimator using the provided input and target data. 
Parameters ---------- + estimator: torch.nn.Sequential + PyTorch estimator for imputing a column based on the others. X : pd.DataFrame The input data for training. y : pd.DataFrame @@ -78,36 +89,41 @@ def _fit_estimator(self, estimator: nn.Sequential, X: pd.DataFrame, y: pd.DataFr ------- Any Return fitted PyTorch estimator. + """ if not estimator: raise EstimatorNotDefined() optimizer = optim.Adam(estimator.parameters(), lr=self.learning_rate) loss_fn = self.loss_fn - if estimator is None: - assert EstimatorNotDefined() - else: - for epoch in range(self.epochs): - estimator.train() - optimizer.zero_grad() - - input_data = torch.Tensor(X.values) - target_data = torch.Tensor(y.values) - target_data = target_data.unsqueeze(1) - outputs = estimator(input_data) - loss = loss_fn(outputs, target_data) - - loss.backward() - optimizer.step() - if (epoch + 1) % 10 == 0: - print(f"Epoch [{epoch + 1}/{self.epochs}], Loss: {loss.item():.4f}") + + for epoch in range(self.epochs): + estimator.train() + optimizer.zero_grad() + + input_data = torch.Tensor(X.values) + target_data = torch.Tensor(y.values) + target_data = target_data.unsqueeze(1) + outputs = estimator(input_data) + loss = loss_fn(outputs, target_data) + + loss.backward() + optimizer.step() + if (epoch + 1) % 10 == 0: + print( + f"Epoch [{epoch + 1}/{self.epochs}], " + f"Loss: {loss.item():.4f}" + ) return estimator - def _predict_estimator(self, estimator: nn.Sequential, X: pd.DataFrame) -> pd.Series: - """ - Perform predictions using the trained PyTorch estimator. + def _predict_estimator( + self, estimator: nn.Sequential, X: pd.DataFrame + ) -> pd.Series: + """Perform predictions using the trained PyTorch estimator. Parameters ---------- + estimator: torch.nn.Sequential + PyTorch estimator for imputing a column based on the others. X : pd.DataFrame The input data for prediction. @@ -120,6 +136,7 @@ def _predict_estimator(self, estimator: nn.Sequential, X: pd.DataFrame) -> pd.Se ------ EstimatorNotDefined Raises an error if the attribute estimator is not defined. + """ if not estimator: raise EstimatorNotDefined() @@ -130,8 +147,7 @@ def _predict_estimator(self, estimator: nn.Sequential, X: pd.DataFrame) -> pd.Se class Autoencoder(nn.Module): - """ - Wrapper of a PyTorch autoencoder allowing to encode + """Wrapper of a PyTorch autoencoder allowing to encode. Parameters ---------- @@ -145,6 +161,7 @@ class Autoencoder(nn.Module): Learning rate for optimization, by default 0.001. loss_fn : Callable, optional Loss function for training, by default nn.L1Loss(). + """ def __init__( @@ -166,8 +183,7 @@ def __init__( self.scaler = StandardScaler() def forward(self, x: NDArray) -> nn.Sequential: - """ - Forward pass through the autoencoder. + """Forward pass through the autoencoder. Parameters ---------- @@ -178,14 +194,14 @@ def forward(self, x: NDArray) -> nn.Sequential: ------- pd.DataFrame Decoded data. + """ encode = self.encoder(x) decode = self.decoder(encode) return decode - def fit(self, X: NDArray, y: NDArray) -> Self: - """ - Fit the autoencoder to the data. + def fit(self, X: NDArray, y: NDArray) -> "Autoencoder": + """Fit the autoencoder to the data. 
Parameters ---------- @@ -198,6 +214,7 @@ def fit(self, X: NDArray, y: NDArray) -> Self: ------- Self Return Self + """ optimizer = optim.Adam(self.parameters(), lr=self.learning_rate) loss_fn = self.loss_fn @@ -214,14 +231,16 @@ def fit(self, X: NDArray, y: NDArray) -> Self: loss.backward() optimizer.step() if (epoch + 1) % 10 == 0: - print(f"Epoch [{epoch + 1}/{self.epochs}], Loss: {loss.item():.4f}") + print( + f"Epoch [{epoch + 1}/{self.epochs}], " + f"Loss: {loss.item():.4f}" + ) list_loss.append(loss.item()) self.loss.extend([list_loss]) return self def decode(self, Z: NDArray) -> NDArray: - """ - Decode encoded data. + """Decode encoded data. Parameters ---------- @@ -232,6 +251,7 @@ def decode(self, Z: NDArray) -> NDArray: ------- ndarray Decoded data. + """ Z_decoded = self.scaler.inverse_transform(Z) Z_decoded = self.decoder(torch.Tensor(Z_decoded)) @@ -239,8 +259,7 @@ def decode(self, Z: NDArray) -> NDArray: return Z_decoded def encode(self, X: NDArray) -> NDArray: - """ - Encode input data. + """Encode input data. Parameters ---------- @@ -251,6 +270,7 @@ def encode(self, X: NDArray) -> NDArray: ------- ndarray Encoded data. + """ X_encoded = self.encoder(torch.Tensor(X)) X_encoded = X_encoded.detach().numpy() @@ -275,6 +295,7 @@ class ImputerAutoencoder(_Imputer): Learning rate hen fitting the autoencoder, by default 0.001 loss_fn: Callable Loss used when fitting the autoencoder, by default nn.L1Loss() + """ def __init__( @@ -289,7 +310,12 @@ def __init__( learning_rate: float = 0.001, loss_fn: Callable = nn.L1Loss(), ) -> None: - super().__init__(groups=groups, columnwise=False, shrink=False, random_state=random_state) + super().__init__( + groups=groups, + columnwise=False, + shrink=False, + random_state=random_state, + ) self.loss_fn = loss_fn self.lamb = lamb self.max_iterations = max_iterations @@ -298,10 +324,13 @@ def __init__( self.encoder = encoder self.decoder = decoder - def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) -> Autoencoder: - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and - self.columnwise. + def _fit_element( + self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 + ) -> Autoencoder: + """Fit the imputer on `df`. + + It does that at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -321,6 +350,7 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) autoencoder = Autoencoder( @@ -336,9 +366,10 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0) def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending onself.groups and - self.columnwise. + """Transform the dataframe `df`. + + It does that at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -358,6 +389,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. 
+ """ autoencoder = self._dict_fitting[col][ngroup] df_train = df.copy() @@ -378,7 +410,9 @@ def _transform_element( X_next = autoencoder.decode(Z_next) X[mask] = X_next[mask] df_imputed = pd.DataFrame( - scaler.inverse_transform(X), index=df_train.index, columns=df_train.columns + scaler.inverse_transform(X), + index=df_train.index, + columns=df_train.columns, ) return df_imputed @@ -389,8 +423,7 @@ def build_mlp( output_dim: int = 1, activation: Callable = nn.ReLU, ) -> nn.Sequential: - """ - Constructs a multi-layer perceptron (MLP) with a custom architecture. + """Construct a multi-layer perceptron (MLP) with a custom architecture. Parameters ---------- @@ -401,7 +434,8 @@ def build_mlp( output_dim : int, optional Dimension of the output layer, defaults to 1. activation : nn.Module, optional - Activation function to use between hidden layers, defaults to nn.ReLU(). + Activation function to use between hidden layers, + defaults to nn.ReLU(). Returns ------- @@ -415,7 +449,9 @@ def build_mlp( Examples -------- - >>> model = build_mlp(input_dim=10, list_num_neurons=[32, 64, 128], output_dim=1) + >>> model = build_mlp( + ... input_dim=10, list_num_neurons=[32, 64, 128], output_dim=1 + ... ) >>> print(model) Sequential( (0): Linear(in_features=10, out_features=32, bias=True) @@ -426,6 +462,7 @@ def build_mlp( (5): ReLU() (6): Linear(in_features=128, out_features=1, bias=True) ) + """ layers = [] for num_neurons in list_num_neurons: @@ -445,8 +482,7 @@ def build_autoencoder( output_dim: int = 1, activation: Callable = nn.ReLU, ) -> Tuple[nn.Sequential, nn.Sequential]: - """ - Constructs an autoencoder with a custom architecture. + """Construct an autoencoder with a custom architecture. Parameters ---------- @@ -459,7 +495,8 @@ def build_autoencoder( output_dim : int, optional Dimension of the output layer, defaults to 1. activation : nn.Module, optional - Activation function to use between hidden layers, defaults to nn.ReLU(). + Activation function to use between hidden layers, + defaults to nn.ReLU(). Returns ------- @@ -473,10 +510,12 @@ def build_autoencoder( Examples -------- - >>> encoder, decoder = build_autoencoder(input_dim=10, - ... latent_dim=4, - ... list_num_neurons=[32, 64, 128], - ... output_dim=252) + >>> encoder, decoder = build_autoencoder( + ... input_dim=10, + ... latent_dim=4, + ... list_num_neurons=[32, 64, 128], + ... output_dim=252, + ... ) >>> print(encoder) Sequential( (0): Linear(in_features=10, out_features=128, bias=True) @@ -497,8 +536,8 @@ def build_autoencoder( (5): ReLU() (6): Linear(in_features=128, out_features=252, bias=True) ) - """ + """ encoder = build_mlp( input_dim=input_dim, output_dim=latent_dim, @@ -515,7 +554,9 @@ def build_autoencoder( class ImputerDiffusion(_Imputer): - """This class inherits from the class _Imputer. + """Imputer based on diffusion models. + + This class inherits from the class _Imputer. It is a wrapper for imputers based on diffusion models. """ @@ -536,8 +577,7 @@ def __init__( index_datetime: str = "", freq_str: str = "1D", ): - """This class inherits from the class _Imputer. - It is a wrapper for imputers based on diffusion models. + """Init ImputerDiffusion. 
Parameters ---------- @@ -555,8 +595,8 @@ def __init__( print_valid : bool, optional Print model performance for after several epochs, by default False metrics_valid : Tuple[Callable, ...], optional - Set of validation metrics, by default ( metrics.mean_absolute_error, - metrics.dist_wasserstein ) + Set of validation metrics, by default (metrics.mean_absolute_error, + metrics.dist_wasserstein) round : int, optional Number of decimal places to round to, for better displaying model performance, by default 10 @@ -564,10 +604,12 @@ def __init__( Name of columns that need to be imputed, by default () index_datetime : str Name of datetime-like index. - It is for processing time-series data, used in diffusion models e.g., TsDDPM. + It is for processing time-series data, used in diffusion models + e.g., TsDDPM. freq_str : str Frequency string of DateOffset of Pandas. - It is for processing time-series data, used in diffusion models e.g., TsDDPM. + It is for processing time-series data, used in diffusion models + e.g., TsDDPM. Examples -------- @@ -575,10 +617,20 @@ def __init__( >>> from qolmat.imputations.imputers_pytorch import ImputerDiffusion >>> from qolmat.imputations.diffusions.ddpms import TabDDPM >>> - >>> X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]]) - >>> imputer = ImputerDiffusion(model=TabDDPM(random_state=11), epochs=50, batch_size=1) + >>> X = np.array( + ... [ + ... [1, 1, 1, 1], + ... [np.nan, np.nan, 3, 2], + ... [1, 2, 2, 1], + ... [2, 2, 2, 2], + ... ] + ... ) + >>> imputer = ImputerDiffusion( + ... model=TabDDPM(random_state=11), epochs=50, batch_size=1 + ... ) >>> >>> df_imputed = imputer.fit_transform(X) + """ super().__init__(groups=groups, columnwise=False) self.model = model @@ -603,10 +655,13 @@ def _more_tags(self): }, } - def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0): - """ - Fits the imputer on `df`, at the group and/or column level depending onself.groups and - self.columnwise. + def _fit_element( + self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 + ): + """Fit the imputer on `df`. + + It does it at the group and/or column level depending onself.groups + and self.columnwise. Parameters ---------- @@ -626,6 +681,7 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0): ------ NotDataFrame Input has to be a pandas.DataFrame. + """ self._check_dataframe(df) hp = self._get_params_fit() @@ -634,8 +690,9 @@ def _fit_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0): def _transform_element( self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 ) -> pd.DataFrame: - """ - Transforms the dataframe `df`, at the group and/or column level depending on self.groups + """Transform the dataframe `df`. + + It does it at the group and/or column level depending on self.groups and self.columnwise. Parameters @@ -656,6 +713,7 @@ def _transform_element( ------ NotDataFrame Input has to be a pandas.DataFrame. + """ df_imputed = self.model.predict(df) return df_imputed @@ -682,9 +740,25 @@ def _get_params_fit(self) -> Dict: return hyperparams def get_summary_training(self) -> Dict: + """Get the summary of the training. + + Returns + ------- + Dict + Summary of the training + + """ return self.model.summary def get_summary_architecture(self) -> Dict: + """Get the summary of the architecture. 
+ + Returns + ------- + Dict + Summary of the architecture + + """ return { "number_parameters": self.model.num_params, "epsilon_model": self.model._eps_model, diff --git a/qolmat/imputations/mimca/estim_ncpMCA.py b/qolmat/imputations/mimca/estim_ncpMCA.py new file mode 100644 index 00000000..18cf78e7 --- /dev/null +++ b/qolmat/imputations/mimca/estim_ncpMCA.py @@ -0,0 +1,412 @@ +"""Estimate the optimal number of dimensions for MCA using CV or LOO.""" + + +import numpy as np +import pandas as pd +from tqdm import tqdm + + +def moy_p(V, weights): + """Compute the weighted mean of a vector, ignoring NaNs. + + Parameters + ---------- + V : array-like + Input vector with possible NaN values. + weights : array-like + Weights corresponding to each element in V. + + Returns + ------- + float + Weighted mean of non-NaN elements. + + """ + mask = ~np.isnan(V) + total_weight = np.sum(weights[mask]) + if total_weight == 0: + return 0.0 + return np.sum(V[mask] * weights[mask]) / total_weight + +def tab_disjonctif_NA(df): + """Create a disjunctive table for categorical variables, preserving NaNs. + + Parameters + ---------- + df : DataFrame + Input DataFrame with categorical and numeric variables. + + Returns + ------- + DataFrame + Disjunctive table with one-hot encoding, preserving NaNs. + + """ + df_encoded_list = [] + for col in df.columns: + if df[col].dtype.name == "category" or df[col].dtype == object: + df[col] = df[col].astype("category") + encoded = pd.get_dummies( + df[col], + prefix=col, + prefix_sep="_", + dummy_na=False, + dtype=float, + ) + categories = df[col].cat.categories.tolist() + col_names = [f"{col}_{cat}" for cat in categories] + encoded = encoded.reindex(columns=col_names, fill_value=0.0) + encoded[df[col].isna()] = np.nan + df_encoded_list.append(encoded) + else: + df_encoded_list.append(df[[col]]) + df_encoded = pd.concat(df_encoded_list, axis=1) + return df_encoded + +def prodna(data, noNA, rng): + """Introduce random missing values into a DataFrame. + + Parameters + ---------- + data : DataFrame + Input data. + noNA : float + Proportion of missing values to introduce. + rng : numpy.random.Generator + Random number generator. + + Returns + ------- + DataFrame + DataFrame with introduced missing values. + + """ + data = data.copy() + n_rows, n_cols = data.shape + total_values = n_rows * n_cols + n_missing = int(np.floor(total_values * noNA)) + missing_indices = rng.choice(total_values, n_missing, replace=False) + row_indices = missing_indices // n_cols + col_indices = missing_indices % n_cols + for i in range(n_missing): + row = row_indices[i] + col = col_indices[i] + data.iloc[row, col] = np.nan + return data + +def find_category(df_original, tab_disj): + """Reconstruct original categorical variables from disjunctive table. + + Parameters + ---------- + df_original : DataFrame + Original DataFrame with categorical variables. + tab_disj : DataFrame + Disjunctive table after imputation. + + Returns + ------- + DataFrame + Reconstructed DataFrame with imputed categorical variables. 
+ + """ + df_reconstructed = df_original.copy() + start_idx = 0 + for col in df_original.columns: + if df_original[col].dtype.name == "category" or df_original[col].dtype == object: # noqa: E501 + categories = df_original[col].cat.categories.tolist() + num_categories = len(categories) + sub_tab = tab_disj.iloc[:, start_idx : start_idx + num_categories] + max_indices = sub_tab.values.argmax(axis=1) + df_reconstructed[col] = [categories[idx] for idx in max_indices] + df_reconstructed[col].replace("__MISSING__", np.nan, inplace=True) + start_idx += num_categories + else: + start_idx += 1 + return df_reconstructed + +def imputeMCA( + don, + ncp=2, + method="Regularized", + row_w=None, + coeff_ridge=1, + threshold=1e-6, + seed=None, + maxiter=1000, +): + """Impute missing values in a dataset using MCA. + + Parameters + ---------- + don : DataFrame + Input dataset with missing values. + ncp : int, optional + Number of principal components for MCA. Default is 2. + method : str, optional + Imputation method ('Regularized' or 'EM'). Default is 'Regularized'. + row_w : array-like, optional + Row weights. If None, uniform weights are applied. Default is None. + coeff_ridge : float, optional + Regularization coefficient for 'Regularized' MCA. Default is 1. + threshold : float, optional + Convergence threshold. Default is 1e-6. + seed : int, optional + Random seed for reproducibility. Default is None. + maxiter : int, optional + Maximum number of iterations for the imputation process. + + Returns + ------- + dict + Dictionary containing: + - "tab_disj": Disjunctive coded table after imputation. + - "completeObs": Complete dataset with missing values imputed. + + """ + don = pd.DataFrame(don) + don = don.copy() + for col in don.columns: + if not pd.api.types.is_numeric_dtype(don[col]) or don[col].dtype == "bool": # noqa: E501 + don[col] = don[col].astype("category") + new_categories = don[col].cat.categories.astype(str) + don[col] = don[col].cat.rename_categories(new_categories) # noqa: E501 + else: + unique_values = don[col].dropna().unique() + if set(unique_values).issubset({0, 1}): + don[col] = don[col].astype("category") + new_categories = don[col].cat.categories.astype(str) + don[col] = don[col].cat.rename_categories(new_categories) # noqa: E501 + if row_w is None: + row_w = np.ones(len(don)) / len(don) + else: + row_w = np.array(row_w, dtype=float) + row_w /= row_w.sum() + tab_disj_NA = tab_disjonctif_NA(don) + if ncp == 0: + tab_disj_comp_mean = tab_disj_NA.apply(lambda col: moy_p(col.values, row_w)) # noqa: E501 + tab_disj_comp = tab_disj_NA.fillna(tab_disj_comp_mean) + completeObs = find_category(don, tab_disj_comp) + return {"tab_disj": tab_disj_comp, "completeObs": completeObs} + tab_disj_comp = tab_disj_NA.copy() + hidden = tab_disj_NA.isna() + tab_disj_comp.fillna(tab_disj_comp.mean(), inplace=True) + tab_disj_rec_old = tab_disj_comp.copy() + nbiter = 0 + continue_flag = True + while continue_flag: + nbiter += 1 + M = tab_disj_comp.apply(lambda col: moy_p(col.values, row_w)) / don.shape[1] # noqa: E501 + M = M.replace({0: np.finfo(float).eps}) + M = M.fillna(np.finfo(float).eps) + tab_disj_comp_mean = tab_disj_comp.apply(lambda col: moy_p(col.values, row_w)) # noqa: E501 + tab_disj_comp_mean = tab_disj_comp_mean.replace({0: np.finfo(float).eps}) # noqa: E501 + Z = tab_disj_comp.div(tab_disj_comp_mean, axis=1) + Z_mean = Z.apply(lambda col: moy_p(col.values, row_w)) + Z = Z.subtract(Z_mean, axis=1) + Zscale = Z.multiply(np.sqrt(M), axis=1) + U, s, Vt = np.linalg.svd(Zscale.values, 
full_matrices=False) + V = Vt.T + U = U[:, :ncp] + V = V[:, :ncp] + s = s[:ncp] + if method.lower() == "em": + moyeig = 0 + else: + if len(s) > ncp: + moyeig = np.mean(s[ncp:] ** 2) + moyeig = min(moyeig * coeff_ridge, s[ncp - 1] ** 2) + else: + moyeig = 0 + eig_shrunk = (s ** 2 - moyeig) / s + eig_shrunk = np.maximum(eig_shrunk, 0) + rec = U @ np.diag(eig_shrunk) @ V.T + tab_disj_rec = pd.DataFrame( + rec, columns=tab_disj_comp.columns, index=tab_disj_comp.index + ) + tab_disj_rec = tab_disj_rec.div(np.sqrt(M), axis=1) + 1 + tab_disj_rec = tab_disj_rec.multiply(tab_disj_comp_mean, axis=1) + diff = tab_disj_rec - tab_disj_rec_old + diff_values = diff.values + hidden_values = hidden.values + diff_values[~hidden_values] = 0 + relch = np.sum((diff_values**2) * row_w[:, None]) + tab_disj_rec_old = tab_disj_rec.copy() + tab_disj_comp.values[hidden_values] = tab_disj_rec.values[hidden_values] # noqa: E501 + continue_flag = (relch > threshold) and (nbiter < maxiter) + completeObs = find_category(don, tab_disj_comp) + return {"tab_disj": tab_disj_comp, "completeObs": completeObs} + +def estim_ncpMCA( + don, + ncp_min=0, + ncp_max=5, + method="Regularized", + method_cv="Kfold", + nbsim=100, + pNA=0.05, + ind_sup=None, + quanti_sup=None, + quali_sup=None, + threshold=1e-4, + verbose=True, + seed=None +): + """Estimate the optimal number of dimensions for MCA using CV. + + Parameters + ---------- + don : DataFrame + Input data. + ncp_min : int, optional + Minimum number of components to test. Default is 0. + ncp_max : int, optional + Maximum number of components to test. Default is 5. + method : str, optional + Imputation method ('Regularized' or 'EM'). Default is 'Regularized'. + method_cv : str, optional + Cross-validation method ('Kfold' or 'loo'). Default is 'Kfold'. + nbsim : int, optional + Number of simulations for cross-validation. Default is 100. + pNA : float, optional + Proportion of missing values to simulate. Default is 0.05. + ind_sup : array-like, optional + Indices of supplementary individuals to exclude from the analysis. + quanti_sup : array-like, optional + Indices of supplementary quantitative variables to exclude. + quali_sup : array-like, optional + Indices of supplementary qualitative variables to exclude. + threshold : float, optional + Convergence threshold. Default is 1e-4. + verbose : bool, optional + Whether to print progress. Default is True. + seed : int, optional + Random seed for reproducibility. Default is None. + + Returns + ------- + dict + Dictionary containing: + - 'ncp': Optimal number of dimensions. + - 'criterion': List of criterion values for each dimension.
+ + """ + don = don.copy() + if ind_sup is not None: + don = don.drop(index=ind_sup) + if quanti_sup is not None or quali_sup is not None: + cols_to_drop = [] + if quanti_sup is not None: + cols_to_drop.extend(don.columns[quanti_sup]) + if quali_sup is not None: + cols_to_drop.extend(don.columns[quali_sup]) + don = don.drop(columns=cols_to_drop) + method = method.lower() + method_cv = method_cv.lower() + for col in don.columns: + if not pd.api.types.is_categorical_dtype(don[col]): + don[col] = don[col].astype("category") + vrai_tab = tab_disjonctif_NA(don) + criterion = [] + if seed is not None: + rng = np.random.default_rng(seed) + else: + rng = np.random.default_rng() + if method_cv == "kfold": + res = np.full((ncp_max - ncp_min + 1, nbsim), np.nan) + if verbose: + sim_range = tqdm(range(nbsim), desc="Simulations") + else: + sim_range = range(nbsim) + for sim in sim_range: + compteur = 0 + max_attempts = 50 + while compteur < max_attempts: + donNA = prodna(don, pNA, rng) + categories_complete = all( + donNA[col].nunique(dropna=True) == don[col].nunique(dropna=True) # noqa: E501 + for col in don.columns + ) + if categories_complete: + break + compteur += 1 + else: + raise ValueError( + "It is too difficult to suppress some cells.\n" + "Maybe several categories are taken by only one individual" + ) + for nbaxes in range(ncp_min, ncp_max + 1): + imputed = imputeMCA( + donNA, + ncp=nbaxes, + method=method, + threshold=threshold, + seed=seed + ) + tab_disj_comp = imputed["tab_disj"] + numerator = ((tab_disj_comp - vrai_tab) ** 2).sum().sum() + denominator = tab_disjonctif_NA(donNA).isna().sum().sum() - vrai_tab.isna().sum().sum() # noqa: E501 + if denominator == 0: + res[nbaxes - ncp_min, sim] = np.nan + else: + res[nbaxes - ncp_min, sim] = numerator / denominator + crit = np.nanmean(res, axis=1) + if np.all(np.isnan(crit)): + raise ValueError("All simulations resulted in NaN error") + ncp = int(np.nanargmin(crit) + ncp_min) + criterion = crit.tolist() + return {"ncp": ncp, "criterion": criterion} + + + elif method_cv == "loo": + criterion = [] + if verbose: + loop = tqdm(total=(ncp_max - ncp_min + 1) * don.shape[0], desc="LOO CV") # noqa: E501 + for nbaxes in range(ncp_min, ncp_max + 1): + errors = [] + for i in range(don.shape[0]): + donNA = don.copy() + for col in don.columns: + if not pd.isna(donNA.at[donNA.index[i], col]): + # Temporarily set the value to NaN + donNA.at[donNA.index[i], col] = np.nan + # Check if all categories are still represented + categories_complete = all( + donNA[col].nunique(dropna=True) == don[col].nunique(dropna=True) # noqa: E501 + for col in don.columns + ) + if not categories_complete: + # Skip this iteration if removing the value causes an issue + donNA.at[donNA.index[i], col] = don.at[don.index[i], col] # noqa: E501 + continue + # Impute missing values using MCA + imputed = imputeMCA( + donNA, + ncp=nbaxes, + method=method, + threshold=threshold, + seed=seed + ) + tab_disj_comp = imputed["tab_disj"] + vrai_tab = tab_disjonctif_NA(don) + numerator = ((tab_disj_comp - vrai_tab) ** 2).sum().sum() + denominator = 1 # Since we imputed one value + error = numerator / denominator + errors.append(error) + # Restore the original value + donNA.at[donNA.index[i], col] = don.at[don.index[i], col] + if verbose: + loop.update(1) + mean_error = np.mean(errors) + criterion.append(mean_error) + if verbose: + loop.close() + if np.all(np.isnan(criterion)): + raise ValueError("All computations resulted in NaN errors") + ncp = int(np.nanargmin(criterion) + ncp_min) + return 
{"ncp": ncp, "criterion": criterion} + else: + raise ValueError("method_cv must be 'kfold' or 'loo'") + + diff --git a/qolmat/imputations/mimca/imputer_mca.py b/qolmat/imputations/mimca/imputer_mca.py new file mode 100644 index 00000000..fb371462 --- /dev/null +++ b/qolmat/imputations/mimca/imputer_mca.py @@ -0,0 +1,183 @@ +import numpy as np # noqa: D100 +import pandas as pd + +from qolmat.utils.algebra import svdtriplet +from qolmat.utils.utils import ( + find_category, + moy_p, + tab_disjonctif_NA, + tab_disjonctif_prop, +) + + +def imputeMCA( + don, + ncp=2, + method="Regularized", + row_w=None, + coeff_ridge=1, + threshold=1e-6, + seed=None, + maxiter=1000, +): + """Impute missing values in a dataset using (MCA). + + Parameters + ---------- + don : DataFrame + Input dataset with missing values. + ncp : int, optional + Number of principal components for MCA. Default is 2. + method : str, optional + Imputation method ('Regularized' or 'EM'). Default is 'Regularized'. + row_w : array-like, optional + Row weights. If None, uniform weights are applied. Default is None. + coeff_ridge : float, optional + Regularization coefficient for 'Regularized' MCA. Default is 1. + threshold : float, optional + Convergence threshold. Default is 1e-6. + seed : int, optional + Random seed for reproducibility. Default is None. + maxiter : int, optional + Maximum number of iterations for the imputation process. + + Returns + ------- + dict + Dictionary containing: + - "tab_disj": Disjunctive coded table after imputation. + - "completeObs": Complete dataset with missing values imputed. + + """ + # Ensure the data is a DataFrame + don = pd.DataFrame(don) + don = don.copy() + + for col in don.columns: + if ( + not pd.api.types.is_numeric_dtype(don[col]) + or don[col].dtype == "bool" + ): # noqa: E501 + don[col] = don[col].astype("category") + # Convert categories to strings and rename them + new_categories = don[col].cat.categories.astype(str) + don[col] = don[col].cat.rename_categories(new_categories) + else: + unique_values = don[col].dropna().unique() + if set(unique_values).issubset({0, 1}): + don[col] = don[col].astype("category") + new_categories = don[col].cat.categories.astype(str) + don[col] = don[col].cat.rename_categories(new_categories) # noqa: E501 + + print("Data types after conversion:") + print(don.dtypes) + + # Handle row weights + if row_w is None: + row_w = np.ones(len(don)) / len(don) + else: + row_w = np.array(row_w, dtype=float) + row_w /= row_w.sum() + + # Initial imputation and creation of disjunctive tables + tab_disj_NA = tab_disjonctif_NA(don) + tab_disj_comp = tab_disjonctif_prop(don, seed=seed) + hidden = tab_disj_NA.isna() + tab_disj_rec_old = tab_disj_comp.copy() + + # Initialize iteration parameters + nbiter = 0 + continue_flag = True + + while continue_flag: + nbiter += 1 + + # Step 1: Compute weighted means M + M = ( + tab_disj_comp.apply(lambda col: moy_p(col.values, row_w)) + / don.shape[1] + ) # noqa: E501 + M = M.replace({0: np.finfo(float).eps}) + M = M.fillna(np.finfo(float).eps) + + if (M < 0).any(): + raise ValueError( + "Negative values encountered in M. Check data preprocessing." 
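+ # M estimates the weighted column proportions of the disjunctive table,
+ # divided by the number of variables; the scaling step below multiplies
+ # by np.sqrt(M), so a negative entry would produce NaNs, hence this check.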
+ ) # noqa: E501 + + print(f"Iteration {nbiter}:") + print("Weighted means (M):") + print(M.head()) + + # Step 2: Center and scale the data + tab_disj_comp_mean = tab_disj_comp.apply( + lambda col: moy_p(col.values, row_w) + ) # noqa: E501 + tab_disj_comp_mean = tab_disj_comp_mean.replace( + {0: np.finfo(float).eps} + ) # noqa: E501 + Z = tab_disj_comp.div(tab_disj_comp_mean, axis=1) + Z_mean = Z.apply(lambda col: moy_p(col.values, row_w)) + Z = Z.subtract(Z_mean, axis=1) + Zscale = Z.multiply(np.sqrt(M), axis=1) + + print("Centered and scaled data (Zscale):") + print(Zscale.head()) + + # Step 3: Perform weighted SVD + s, U, V = svdtriplet(Zscale.values, row_w=row_w, ncp=ncp) + print("Singular values (s):") + print(s) + print("Left singular vectors (U):") + print(U) + print("Right singular vectors (V):") + print(V) + + # Step 4: Regularization (Shrinking Eigenvalues) + if method.lower() == "em": + moyeig = 0 + else: + # Calculate moyeig based on R's imputeMCA logic + if len(s) > ncp: + moyeig = np.mean(s[ncp:] ** 2) + moyeig = min(moyeig * coeff_ridge, s[ncp] ** 2) + else: + moyeig = 0 + # Set to 0 when there are no additional singular values + eig_shrunk = (s[:ncp] ** 2 - moyeig) / s[:ncp] + eig_shrunk = np.maximum(eig_shrunk, 0) # Ensure non-negative + print("Shrunk eigenvalues (eig_shrunk):") + print(eig_shrunk) + + # Step 5: Reconstruct the data + rec = U @ np.diag(eig_shrunk) @ V.T + tab_disj_rec = pd.DataFrame( + rec, columns=tab_disj_comp.columns, index=tab_disj_comp.index + ) # noqa: E501 + tab_disj_rec = tab_disj_rec.div(np.sqrt(M), axis=1) + 1 + tab_disj_rec = tab_disj_rec.multiply(tab_disj_comp_mean, axis=1) + print("Reconstructed disjunctive table (tab_disj_rec):") + print(tab_disj_rec.head()) + + # Step 6: Compute difference and relative change + diff = tab_disj_rec - tab_disj_rec_old + diff_values = diff.values + hidden_values = hidden.values + # Zero out observed positions + diff_values[~hidden_values] = 0 + relch = np.sum((diff_values**2) * row_w[:, None]) + print(f"Relative Change: {relch}\n") + + # Step 7: Update for next iteration + tab_disj_rec_old = tab_disj_rec.copy() + tab_disj_comp.values[hidden_values] = tab_disj_rec.values[ + hidden_values + ] # noqa: E501 + + # Step 8: Check convergence + continue_flag = (relch > threshold) and (nbiter < maxiter) + + # Step 9: Reconstruct categorical data + completeObs = find_category(don, tab_disj_comp) + + return {"tab_disj": tab_disj_comp, "completeObs": completeObs} diff --git a/qolmat/imputations/mimca/mimca.py b/qolmat/imputations/mimca/mimca.py new file mode 100644 index 00000000..c36c567b --- /dev/null +++ b/qolmat/imputations/mimca/mimca.py @@ -0,0 +1,665 @@ +import numpy as np +import pandas as pd +from tqdm import tqdm + + +def moy_p(V, weights): + """Compute the weighted mean of a vector, ignoring NaNs. + + Parameters + ---------- + V : array-like + Input vector with possible NaN values. + weights : array-like + Weights corresponding to each element in V. + + Returns + ------- + float + Weighted mean of non-NaN elements. + + """ + mask = ~np.isnan(V) + total_weight = np.sum(weights[mask]) + if total_weight == 0: + return 0.0 + return np.sum(V[mask] * weights[mask]) / total_weight + + +def tab_disjonctif_NA(df) -> pd.DataFrame: + """Create a disjunctive table for categorical variables, preserving NaNs. + + Parameters + ---------- + df : DataFrame + Input DataFrame with categorical and numeric variables. + + Returns + ------- + DataFrame + Disjunctive table with one-hot encoding, preserving NaNs. 
+ + """ + df_encoded_list = [] + for col in df.columns: + if df[col].dtype.name == "category" or df[col].dtype == object: + df[col] = df[col].astype("category") + encoded = pd.get_dummies( + df[col], + prefix=col, + prefix_sep="_", + dummy_na=False, + dtype=float, + ) + categories = df[col].cat.categories.tolist() + col_names = [f"{col}_{cat}" for cat in categories] + encoded = encoded.reindex(columns=col_names, fill_value=0.0) + encoded[df[col].isna()] = np.nan + df_encoded_list.append(encoded) + else: + df_encoded_list.append(df[[col]]) + df_encoded = pd.concat(df_encoded_list, axis=1) + return df_encoded + + +def prodna(data, noNA, rng): + """Introduce random missing values into a DataFrame. + + Parameters + ---------- + data : DataFrame + Input data. + noNA : float + Proportion of missing values to introduce. + rng : numpy.random.Generator + Random number generator. + + Returns + ------- + DataFrame + DataFrame with introduced missing values. + + """ + data = data.copy() + n_rows, n_cols = data.shape + total_values = n_rows * n_cols + n_missing = int(np.floor(total_values * noNA)) + missing_indices = rng.choice(total_values, n_missing, replace=False) + row_indices = missing_indices // n_cols + col_indices = missing_indices % n_cols + for i in range(n_missing): + row = row_indices[i] + col = col_indices[i] + data.iloc[row, col] = np.nan + return data + + +def find_category(df_original, tab_disj): + """Reconstruct original categorical variables from disjunctive table. + + Parameters + ---------- + df_original : DataFrame + Original DataFrame with categorical variables. + tab_disj : DataFrame + Disjunctive table after imputation. + + Returns + ------- + DataFrame + Reconstructed DataFrame with imputed categorical variables. + + """ + df_reconstructed = df_original.copy() + start_idx = 0 + for col in df_original.columns: + if ( + df_original[col].dtype.name == "category" + or df_original[col].dtype == object + ): # noqa: E501 + categories = df_original[col].cat.categories.tolist() + num_categories = len(categories) + sub_tab = tab_disj.iloc[:, start_idx : start_idx + num_categories] + max_indices = sub_tab.values.argmax(axis=1) + df_reconstructed[col] = [categories[idx] for idx in max_indices] + df_reconstructed[col] = df_reconstructed[col].astype("category") + start_idx += num_categories + else: + start_idx += 1 + return df_reconstructed + + +def imputeMCA( + don, + ncp=2, + method="Regularized", + row_w=None, + coeff_ridge=1, + threshold=1e-6, + seed=None, + maxiter=1000, +): + """Impute missing values in a dataset using (MCA). + + Parameters + ---------- + don : DataFrame + Input dataset with missing values. + ncp : int, optional + Number of principal components for MCA. Default is 2. + method : str, optional + Imputation method ('Regularized' or 'EM'). Default is 'Regularized'. + row_w : array-like, optional + Row weights. If None, uniform weights are applied. Default is None. + coeff_ridge : float, optional + Regularization coefficient for 'Regularized' MCA. Default is 1. + threshold : float, optional + Convergence threshold. Default is 1e-6. + seed : int, optional + Random seed for reproducibility. Default is None. + maxiter : int, optional + Maximum number of iterations for the imputation process. + + Returns + ------- + dict + Dictionary containing: + - "tab_disj": Disjunctive coded table after imputation. + - "completeObs": Complete dataset with missing values imputed. 
+ + """ + don = pd.DataFrame(don) + don = don.copy() + for col in don.columns: + if ( + not pd.api.types.is_numeric_dtype(don[col]) + or don[col].dtype == "bool" + ): # noqa: E501 + don[col] = don[col].astype("category") + new_categories = don[col].cat.categories.astype(str) + don[col] = don[col].cat.rename_categories(new_categories) # noqa: E501 + else: + unique_values = don[col].dropna().unique() + if set(unique_values).issubset({0, 1}): + don[col] = don[col].astype("category") + new_categories = don[col].cat.categories.astype(str) + don[col] = don[col].cat.rename_categories(new_categories) # noqa: E501 + if row_w is None: + row_w = np.ones(len(don)) / len(don) + else: + row_w = np.array(row_w, dtype=float) + row_w /= row_w.sum() + tab_disj_NA = tab_disjonctif_NA(don) + if ncp == 0: + tab_disj_comp_mean = tab_disj_NA.apply( + lambda col: moy_p(col.values, row_w) + ) # noqa: E501 + tab_disj_comp = tab_disj_NA.fillna(tab_disj_comp_mean) + completeObs = find_category(don, tab_disj_comp) + return {"tab_disj": tab_disj_comp, "completeObs": completeObs} + tab_disj_comp = tab_disj_NA.copy() + hidden = tab_disj_NA.isna() + tab_disj_comp.fillna(tab_disj_comp.mean(), inplace=True) + tab_disj_rec_old = tab_disj_comp.copy() + nbiter = 0 + continue_flag = True + while continue_flag: + nbiter += 1 + M = ( + tab_disj_comp.apply(lambda col: moy_p(col.values, row_w)) + / don.shape[1] + ) # noqa: E501 + M = M.replace({0: np.finfo(float).eps}) + M = M.fillna(np.finfo(float).eps) + tab_disj_comp_mean = tab_disj_comp.apply( + lambda col: moy_p(col.values, row_w) + ) # noqa: E501 + tab_disj_comp_mean = tab_disj_comp_mean.replace( + {0: np.finfo(float).eps} + ) # noqa: E501 + Z = tab_disj_comp.div(tab_disj_comp_mean, axis=1) + Z_mean = Z.apply(lambda col: moy_p(col.values, row_w)) + Z = Z.subtract(Z_mean, axis=1) + Zscale = Z.multiply(np.sqrt(M), axis=1) + U, s, Vt = np.linalg.svd(Zscale.values, full_matrices=False) + V = Vt.T + U = U[:, :ncp] + V = V[:, :ncp] + s = s[:ncp] + if method.lower() == "em": + moyeig = 0 + else: + if len(s) > ncp: + moyeig = np.mean(s[ncp:] ** 2) + moyeig = min(moyeig * coeff_ridge, s[ncp - 1] ** 2) + else: + moyeig = 0 + eig_shrunk = (s**2 - moyeig) / s + eig_shrunk = np.maximum(eig_shrunk, 0) + rec = U @ np.diag(eig_shrunk) @ V.T + tab_disj_rec = pd.DataFrame( + rec, columns=tab_disj_comp.columns, index=tab_disj_comp.index + ) + tab_disj_rec = tab_disj_rec.div(np.sqrt(M), axis=1) + 1 + tab_disj_rec = tab_disj_rec.multiply(tab_disj_comp_mean, axis=1) + diff = tab_disj_rec - tab_disj_rec_old + diff_values = diff.values + hidden_values = hidden.values + diff_values[~hidden_values] = 0 + relch = np.sum((diff_values**2) * row_w[:, None]) + tab_disj_rec_old = tab_disj_rec.copy() + tab_disj_comp.values[hidden_values] = tab_disj_rec.values[ + hidden_values + ] # noqa: E501 + continue_flag = (relch > threshold) and (nbiter < maxiter) + completeObs = find_category(don, tab_disj_comp) + return {"tab_disj": tab_disj_comp, "completeObs": completeObs} + + +def estim_ncpMCA( + don, + ncp_min=0, + ncp_max=5, + method="Regularized", + method_cv="Kfold", + nbsim=100, + pNA=0.05, + ind_sup=None, + quanti_sup=None, + quali_sup=None, + threshold=1e-4, + verbose=True, + seed=None, +): + """Estimate the optimal number of dimensions for MCA using CV. + + Parameters + ---------- + don : DataFrame + Input data. + ncp_min : int, optional + Minimum number of components to test. Default is 0. + ncp_max : int, optional + Maximum number of components to test. Default is 5. 
+ method : str, optional + Imputation method ('Regularized' or 'EM'). Default is 'Regularized'. + method_cv : str, optional + Cross-validation method ('Kfold' or 'loo'). Default is 'Kfold'. + nbsim : int, optional + Number of simulations for cross-validation. Default is 100. + pNA : float, optional + Proportion of missing values to simulate. Default is 0.05. + ind_sup : array-like, optional + Indices of supplementary individuals to exclude from the analysis. + quanti_sup : array-like, optional + Indices of supplementary quantitative variables to exclude. + quali_sup : array-like, optional + Indices of supplementary qualitative variables to exclude. + threshold : float, optional + Convergence threshold. Default is 1e-4. + verbose : bool, optional + Whether to print progress. Default is True. + seed : int, optional + Random seed for reproducibility. Default is None. + + Returns + ------- + dict + Dictionary containing: + - 'ncp': Optimal number of dimensions. + - 'criterion': List of criterion values for each dimension. + + """ + don = don.copy() + if ind_sup is not None: + don = don.drop(index=ind_sup) + if quanti_sup is not None or quali_sup is not None: + cols_to_drop = [] + if quanti_sup is not None: + cols_to_drop.extend(don.columns[quanti_sup]) + if quali_sup is not None: + cols_to_drop.extend(don.columns[quali_sup]) + don = don.drop(columns=cols_to_drop) + method = method.lower() + method_cv = method_cv.lower() + for col in don.columns: + if not pd.api.types.is_categorical_dtype(don[col]): + don[col] = don[col].astype("category") + vrai_tab = tab_disjonctif_NA(don) + criterion = [] + if seed is not None: + rng = np.random.default_rng(seed) + else: + rng = np.random.default_rng() + if method_cv == "kfold": + res = np.full((ncp_max - ncp_min + 1, nbsim), np.nan) + if verbose: + sim_range = tqdm(range(nbsim), desc="Simulations") + else: + sim_range = range(nbsim) + for sim in sim_range: + compteur = 0 + max_attempts = 50 + while compteur < max_attempts: + donNA = prodna(don, pNA, rng) + categories_complete = all( + donNA[col].nunique(dropna=True) + == don[col].nunique(dropna=True) # noqa: E501 + for col in don.columns + ) + if categories_complete: + break + compteur += 1 + else: + raise ValueError( + "It is too difficult to suppress some cells.\n" + "Maybe several categories are taken by only one individual. " + 'You should remove these variables or try with "loo".' + ) + for nbaxes in range(ncp_min, ncp_max + 1): + imputed = imputeMCA( + donNA, + ncp=nbaxes, + method=method, + threshold=threshold, + seed=seed, + ) + tab_disj_comp = imputed["tab_disj"] + numerator = ((tab_disj_comp - vrai_tab) ** 2).sum().sum() + denominator = ( + tab_disjonctif_NA(donNA).isna().sum().sum() + - vrai_tab.isna().sum().sum() + ) # noqa: E501 + if denominator == 0: + res[nbaxes - ncp_min, sim] = np.nan + else: + res[nbaxes - ncp_min, sim] = numerator / denominator + crit = np.nanmean(res, axis=1) + if np.all(np.isnan(crit)): + raise ValueError( + "All simulations resulted in NaN errors. Please check your data and parameters." + ) # noqa: E501 + ncp = int(np.nanargmin(crit) + ncp_min) + criterion = crit.tolist() + return {"ncp": ncp, "criterion": criterion} + elif method_cv == "loo": + # LOO cross-validation is not implemented in this module. + raise NotImplementedError( + "method_cv='loo' is not implemented here; use method_cv='kfold'." + ) + else: + raise ValueError("method_cv must be 'kfold' or 'loo'") + + +def imputeMCA_print( + don, + ncp, + method="Regularized", + row_w=None, + coeff_ridge=1, + threshold=1e-6, + seed=None, + maxiter=1000, + verbose=False, + print_msg="", +): + """Run MCA imputation, printing progress if verbose.
+ + Parameters + ---------- + don : DataFrame + Input dataset with missing values. + ncp : int + Number of principal components for MCA. + method : str, optional + Imputation method ('Regularized' or 'EM'). Default is 'Regularized'. + row_w : array-like, optional + Row weights. If None, uniform weights are applied. Default is None. + coeff_ridge : float, optional + Regularization coefficient for 'Regularized' MCA. Default is 1. + threshold : float, optional + Convergence threshold. Default is 1e-6. + seed : int, optional + Random seed for reproducibility. Default is None. + maxiter : int, optional + Maximum number of iterations for the imputation process. + verbose : bool, optional + Whether to print progress. Default is False. + print_msg : str, optional + Message to print during imputation. Default is ''. + + Returns + ------- + dict + Result of the MCA imputation. + + """ + if verbose: + print(f"{print_msg}...", end="", flush=True) + res = imputeMCA( + don=don, + ncp=ncp, + method=method, + row_w=row_w, + coeff_ridge=coeff_ridge, + threshold=threshold, + seed=seed, + maxiter=maxiter, + ) # noqa: E501 + if verbose: + print("done") + return res + + +def normtdc(tab_disj, data_na): + """Normalize the disjunctive table to ensure values are between 0 and 1. + + Parameters + ---------- + tab_disj : DataFrame + Disjunctive table to normalize. + data_na : DataFrame + DataFrame with original categorical data. + + Returns + ------- + DataFrame + Normalized disjunctive table. + + """ + tdc = tab_disj.copy() + tdc[tdc < 0] = 0 + tdc[tdc > 1] = 1 + col_suppr = np.cumsum( + [len(col.cat.categories) for _, col in data_na.items()] + ) # noqa: E501 + + def normalize_row(row, col_suppr): + start = 0 + for end in col_suppr: + segment = row[start:end] + total = np.sum(segment) + if total != 0: + row[start:end] = segment / total + start = end + return row + + tdc = tdc.apply( + lambda row: normalize_row(row.values, col_suppr), + axis=1, + result_type="expand", + ) # noqa: E501 + tdc.columns = tab_disj.columns + return tdc + + +def draw(tab_disj, Don, Don_na): + """Draw random samples from the normalized disjtable to reconstruct data. + + Parameters + ---------- + tab_disj : DataFrame + Normalized disjunctive table. + Don : DataFrame + Original complete dataset. + Don_na : DataFrame + Dataset with missing values. + + Returns + ------- + DataFrame + Reconstructed dataset with imputed categorical values. + + """ + Don_res = Don.copy() + nbdummy = np.ones(Don.shape[1], dtype=int) + is_quali = [ + i + for i, col in enumerate(Don.columns) + if not pd.api.types.is_numeric_dtype(Don[col]) + ] # noqa: E501 + nbdummy[is_quali] = [Don.iloc[:, i].nunique() for i in is_quali] + vec = np.concatenate(([0], np.cumsum(nbdummy))) + for idx, i in enumerate(is_quali): + start = vec[idx] + end = vec[idx + 1] + cols = tab_disj.columns[start:end] + probs = tab_disj[cols].values + categories = Don.iloc[:, i].cat.categories + sampled_indices = [] + for p in probs: + if np.sum(p) > 0: + p_normalized = p / np.sum(p) + sampled_idx = np.random.choice(len(categories), p=p_normalized) # noqa: E501 + else: + sampled_idx = np.nan + sampled_indices.append(sampled_idx) + Don_res.iloc[:, i] = pd.Categorical.from_codes( + sampled_indices, categories=categories + ) # noqa: E501 + return Don_res + + +def MIMCA( + X, + nboot=100, + ncp=2, + coeff_ridge=1, + threshold=1e-6, + maxiter=1000, + verbose=False, +): # noqa: E501 + """Perform Multiple Imputation with (MIMCA). + + Parameters + ---------- + X : DataFrame + Input data with missing values. 
+ nboot : int, optional + Number of bootstrap samples. Default is 100. + ncp : int, optional + Number of principal components for MCA. Default is 2. + coeff_ridge : float, optional + Regularization coefficient for 'Regularized' MCA. Default is 1. + threshold : float, optional + Convergence threshold. Default is 1e-6. + maxiter : int, optional + Maximum number of iterations for the imputation process. + verbose : bool, optional + Whether to print progress. Default is False. + + Returns + ------- + dict + Dictionary containing the results of the multiple imputations. + + """ + import warnings + + X = X.copy() + # Convert non-numeric columns to categorical + is_quali = [ + col for col in X.columns if not pd.api.types.is_numeric_dtype(X[col]) + ] # noqa: E501 + X[is_quali] = X[is_quali].apply(lambda col: col.astype("category")) + X = X.apply( + lambda col: col.cat.remove_unused_categories() + if col.dtype.name == "category" + else col + ) # noqa: E501 + # Remove variables with only one category + OneCat = ( + X.apply( + lambda col: len(col.cat.categories) + if col.dtype.name == "category" + else np.nan + ) + == 1 + ) # noqa: E501 + if OneCat.any(): + warning_vars = X.columns[OneCat].tolist() + warnings.warn( + f"The following variables are constant and have been suppressed from the analysis: {', '.join(warning_vars)}" + ) # noqa: E501 + X = X.drop(columns=warning_vars) + if X.shape[1] <= 1: + raise ValueError( + "No sufficient variables have 2 categories or more" + ) # noqa: E501 + n = X.shape[0] + # Generate bootstrap weights + rng = np.random.default_rng() + Boot = rng.integers(low=0, high=n, size=(n, nboot)) + Weight = np.zeros((n, nboot)) + for i in range(nboot): + counts = np.bincount(Boot[:, i], minlength=n) + Weight[:, i] = counts + Weight = Weight / Weight.sum(axis=0) + # Perform multiple imputations + res_imp = [] + for i in range(nboot): + if verbose: + print(f"Imputation {i + 1}/{nboot}") + weight_i = Weight[:, i] + res = imputeMCA_print( + don=X, + ncp=ncp, + coeff_ridge=coeff_ridge, + threshold=threshold, # noqa: E501 + maxiter=maxiter, + row_w=weight_i, + verbose=verbose, + print_msg=f"Imputation {i + 1}", + ) # noqa: E501 + res_imp.append(res) + # Normalize the imputed disjunctive tables + tdc_imp = [res["tab_disj"] for res in res_imp] + res_comp = [res["completeObs"] for res in res_imp] + tdc_norm = [ + normtdc(tab_disj=tdc, data_na=comp) + for tdc, comp in zip(tdc_imp, res_comp) + ] # noqa: E501 + # Draw the final imputed datasets + X_imp = [ + draw(tab_disj=tdc, Don=comp, Don_na=X) + for tdc, comp in zip(tdc_norm, res_comp) + ] # noqa: E501 + # Compute the final imputed disjunctive table using all data + res_imputeMCA = imputeMCA( + X, + ncp=ncp, + coeff_ridge=coeff_ridge, + threshold=threshold, + maxiter=maxiter, + )["tab_disj"] + res = { + "res_MIs": X_imp, + "res_imputeMCA": res_imputeMCA, + "call": { + "X": X, + "nboot": nboot, + "ncp": ncp, + "coeff_ridge": coeff_ridge, + "threshold": threshold, + "maxiter": maxiter, + "tab_disj_array": np.array([tdc.values for tdc in tdc_imp]), + }, + } + return res diff --git a/qolmat/imputations/preprocessing.py b/qolmat/imputations/preprocessing.py index 50c54270..12308ffb 100644 --- a/qolmat/imputations/preprocessing.py +++ b/qolmat/imputations/preprocessing.py @@ -1,38 +1,39 @@ +"""Script for preprocessing functions.""" + import copy -from typing import Any, Dict, Hashable, List, Optional, Tuple +from typing import Dict, Hashable, List, Optional, Tuple + import numpy as np import pandas as pd -from sklearn.compose import 
make_column_selector as selector -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import Pipeline -from sklearn.ensemble import ( - HistGradientBoostingRegressor, - HistGradientBoostingClassifier, -) -from sklearn.compose import ColumnTransformer +from category_encoders.one_hot import OneHotEncoder +from numpy.typing import NDArray from sklearn.base import ( BaseEstimator, RegressorMixin, TransformerMixin, ) +from sklearn.compose import ColumnTransformer +from sklearn.compose import make_column_selector as selector +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler from sklearn.utils.validation import ( - check_X_y, check_array, check_is_fitted, + check_X_y, ) -from category_encoders.one_hot import OneHotEncoder - - -from typing_extensions import Self -from numpy.typing import NDArray - +# from typing_extensions import Self from qolmat.utils import utils class MixteHGBM(RegressorMixin, BaseEstimator): - """ - A custom scikit-learn estimator implementing a mixed model using + """MixteHGBM class. + + This is a custom scikit-learn estimator implementing a mixed model using HistGradientBoostingClassifier for string target data and HistGradientBoostingRegressor for numeric target data. """ @@ -41,19 +42,18 @@ def __init__(self): super().__init__() def set_model_parameters(self, **args_model): - """ - Sets the arguments of the underlying model. + """Set the arguments of the underlying model. Parameters ---------- - **kwargs : dict + **args_model : dict Additional keyword arguments to be passed to the underlying models. + """ self.args_model = args_model - def fit(self, X: NDArray, y: NDArray) -> Self: - """ - Fit the model according to the given training data. + def fit(self, X: NDArray, y: NDArray) -> "MixteHGBM": + """Fit the model according to the given training data. Parameters ---------- @@ -66,8 +66,11 @@ def fit(self, X: NDArray, y: NDArray) -> Self: ------- self : object Returns self. + """ - X, y = check_X_y(X, y, accept_sparse=True, force_all_finite="allow-nan") + X, y = check_X_y( + X, y, accept_sparse=True, force_all_finite="allow-nan" + ) self.is_fitted_ = True self.n_features_in_ = X.shape[1] if hasattr(self, "args_model"): @@ -85,8 +88,7 @@ def fit(self, X: NDArray, y: NDArray) -> Self: return self def predict(self, X: NDArray) -> NDArray: - """ - Predict using the fitted model. + """Predict using the fitted model. Parameters ---------- @@ -97,6 +99,7 @@ def predict(self, X: NDArray) -> NDArray: ------- y_pred : array-like, shape (n_samples,) Predicted target values. + """ X = check_array(X, accept_sparse=True, force_all_finite="allow-nan") check_is_fitted(self, "is_fitted_") @@ -104,26 +107,29 @@ def predict(self, X: NDArray) -> NDArray: return y_pred def _more_tags(self): + """Indicate if the class allows inputs with categorical data and nans. + + It modifies the behaviour of the functions checking data. """ - This method indicates that this class allows inputs with categorical data and nans. It - modifies the behaviour of the functions checking data. - """ - return {"X_types": ["2darray", "categorical", "string"], "allow_nan": True} + return { + "X_types": ["2darray", "categorical", "string"], + "allow_nan": True, + } class BinTransformer(TransformerMixin, BaseEstimator): - """ - Learns the possible values of the provided numerical feature, allowing to transform new values - to the closest existing one. 
+ """BinTransformer class. + + Learn the possible values of the provided numerical feature, + allowing to transform new values to the closest existing one. """ def __init__(self, cols: Optional[List] = None): super().__init__() self.cols = cols - def fit(self, X: NDArray, y: Optional[NDArray] = None) -> Self: - """ - Fit the BinTransformer to X. + def fit(self, X: NDArray, y: Optional[NDArray] = None) -> "BinTransformer": + """Fit the BinTransformer to X. Parameters ---------- @@ -138,11 +144,12 @@ def fit(self, X: NDArray, y: Optional[NDArray] = None) -> Self: ------- self : object Fitted transformer. + """ df = utils._validate_input(X) self.feature_names_in_ = df.columns self.n_features_in_ = len(df.columns) - self.dict_df_bins_: Dict[Hashable, pd.DataFrame] = dict() + self.dict_df_bins_: Dict[Hashable, pd.DataFrame] = {} if self.cols is None: cols = df.select_dtypes(include="number").columns else: @@ -156,8 +163,7 @@ def fit(self, X: NDArray, y: Optional[NDArray] = None) -> Self: return self def transform(self, X: NDArray) -> NDArray: - """ - Transform X to existing values learned during fit. + """Transform X to existing values learned during fit. Parameters ---------- @@ -168,6 +174,7 @@ def transform(self, X: NDArray) -> NDArray: ------- X_out : ndarray of shape (n_samples,) Transformed input. + """ df = utils._validate_input(X) check_is_fitted(self) @@ -176,7 +183,8 @@ def transform(self, X: NDArray) -> NDArray: or df.columns.to_list() != self.feature_names_in_.to_list() ): raise ValueError( - "Feature names in X {df.columns} don't match with expected {feature_names_in_}" + f"Feature names in X {df.columns} don't match with " + f"expected {self.feature_names_in_}" ) df_out = df.copy() for col in df: @@ -192,8 +200,7 @@ def transform(self, X: NDArray) -> NDArray: return df_out def inverse_transform(self, X: NDArray) -> NDArray: - """ - Transform X to existing values learned during fit. + """Transform X to existing values learned during fit. Parameters ---------- @@ -204,37 +211,43 @@ def inverse_transform(self, X: NDArray) -> NDArray: ------- X_out : ndarray of shape (n_samples,) Transformed input. + """ return self.transform(X) def _more_tags(self): + """Indicate if the class allows inputs with categorical data and nans. + + It modifies the behaviour of the functions checking data. """ - This method indicates that this class allows inputs with categorical data and nans. It - modifies the behaviour of the functions checking data. - """ - return {"X_types": ["2darray", "categorical", "string"], "allow_nan": True} + return { + "X_types": ["2darray", "categorical", "string"], + "allow_nan": True, + } class OneHotEncoderProjector(OneHotEncoder): - """ - Inherits from the class OneHotEncoder imported from category_encoders. The decoding - function accepts non boolean values (as it is the case for the sklearn OneHotEncoder). In - this case the decoded value corresponds to the largest dummy value. + """Class for one-hot encoding of categorical features. + + It inherits from the class OneHotEncoder imported from category_encoders. + The decoding function accepts non boolean values (as it is the case for + the sklearn OneHotEncoder). In this case the decoded value corresponds to + the largest dummy value. """ def __init__(self, **kwargs): super().__init__(**kwargs) def reverse_dummies(self, X: pd.DataFrame, mapping: Dict) -> pd.DataFrame: - """ - Convert dummy variable into numerical variables + """Convert dummy variable into numerical variables. 
Parameters ---------- X : DataFrame + Input dataframe. mapping: list-like - Contains mappings of column to be transformed to it's new columns and value - represented + Mapping of column to be transformed to its + new columns and value represented Returns ------- @@ -260,22 +273,55 @@ def reverse_dummies(self, X: pd.DataFrame, mapping: Dict) -> pd.DataFrame: class WrapperTransformer(TransformerMixin, BaseEstimator): - """ - Wraps a transformer with reversible transformers designed to embed the data. + """Wrap a transformer. + + Wrapper with reversible transformers designed to embed the data. """ - def __init__(self, transformer: TransformerMixin, wrapper: TransformerMixin): + def __init__( + self, transformer: TransformerMixin, wrapper: TransformerMixin + ): super().__init__() self.transformer = transformer self.wrapper = wrapper - def fit(self, X: NDArray, y: Optional[NDArray] = None) -> Self: + def fit( + self, X: NDArray, y: Optional[NDArray] = None + ) -> "WrapperTransformer": + """Fit the model according to the given training data. + + Parameters + ---------- + X : NDArray + Input array. + y : Optional[NDArray], optional + _description_, by default None + + Returns + ------- + Self + The object itself. + + """ X_transformed = copy.deepcopy(X) X_transformed = self.wrapper.fit_transform(X_transformed) X_transformed = self.transformer.fit(X_transformed) return self def fit_transform(self, X: NDArray) -> NDArray: + """Fit the model according to the given training data and transform it. + + Parameters + ---------- + X : NDArray + Input array. + + Returns + ------- + NDArray + Transformed array. + + """ X_transformed = copy.deepcopy(X) X_transformed = self.wrapper.fit_transform(X_transformed) X_transformed = self.transformer.fit_transform(X_transformed) @@ -283,6 +329,19 @@ def fit_transform(self, X: NDArray) -> NDArray: return X_transformed def transform(self, X: NDArray) -> NDArray: + """Transform X. + + Parameters + ---------- + X : NDArray + Input array. + + Returns + ------- + NDArray + Transformed array. + + """ X_transformed = copy.deepcopy(X) X_transformed = self.wrapper.transform(X_transformed) X_transformed = self.transformer.transform(X_transformed) @@ -293,8 +352,9 @@ def transform(self, X: NDArray) -> NDArray: def make_pipeline_mixte_preprocessing( scale_numerical: bool = False, avoid_new: bool = False ) -> Pipeline: - """ - Create a preprocessing pipeline managing mixed type data by one hot encoding categorical data. + """Create a preprocessing pipeline managing mixed type data. + + It does this by one hot encoding categorical data. 
Parameters ---------- @@ -307,14 +367,19 @@ def make_pipeline_mixte_preprocessing( ------- preprocessor : Pipeline Preprocessing pipeline + """ transformers: List[Tuple] = [] if scale_numerical: - transformers += [("num", StandardScaler(), selector(dtype_include=np.number))] + transformers += [ + ("num", StandardScaler(), selector(dtype_include=np.number)) + ] ohe = OneHotEncoder(handle_unknown="ignore", use_cat_names=True) transformers += [("cat", ohe, selector(dtype_exclude=np.number))] - col_transformer = ColumnTransformer(transformers=transformers, remainder="passthrough") + col_transformer = ColumnTransformer( + transformers=transformers, remainder="passthrough" + ) col_transformer = col_transformer.set_output(transform="pandas") preprocessor = Pipeline(steps=[("col_transformer", col_transformer)]) @@ -323,13 +388,19 @@ def make_pipeline_mixte_preprocessing( return preprocessor -def make_robust_MixteHGB(scale_numerical: bool = False, avoid_new: bool = False) -> Pipeline: - """ - Create a robust pipeline for MixteHGBM by one hot encoding categorical features. - This estimator is intended for use in ImputerRegressor to deal with mixed type data. +def make_robust_MixteHGB( + scale_numerical: bool = False, avoid_new: bool = False +) -> Pipeline: + """Create a robust pipeline for MixteHGBM. - Note that from sklearn 1.4 HistGradientBoosting Natively Supports Categorical DTypes in - DataFrames, so that this pipeline is not required anymore. + Create a preprocessing pipeline managing mixed type data + by one hot encoding categorical features. + This estimator is intended for use in ImputerRegressor + to deal with mixed type data. + + Note that from sklearn 1.4 HistGradientBoosting Natively Supports + Categorical DTypes in DataFrames, so that this pipeline is not + required anymore. Parameters @@ -343,6 +414,7 @@ def make_robust_MixteHGB(scale_numerical: bool = False, avoid_new: bool = False) ------- robust_MixteHGB : object A robust pipeline for MixteHGBM. + """ preprocessor = make_pipeline_mixte_preprocessing( scale_numerical=scale_numerical, avoid_new=avoid_new diff --git a/qolmat/imputations/rpca/rpca.py b/qolmat/imputations/rpca/rpca.py index 29eeaaf9..a081eae3 100644 --- a/qolmat/imputations/rpca/rpca.py +++ b/qolmat/imputations/rpca/rpca.py @@ -1,18 +1,15 @@ +"""Script for the root class of RPCA.""" + from __future__ import annotations -from typing import Union, Tuple -from typing_extensions import Self +from typing import Union import numpy as np -from numpy.typing import NDArray from sklearn.base import BaseEstimator, TransformerMixin -from qolmat.utils import utils - class RPCA(BaseEstimator, TransformerMixin): - """ - This class is the root class of the RPCA methods. + """Root class of the RPCA methods. 
Parameters ---------- @@ -24,6 +21,7 @@ class RPCA(BaseEstimator, TransformerMixin): Tolerance for stopping criteria, by default 1e-6 verbose: bool default `False` + """ def __init__( diff --git a/qolmat/imputations/rpca/rpca_noisy.py b/qolmat/imputations/rpca/rpca_noisy.py index 74e68856..ae59ae0a 100644 --- a/qolmat/imputations/rpca/rpca_noisy.py +++ b/qolmat/imputations/rpca/rpca_noisy.py @@ -1,13 +1,15 @@ +"""Script for the noisy RPCA.""" + from __future__ import annotations import warnings -from typing import Dict, List, Optional, Tuple, TypeVar, Union +from typing import Dict, List, Optional, Tuple, Union import numpy as np import scipy as scp +from numpy.typing import NDArray from scipy.sparse import dok_matrix, identity from scipy.sparse.linalg import spsolve -from numpy.typing import NDArray from sklearn import utils as sku from qolmat.imputations.rpca import rpca_utils @@ -16,23 +18,23 @@ class RpcaNoisy(RPCA): - """ - This class implements a noisy version of the so-called 'improved RPCA' + """Class for a noisy version of the so-called 'improved RPCA'. References ---------- - Wang, Xuehui, et al. "An improved robust principal component analysis model for anomalies - detection of subway passenger flow." + Wang, Xuehui, et al. "An improved robust principal component analysis model + for anomalies detection of subway passenger flow." Journal of advanced transportation (2018). - Chen, Yuxin, et al. "Bridging convex and nonconvex optimization in robust PCA: Noise, outliers - and missing data." + Chen, Yuxin, et al. "Bridging convex and nonconvex optimization + in robust PCA: Noise, outliers and missing data." The Annals of Statistics 49.5 (2021): 2948-2971. Parameters ---------- random_state : int, optional - The seed of the pseudo random number generator to use, for reproductibility. + The seed of the pseudo random number generator to use, + for reproducibility. rank: Optional[int] Upper bound of the rank to be estimated mu: Optional[float] @@ -44,16 +46,19 @@ class RpcaNoisy(RPCA): list_periods: Optional[List[int]] list of periods, linked to the Toeplitz matrices list_etas: Optional[List[float]] - list of penalizing parameters for the corresponding period in list_periods + list of penalizing parameters for the corresponding period + in list_periods max_iterations: Optional[int] - stopping criteria, maximum number of iterations. By default, the value is set to 10_000 + stopping criteria, maximum number of iterations. + By default, the value is set to 10_000 tolerance: Optional[float] - stoppign critera, minimum difference between 2 consecutive iterations. By default, - the value is set to 1e-6 + stopping criteria, minimum difference between 2 consecutive iterations. + By default, the value is set to 1e-6 norm: Optional[str] error norm, can be "L1" or "L2". By default, the value is set to "L2" verbose: Optional[bool] verbosity level, if False the warnings are silenced + """ def __init__( @@ -70,7 +75,9 @@ def __init__( norm: str = "L2", verbose: bool = True, ) -> None: - super().__init__(max_iterations=max_iterations, tolerance=tolerance, verbose=verbose) + super().__init__( + max_iterations=max_iterations, tolerance=tolerance, verbose=verbose + ) self.rng = sku.check_random_state(random_state) self.rank = rank self.mu = mu @@ -81,8 +88,7 @@ def __init__( self.norm = norm def get_params_scale(self, D: NDArray) -> Dict[str, float]: - """ - Get parameters for scaling in RPCA based on the input data. + """Get parameters for scaling in RPCA based on the input data.
Parameters ---------- @@ -111,8 +117,7 @@ def get_params_scale(self, D: NDArray) -> Dict[str, float]: } def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: - """ - Compute the noisy RPCA with L1 or L2 time penalisation + """Compute the noisy RPCA with L1 or L2 time penalisation. Parameters ---------- @@ -127,6 +132,7 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: Low-rank signal A: NDArray Anomalies + """ M, A, _, _ = self.decompose_with_basis(D, Omega) return M, A @@ -134,9 +140,9 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: def decompose_with_basis( self, D: NDArray, Omega: NDArray ) -> Tuple[NDArray, NDArray, NDArray, NDArray]: - """ - Compute the noisy RPCA with L1 or L2 time penalisation, and returns the decomposition of - the low-rank matrix. + """Compute the noisy RPCA with L1 or L2 time penalisation. + + It returns the decomposition of the low-rank matrix. Parameters ---------- @@ -155,6 +161,7 @@ def decompose_with_basis( Coefficients of the low-rank matrix in the reduced basis Q: NDArray Reduced basis of the low-rank matrix + """ D = utils.linear_interpolation(D) self.params_scale = self.get_params_scale(D) @@ -175,8 +182,9 @@ def decompose_with_basis( for period in self.list_periods: if not period < n_rows: raise ValueError( - "The periods provided in argument in `list_periods` must smaller " - f"than the number of rows in the matrix but {period} >= {n_rows}!" + "The periods provided in argument in `list_periods` " + "must smaller than the number of rows " + f"in the matrix but {period} >= {n_rows}!" ) M, A, L, Q = self.minimise_loss( @@ -211,12 +219,12 @@ def minimise_loss( tolerance: float = 1e-6, norm: str = "L2", ) -> Tuple: - """ - Compute the noisy RPCA with a L2 time penalisation. + """Compute the noisy RPCA with a L2 time penalisation. - This function computes the noisy Robust Principal Component Analysis (RPCA) using a L2 time - penalisation. It iteratively minimizes a loss function to separate the low-rank and sparse - components from the input data matrix. + This function computes the noisy Robust Principal Component Analysis + (RPCA) using a L2 time penalisation. It iteratively minimizes a loss + function to separate the low-rank and sparse components from the + input data matrix. Parameters ---------- @@ -231,18 +239,19 @@ def minimise_loss( lam : float Penalizing parameter for the sparse matrix. mu : float, optional - Initial stiffness parameter for the constraint on M, L, and Q. Defaults - to 1e-2. + Initial stiffness parameter for the constraint on M, L, and Q. + Defaults to 1e-2. list_periods : List[int], optional List of periods linked to the Toeplitz matrices. Defaults to []. list_etas : List[float], optional - List of penalizing parameters for the corresponding periods in list_periods. Defaults + List of penalizing parameters for the corresponding periods + in list_periods. Defaults to []. max_iterations : int, optional Stopping criteria, maximum number of iterations. Defaults to 10000. tolerance : float, optional - Stopping criteria, minimum difference between 2 consecutive iterations. - Defaults to 1e-6. + Stopping criteria, minimum difference between 2 + consecutive iterations. Defaults to 1e-6. norm : str, optional Error norm, can be "L1" or "L2". Defaults to "L2". @@ -264,8 +273,8 @@ def minimise_loss( ValueError If the periods provided in the argument in `list_periods` are not smaller than the number of rows in the matrix. 
- """ + """ rho = 1.1 n_rows, n_cols = D.shape @@ -288,10 +297,15 @@ def minimise_loss( mu_bar = mu * 1e3 # matrices for temporal correlation - list_H = [rpca_utils.toeplitz_matrix(period, n_rows) for period in list_periods] + list_H = [ + rpca_utils.toeplitz_matrix(period, n_rows) + for period in list_periods + ] HtH = dok_matrix((n_rows, n_rows)) for i_period, _ in enumerate(list_periods): - HtH += list_etas[i_period] * (list_H[i_period].T @ list_H[i_period]) + HtH += list_etas[i_period] * ( + list_H[i_period].T @ list_H[i_period] + ) Ir = np.eye(rank) In = identity(n_rows) @@ -335,7 +349,9 @@ def minimise_loss( if norm == "L1": for i_period, _ in enumerate(list_periods): eta = list_etas[i_period] - R[i_period] = rpca_utils.soft_thresholding(R[i_period] / mu, eta / mu) + R[i_period] = rpca_utils.soft_thresholding( + R[i_period] / mu, eta / mu + ) mu = min(mu * rho, mu_bar) @@ -364,9 +380,11 @@ def decompose_on_basis( Omega: NDArray, Q: NDArray, ) -> Tuple[NDArray, NDArray]: - """ - Decompose the matrix D with an observation matrix Omega using the noisy RPCA algorithm, - with a fixed reduced basis given by the matrix Q. This allows to impute new data without + """Decompose the matrix D with an observation matrix Omega. + + It uses the noisy RPCA algorithm, + with a fixed reduced basis given by the matrix Q. + This allows to impute new data without resolving the optimization problem on the whole dataset. Parameters @@ -384,6 +402,7 @@ def decompose_on_basis( A tuple representing the decomposition of D with: - M: low-rank matrix - A: sparse matrix + """ D = utils.linear_interpolation(D) params_scale = self.get_params_scale(D) @@ -434,8 +453,9 @@ def _check_cost_function_minimized( tau: float, lam: float, ): - """ - Check that the functional minimized by the RPCA is smaller at the end than at the + """Check cost function. + + The functional minimized by the RPCA is smaller at the end than at the beginning. Parameters @@ -452,6 +472,7 @@ def _check_cost_function_minimized( parameter penalizing the nuclear norm of the low rank part lam : float parameter penalizing the L1-norm of the anomaly/sparse part + """ cost_start = self.cost_function( D, @@ -482,8 +503,11 @@ def _check_cost_function_minimized( if self.verbose and (cost_end > cost_start * (1 + 1e-6)): warnings.warn( - f"RPCA algorithm may provide bad results. Function {function_str} increased from" - f" {cost_start} to {cost_end} instead of decreasing!".format("%.2f") + "RPCA algorithm may provide bad results. " + f"Function {function_str} increased from" + f" {cost_start} to {cost_end} instead of decreasing!".format( + "%.2f" + ) ) @staticmethod @@ -498,8 +522,7 @@ def cost_function( list_etas: List[float] = [], norm: str = "L2", ): - """ - Estimated cost function for the noisy RPCA algorithm + """Estimate cost function for the noisy RPCA algorithm. Parameters ---------- @@ -518,27 +541,34 @@ def cost_function( list_periods: Optional[List[int]] list of periods, linked to the Toeplitz matrices list_etas: Optional[List[float]] - list of penalizing parameters for the corresponding period in list_periods + list of penalizing parameters for the corresponding period in + list_periods norm: Optional[str] - error norm, can be "L1" or "L2". By default, the value is set to "L2" + error norm, can be "L1" or "L2". 
+ By default, the value is set to "L2" Returns ------- float Value of the cost function minimized by the RPCA - """ + """ temporal_norm: float = 0 if len(list_etas) > 0: # matrices for temporal correlation - list_H = [rpca_utils.toeplitz_matrix(period, D.shape[0]) for period in list_periods] + list_H = [ + rpca_utils.toeplitz_matrix(period, D.shape[0]) + for period in list_periods + ] if norm == "L1": for eta, H_matrix in zip(list_etas, list_H): temporal_norm += eta * np.sum(np.abs(H_matrix @ M)) elif norm == "L2": for eta, H_matrix in zip(list_etas, list_H): - temporal_norm += eta * float(np.linalg.norm(H_matrix @ M, "fro")) + temporal_norm += eta * float( + np.linalg.norm(H_matrix @ M, "fro") + ) anomalies_norm = np.sum(np.abs(A * Omega)) cost = ( 1 / 2 * ((Omega * (D - M - A)) ** 2).sum() diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py index f3b8e751..500605fb 100644 --- a/qolmat/imputations/rpca/rpca_pcp.py +++ b/qolmat/imputations/rpca/rpca_pcp.py @@ -1,3 +1,5 @@ +"""Script for the PCP RPCA.""" + from __future__ import annotations import warnings @@ -13,8 +15,9 @@ class RpcaPcp(RPCA): - """ - This class implements the basic RPCA decomposition using Alternating Lagrangian Multipliers. + """Class for the basic RPCA decomposition. + + It uses Alternating Lagrangian Multipliers. References ---------- @@ -24,7 +27,8 @@ class RpcaPcp(RPCA): Parameters ---------- random_state : int, optional - The seed of the pseudo random number generator to use, for reproductibility. + The seed of the pseudo random number generator to use, + for reproductibility. period: Optional[int] number of rows of the reshaped matrix if the signal is a 1D-array rank: Optional[int] @@ -34,12 +38,14 @@ class RpcaPcp(RPCA): lam: Optional[float] penalizing parameter for the sparse matrix max_iterations: Optional[int] - stopping criteria, maximum number of iterations. By default, the value is set to 10_000 + stopping criteria, maximum number of iterations. + By default, the value is set to 10_000 tolerance: Optional[float] - stoppign critera, minimum difference between 2 consecutive iterations. By default, - the value is set to 1e-6 + stoppign critera, minimum difference between 2 consecutive iterations. + By default, the value is set to 1e-6 verbose: Optional[bool] verbosity level, if False the warnings are silenced + """ def __init__( @@ -51,14 +57,15 @@ def __init__( tolerance: float = 1e-6, verbose: bool = True, ) -> None: - super().__init__(max_iterations=max_iterations, tolerance=tolerance, verbose=verbose) + super().__init__( + max_iterations=max_iterations, tolerance=tolerance, verbose=verbose + ) self.rng = sku.check_random_state(random_state) self.mu = mu self.lam = lam def get_params_scale(self, D: NDArray): - """ - Get parameters for scaling in RPCA based on the input data. + """Get parameters for scaling in RPCA based on the input data. Parameters ---------- @@ -81,8 +88,9 @@ def get_params_scale(self, D: NDArray): return dict_params def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: - """ - Estimate the relevant parameters then compute the PCP RPCA decomposition, using the + """Estimate the relevant parameters. 
+ + It computes the PCP RPCA decomposition, using the Augumented Largrangian Multiplier (ALM) Parameters @@ -98,6 +106,7 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: Low-rank signal A: NDArray Anomalies + """ D = utils.linear_interpolation(D) if np.all(D == 0): @@ -116,7 +125,6 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: M: NDArray = D - A for iteration in range(self.max_iterations): - M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu) A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu) A[~Omega] = (D - M)[~Omega] @@ -141,7 +149,9 @@ def _check_cost_function_minimized( Omega: NDArray, lam: float, ): - """Check that the functional minimized by the RPCA + """Check that the functional minimized by the RPCA. + + Check that the functional minimized by the RPCA is smaller at the end than at the beginning Parameters @@ -156,12 +166,16 @@ def _check_cost_function_minimized( boolean matrix indicating the observed values lam : float parameter penalizing the L1-norm of the anomaly/sparse part + """ cost_start = np.linalg.norm(observations, "nuc") - cost_end = np.linalg.norm(low_rank, "nuc") + lam * np.sum(Omega * np.abs(anomalies)) + cost_end = np.linalg.norm(low_rank, "nuc") + lam * np.sum( + Omega * np.abs(anomalies) + ) if self.verbose and round(cost_start, 4) - round(cost_end, 4) <= -1e-2: function_str = "||D||_* + lam ||A||_1" warnings.warn( - f"RPCA algorithm may provide bad results. Function {function_str} increased from" - f" {cost_start} to {cost_end} instead of decreasing!" + "RPCA algorithm may provide bad results. " + f"Function {function_str} increased from {cost_start} " + f"to {cost_end} instead of decreasing!" ) diff --git a/qolmat/imputations/rpca/rpca_utils.py b/qolmat/imputations/rpca/rpca_utils.py index 9e6c8945..0d3b6d5f 100644 --- a/qolmat/imputations/rpca/rpca_utils.py +++ b/qolmat/imputations/rpca/rpca_utils.py @@ -1,12 +1,7 @@ -""" -Modular utility functions for RPCA -""" +"""Modular utility functions for RPCA.""" -from typing import Tuple import numpy as np from numpy.typing import NDArray -import scipy -from scipy.linalg import toeplitz from scipy import sparse as sps @@ -14,8 +9,7 @@ def approx_rank( M: NDArray, threshold: float = 0.95, ) -> int: - """ - Estimate a bound on the rank of an array by SVD. + """Estimate a bound on the rank of an array by SVD. Parameters ---------- @@ -45,8 +39,7 @@ def soft_thresholding( X: NDArray, threshold: float, ) -> NDArray: - """ - Shrinkage operator (i.e. soft thresholding) on the elements of X. + """Shrinkage operator (i.e. soft thresholding) on the elements of X. Parameters ---------- @@ -59,13 +52,13 @@ def soft_thresholding( ------- NDArray Array V such that V = sign(X) * max(abs(X - threshold,0) + """ return np.sign(X) * np.maximum(np.abs(X) - threshold, 0) def svd_thresholding(X: NDArray, threshold: float) -> NDArray: - """ - Apply the shrinkage operator to the singular values obtained from the SVD of X. + """Apply shrinkage to the singular values from X's SVD. 
Parameters ---------- @@ -81,6 +74,7 @@ def svd_thresholding(X: NDArray, threshold: float) -> NDArray: U is the array of left singular vectors of X V is the array of the right singular vectors of X s is the array of the singular values as a diagonal array + """ U, s, Vh = np.linalg.svd(X, full_matrices=False) s = soft_thresholding(s, threshold) @@ -88,8 +82,7 @@ def svd_thresholding(X: NDArray, threshold: float) -> NDArray: def l1_norm(M: NDArray) -> float: - """ - L1 norm of an array + """Compute the L1 norm of an array. Parameters ---------- @@ -100,13 +93,15 @@ def l1_norm(M: NDArray) -> float: ------- float L1 norm of M + """ return np.sum(np.abs(M)) -def toeplitz_matrix(T: int, dimension: int) -> NDArray: - """ - Create a sparse Toeplitz square matrix H to take into account temporal correlations in the RPCA +def toeplitz_matrix(T: int, dimension: int) -> sps.spmatrix: + """Create a sparse Toeplitz square matrix H. + + It is useful to take into account temporal correlations in the RPCA H=Toeplitz(0,1,-1), in which the central diagonal is defined as ones and the T upper diagonal is defined as minus ones. @@ -121,11 +116,13 @@ def toeplitz_matrix(T: int, dimension: int) -> NDArray: ------- NDArray Sparse Toeplitz matrix using scipy format - """ + """ n_lags = dimension - T diagonals = [np.ones(n_lags), -np.ones(n_lags)] - H_top = sps.diags(diagonals, offsets=[0, T], shape=(n_lags, dimension), format="csr") + H_top = sps.diags( + diagonals, offsets=[0, T], shape=(n_lags, dimension), format="csr" + ) H = sps.dok_matrix((dimension, dimension)) H[:n_lags] = H_top return H diff --git a/qolmat/imputations/softimpute.py b/qolmat/imputations/softimpute.py index 5d04b39b..72d3a8c4 100644 --- a/qolmat/imputations/softimpute.py +++ b/qolmat/imputations/softimpute.py @@ -1,19 +1,22 @@ +"""Script for SoftImpute class.""" + from __future__ import annotations -from typing import Optional, Tuple, Union import warnings +from typing import Optional, Tuple, Union import numpy as np from numpy.typing import NDArray from sklearn import utils as sku from sklearn.base import BaseEstimator, TransformerMixin -from qolmat.utils import utils from qolmat.imputations.rpca import rpca_utils +from qolmat.utils import utils class SoftImpute(BaseEstimator, TransformerMixin): - """ + """Class for the Rank Restricted Soft SVD algorithm. + This class implements the Rank Restricted Soft SVD algorithm presented in Hastie, Trevor, et al. "Matrix completion and low-rank SVD via fast alternating least squares." The Journal of Machine Learning @@ -36,7 +39,8 @@ class SoftImpute(BaseEstimator, TransformerMixin): max_iterations : int Maximum number of iterations random_state : int, optional - The seed of the pseudo random number generator to use, for reproductibility + The seed of the pseudo random number generator to use, + for reproductibility verbose : bool flag for verbosity @@ -44,7 +48,9 @@ class SoftImpute(BaseEstimator, TransformerMixin): -------- >>> import numpy as np >>> from qolmat.imputations.softimpute import SoftImpute - >>> D = np.array([[1, 2, np.nan, 4], [1, 5, 3, np.nan], [4, 2, 3, 2], [1, 1, 5, 4]]) + >>> D = np.array( + ... [[1, 2, np.nan, 4], [1, 5, 3, np.nan], [4, 2, 3, 2], [1, 1, 5, 4]] + ... ) >>> Omega = ~np.isnan(D) >>> M, A = SoftImpute(random_state=11).decompose(D, Omega) >>> print(M + A) @@ -52,6 +58,7 @@ class SoftImpute(BaseEstimator, TransformerMixin): [1. 5. 3. 0.87217939] [4. 2. 3. 2. ] [1. 1. 5. 4. 
]] + """ def __init__( @@ -73,8 +80,7 @@ def __init__( self.verbose = verbose def get_params_scale(self, X: NDArray): - """ - Get parameters for scaling in Soft Impute based on the input data. + """Get parameters for scaling in Soft Impute based on the input data. Parameters ---------- @@ -98,8 +104,7 @@ def get_params_scale(self, X: NDArray): return dict_params def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: - """ - Compute the Soft Impute decomposition + """Compute the Soft Impute decomposition. Parameters ---------- @@ -114,11 +119,13 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: Low-rank signal A: NDArray Anomalies + """ params_scale = self.get_params_scale(X) rank = params_scale["rank"] if self.rank is None else self.rank tau = params_scale["tau"] if self.tau is None else self.tau - assert tau > 0 + if tau <= 0: + raise ValueError(f"Parameter tau has negative value: {tau}") # Step 1 : Initializing n, m = X.shape @@ -138,7 +145,9 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: # Step 2 : Upate on B D2_invreg = (D**2 + tau) ** (-1) - Btilde = ((U * D).T @ np.where(Omega, X - A @ B.T, 0) + (B * D**2).T).T + Btilde = ( + (U * D).T @ np.where(Omega, X - A @ B.T, 0) + (B * D**2).T + ).T Btilde = Btilde * D2_invreg Utilde, D2tilde, _ = np.linalg.svd(Btilde * D, full_matrices=False) @@ -148,7 +157,9 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: # Step 3 : Upate on A D2_invreg = (D**2 + tau) ** (-1) - Atilde = ((V * D).T @ np.where(Omega, X - A @ B.T, 0).T + (A * D**2).T).T + Atilde = ( + (V * D).T @ np.where(Omega, X - A @ B.T, 0).T + (A * D**2).T + ).T Atilde = Atilde * D2_invreg Utilde, D2tilde, _ = np.linalg.svd(Atilde * D, full_matrices=False) @@ -162,7 +173,8 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: print(f"Iteration {iter_}: ratio = {round(ratio, 4)}") if ratio < self.tolerance: print( - f"Convergence reached at iteration {iter_} with ratio = {round(ratio, 4)}" + f"Convergence reached at iteration {iter_} " + f"with ratio = {round(ratio, 4)}" ) break @@ -178,7 +190,9 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: if self.verbose and (cost_end > cost_start + 1e-9): warnings.warn( f"Convergence failed: cost function increased from" - f" {cost_start} to {cost_end} instead of decreasing!".format("%.2f") + f" {cost_start} to {cost_end} instead of decreasing!".format( + "%.2f" + ) ) return M, A @@ -192,7 +206,9 @@ def _check_convergence( D: NDArray, V: NDArray, ) -> float: - """Given a pair of iterates (U_old, D_old, V_old) and (U, D, V), + """Check if the convergence has been reached. + + Given a pair of iterates (U_old, D_old, V_old) and (U, D, V), it computes the relative change in Frobenius norm given by || U_old @ D_old^2 @ V_old.T - U @ D^2 @ V.T ||_F^2 / || U_old @ D_old^2 @ V_old.T ||_F^2 @@ -216,6 +232,7 @@ def _check_convergence( ------- float relative change + """ if any(arg is None for arg in (U_old, D_old, V_old, U, D, V)): raise ValueError("One or more arguments are None.") @@ -261,8 +278,7 @@ def cost_function( Omega: NDArray, tau: float, ): - """ - Compute cost function for different RPCA algorithm + """Compute cost function for different RPCA algorithm. 
Parameters ---------- @@ -281,6 +297,7 @@ def cost_function( ------- float Value of the cost function minimized by the Soft Impute algorithm + """ norm_frobenius = np.sum(np.where(Omega, X - M, 0) ** 2) norm_nuclear = np.linalg.norm(M, "nuc") diff --git a/qolmat/utils/algebra.py b/qolmat/utils/algebra.py index 9e2af1a6..a38fba62 100644 --- a/qolmat/utils/algebra.py +++ b/qolmat/utils/algebra.py @@ -1,6 +1,10 @@ +"""Utils algebra functions for qolmat package.""" + +from typing import Optional, Tuple + import numpy as np import scipy -from numpy.typing import NDArray, ArrayLike +from numpy.typing import NDArray def frechet_distance_exact( @@ -9,13 +13,18 @@ def frechet_distance_exact( means2: NDArray, cov2: NDArray, ) -> float: - """Compute the Fréchet distance between two dataframes df1 and df2 - Frechet_distance = || mu_1 - mu_2 ||_2^2 + Tr(Sigma_1 + Sigma_2 - 2(Sigma_1 . Sigma_2)^(1/2)) - It is normalized, df1 and df2 are first scaled by a factor (std(df1) + std(df2)) / 2 + """Compute the Fréchet distance between two dataframes df1 and df2. + + Frechet_distance = || mu_1 - mu_2 ||_2^2 + + Tr(Sigma_1 + Sigma_2 - 2(Sigma_1 . Sigma_2)^(1/2)) + It is normalized, df1 and df2 are first scaled + by a factor (std(df1) + std(df2)) / 2 and then centered around (mean(df1) + mean(df2)) / 2 - The result is divided by the number of samples to get an homogeneous result. - Based on: Dowson, D. C., and BV666017 Landau. "The Fréchet distance between multivariate normal - distributions." Journal of multivariate analysis 12.3 (1982): 450-455. + The result is divided by the number of samples to get + an homogeneous result. + Based on: Dowson, D. C., and BV666017 Landau. + "The Fréchet distance between multivariate normal distributions." + Journal of multivariate analysis 12.3 (1982): 450-455. Parameters ---------- @@ -32,9 +41,14 @@ def frechet_distance_exact( ------- float Frechet distance + """ n = len(means1) - if (means2.shape != (n,)) or (cov1.shape != (n, n)) or (cov2.shape != (n, n)): + if ( + (means2.shape != (n,)) + or (cov1.shape != (n, n)) + or (cov2.shape != (n, n)) + ): raise ValueError("Inputs have to be of same dimensions.") ssdiff = np.sum((means1 - means2) ** 2.0) @@ -52,8 +66,9 @@ def frechet_distance_exact( def kl_divergence_gaussian_exact( means1: NDArray, cov1: NDArray, means2: NDArray, cov2: NDArray ) -> float: - """ - Exact Kullback-Leibler divergence computed between two multivariate normal distributions + """Compute the exact Kullback-Leibler divergence. + + This is computed between two multivariate normal distributions Based on https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence Parameters @@ -66,10 +81,12 @@ def kl_divergence_gaussian_exact( Mean of the second distribution cov2: NDArray Covariance matrx of the second distribution + Returns ------- float Kulback-Leibler divergence + """ n_variables = len(means1) L1, _ = scipy.linalg.cho_factor(cov1) @@ -81,3 +98,53 @@ def kl_divergence_gaussian_exact( term_diag_L = 2 * np.sum(np.log(np.diagonal(L2) / np.diagonal(L1))) div_kl = 0.5 * (norm_M - n_variables + norm_y + term_diag_L) return div_kl + + +def svdtriplet(X, row_w=None, ncp=np.inf): + """Perform weighted SVD on matrix X with row weights. + + Parameters + ---------- + X : ndarray + Data matrix of shape (n_samples, n_features). + row_w : array-like, optional + Row weights. If None, uniform weights are assumed. Default is None. + ncp : int + Number of principal components to retain. Default is infinity. + + Returns + ------- + s : ndarray + Singular values. 
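# Illustrative sketch, not part of the patch: the core formula documented above for
# frechet_distance_exact, evaluated on two hypothetical Gaussians. The function in
# this patch additionally rescales/centers the inputs and normalises the result;
# this only evaluates || mu_1 - mu_2 ||_2^2 + Tr(S1 + S2 - 2 (S1 . S2)^(1/2)).
import numpy as np
from scipy import linalg

means1, cov1 = np.array([0.0, 0.0]), np.eye(2)
means2, cov2 = np.array([1.0, 0.0]), np.array([[2.0, 0.3], [0.3, 1.0]])

ssdiff = np.sum((means1 - means2) ** 2)
covmean = linalg.sqrtm(cov1 @ cov2)
dist = ssdiff + np.trace(cov1 + cov2 - 2 * np.real(covmean))
print(dist)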
+ U : ndarray + Left singular vectors. + V : ndarray + Right singular vectors. + + """ + if not isinstance(X, np.ndarray): + X = np.array(X, dtype=float) + else: + X = X.astype(float) + if row_w is None: + row_w = np.ones(X.shape[0]) / X.shape[0] + else: + row_w = np.array(row_w, dtype=float) + row_w /= row_w.sum() + ncp = int(min(ncp, X.shape[0] - 1, X.shape[1])) + # Apply weights to rows + X_weighted = X * np.sqrt(row_w[:, None]) + # Perform SVD + U, s, Vt = np.linalg.svd(X_weighted, full_matrices=False) + V = Vt.T + U = U[:, :ncp] + V = V[:, :ncp] + s = s[:ncp] + # Adjust signs to ensure consistency + mult = np.sign(np.sum(V, axis=0)) + mult[mult == 0] = 1 + U *= mult + V *= mult + # Rescale U by the square root of row weights + U /= np.sqrt(row_w[:, None]) + return s, U, V diff --git a/qolmat/utils/data.py b/qolmat/utils/data.py index 2adecf4e..0ea1823b 100644 --- a/qolmat/utils/data.py +++ b/qolmat/utils/data.py @@ -1,9 +1,11 @@ +"""Utils data for qolmat package.""" + import os import sys import zipfile from datetime import datetime from math import pi -from typing import List, Tuple, Union +from typing import Dict, List, Tuple, Union from urllib import request import numpy as np @@ -16,28 +18,33 @@ def read_csv_local(data_file_name: str, **kwargs) -> pd.DataFrame: - """Load csv files + """Load csv files. Parameters ---------- data_file_name : str - Filename. Has to be "beijing" or "conductors" - kwargs : dict + Filename. Has to be "beijing" or "conductors". + **kwargs : dict, optional + Additional keyword arguments passed to `pandas.read_csv`. Returns ------- df : pd.DataFrame dataframe + """ - df = pd.read_csv(os.path.join(ROOT_DIR, "data", f"{data_file_name}.csv"), **kwargs) + df = pd.read_csv( + os.path.join(ROOT_DIR, "data", f"{data_file_name}.csv"), **kwargs + ) return df def download_data_from_zip( zipname: str, urllink: str, datapath: str = "data/" ) -> List[pd.DataFrame]: - """ - Downloads and extracts ZIP files from a URL, then loads DataFrames from CSV files. + """Download and extracts ZIP files from a URL. + + It also loads DataFrames from CSV files. Parameters ---------- @@ -52,7 +59,9 @@ def download_data_from_zip( Returns ------- List[pd.DataFrame] - A list of DataFrames loaded from the CSV files within the extracted directory. + A list of DataFrames loaded from the CSV files + within the extracted directory. + """ path_zip = os.path.join(datapath, zipname) path_zip_ext = path_zip + ".zip" @@ -68,9 +77,11 @@ def download_data_from_zip( def get_dataframes_in_folder(path: str, extension: str) -> List[pd.DataFrame]: - """ - Loads all dataframes from files with a specified extension within a directory, including - subdirectories. Special handling for '.tsf' files which are converted and immediately returned. + """Load all dataframes from files. + + Loads all files with a specified extension within a directory, including + subdirectories. Special handling for '.tsf' files which are converted + and immediately returned. Parameters ---------- @@ -82,8 +93,10 @@ def get_dataframes_in_folder(path: str, extension: str) -> List[pd.DataFrame]: Returns ------- List[pd.DataFrame] - A list of pandas DataFrames loaded from the files matching the extension. - If a '.tsf' file is found, its converted DataFrame is returned immediately. + A list of pandas DataFrames loaded from the files + matching the extension. If a '.tsf' file is found, + its converted DataFrame is returned immediately. 
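# Illustrative sketch, not part of the patch: calling svdtriplet (added above) with
# uniform row weights and checking that, at full rank, (U * s) @ V.T reproduces X.
# The import path is the one introduced by this patch.
import numpy as np

from qolmat.utils.algebra import svdtriplet

X = np.array(
    [[1.0, 2.0, 0.0], [0.0, 1.0, 1.0], [2.0, 0.0, 1.0], [1.0, 1.0, 1.0]]
)
s, U, V = svdtriplet(X)  # row_w defaults to uniform weights
X_rec = (U * s) @ V.T  # undoes the row weighting: should match X
print(np.allclose(X_rec, X))  # expected: True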
+ """ list_df = [] for folder, _, files in os.walk(path): @@ -91,7 +104,9 @@ def get_dataframes_in_folder(path: str, extension: str) -> List[pd.DataFrame]: if extension in file: list_df.append(pd.read_csv(os.path.join(folder, file))) if ".tsf" in file: - loaded_data = convert_tsf_to_dataframe(os.path.join(folder, file)) + loaded_data = convert_tsf_to_dataframe( + os.path.join(folder, file) + ) return [loaded_data] return list_df @@ -103,8 +118,7 @@ def generate_artificial_ts( ratio_anomalies: float, amp_noise: float, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Generates time series data, anomalies, and noise based on given parameters. + """Generate TS data, anomalies, and noise based on given parameters. Parameters ---------- @@ -125,8 +139,8 @@ def generate_artificial_ts( Time series data with sine waves (X). Anomaly data with specified amplitudes at random positions (A). Gaussian noise added to the time series (E). - """ + """ mesh = np.arange(n_samples) X = np.ones(n_samples) for p in periods: @@ -135,7 +149,9 @@ def generate_artificial_ts( n_anomalies = int(n_samples * ratio_anomalies) anomalies = np.random.standard_exponential(size=n_anomalies) anomalies *= amp_anomalies * np.random.choice([-1, 1], size=n_anomalies) - ind_anomalies = np.random.choice(range(n_samples), size=n_anomalies, replace=False) + ind_anomalies = np.random.choice( + range(n_samples), size=n_anomalies, replace=False + ) A = np.zeros(n_samples) A[ind_anomalies] = anomalies @@ -148,21 +164,23 @@ def get_data( datapath: str = "data/", n_groups_max: int = sys.maxsize, ) -> pd.DataFrame: - """ - Download or generate data + """Download or generate data. Parameters ---------- + name_data: str, optional + name of the file, by default "Beijing" datapath : str, optional data path, by default "data/" - download : bool, optional - if True: download a public dataset, if False: generate random univariate time series, by - default True + n_groups_max : int, optional + max number of groups, by default sys.maxsize. 
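# Illustrative sketch, not part of the patch: calling generate_artificial_ts with
# hypothetical parameters and recombining its three outputs the way get_data does
# below (signal = X + A + E). The values are arbitrary.
from qolmat.utils.data import generate_artificial_ts

X, A, E = generate_artificial_ts(
    n_samples=1000,
    periods=[10, 100],
    amp_anomalies=0.5,
    ratio_anomalies=0.05,
    amp_noise=0.1,
)
signal = X + A + E  # sine components + sparse anomalies + Gaussian noise
print(signal.shape)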
+ Only used if name_data == "SNCF" Returns ------- pd.DataFrame requested data + """ url_zenodo = "https://zenodo.org/record/" if name_data == "Beijing": @@ -175,10 +193,13 @@ def get_data( df = read_csv_local("conductors") return df elif name_data == "Titanic": - path = "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/" + path = "https://gist.githubusercontent.com/" + "fyyying/4aa5b471860321d7b47fd881898162b7/raw/" "6907bb3a38bfbb6fccf3a8b1edfb90e39714d14f/titanic_dataset.csv" df = pd.read_csv(path) - df = df[["Survived", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]] + df = df[ + ["Survived", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"] + ] df["Age"] = pd.to_numeric(df["Age"], errors="coerce") df["Fare"] = pd.to_numeric(df["Fare"], errors="coerce") return df @@ -194,7 +215,9 @@ def get_data( n_samples, periods, amp_anomalies, ratio_anomalies, amp_noise ) signal = X + A + E - df = pd.DataFrame({"signal": signal, "index": range(n_samples), "station": city}) + df = pd.DataFrame( + {"signal": signal, "index": range(n_samples), "station": city} + ) df.set_index(["station", "index"], inplace=True) df["X"] = X @@ -206,7 +229,9 @@ def get_data( df = pd.read_parquet(path_file) sizes_stations = df.groupby("station")["val_in"].mean().sort_values() n_groups_max = min(len(sizes_stations), n_groups_max) - stations = sizes_stations.index.get_level_values("station").unique()[-n_groups_max:] + stations = sizes_stations.index.get_level_values("station").unique()[ + -n_groups_max: + ] df = df.loc[stations] return df elif name_data == "Beijing_online": @@ -227,20 +252,30 @@ def get_data( df = pd.read_csv(csv_url, index_col=0) return df elif name_data == "Monach_weather": - urllink = os.path.join(url_zenodo, "4654822/files/weather_dataset.zip?download=1") + urllink = os.path.join( + url_zenodo, "4654822/files/weather_dataset.zip?download=1" + ) zipname = "weather_dataset" - list_loaded_data = download_data_from_zip(zipname, urllink, datapath=datapath) + list_loaded_data = download_data_from_zip( + zipname, urllink, datapath=datapath + ) loaded_data = list_loaded_data[0] df_list: List[pd.DataFrame] = [] for k in range(len(loaded_data)): values = list(loaded_data["series_value"][k]) freq = "1D" time_index = pd.date_range( - start=pd.Timestamp("01/01/2010"), periods=len(values), freq=freq + start=pd.Timestamp("01/01/2010"), + periods=len(values), + freq=freq, ) df_list = df_list + [ pd.DataFrame( - {loaded_data.series_name[k] + " " + loaded_data.series_type[k]: values}, + { + loaded_data.series_name[k] + + " " + + loaded_data.series_type[k]: values + }, index=time_index, ) ] @@ -254,18 +289,26 @@ def get_data( "4659727/files/australian_electricity_demand_dataset.zip?download=1", ) zipname = "australian_electricity_demand_dataset" - list_loaded_data = download_data_from_zip(zipname, urllink, datapath=datapath) + list_loaded_data = download_data_from_zip( + zipname, urllink, datapath=datapath + ) loaded_data = list_loaded_data[0] df_list = [] for k in range(len(loaded_data)): values = list(loaded_data["series_value"][k]) freq = "30min" time_index = pd.date_range( - start=loaded_data.start_timestamp[k], periods=len(values), freq=freq + start=loaded_data.start_timestamp[k], + periods=len(values), + freq=freq, ) df_list = df_list + [ pd.DataFrame( - {loaded_data.series_name[k] + " " + loaded_data.state[k]: values}, + { + loaded_data.series_name[k] + + " " + + loaded_data.state[k]: values + }, index=time_index, ) ] @@ -278,7 +321,7 @@ def get_data( def 
preprocess_data_beijing(df: pd.DataFrame) -> pd.DataFrame: - """Preprocess data from the "Beijing" datset + """Preprocess data from the "Beijing" datset. Parameters ---------- @@ -289,25 +332,39 @@ def preprocess_data_beijing(df: pd.DataFrame) -> pd.DataFrame: ------- pd.DataFrame preprocessed dataframe + """ df["datetime"] = pd.to_datetime(df[["year", "month", "day", "hour"]]) df["station"] = "Beijing" df.set_index(["station", "datetime"], inplace=True) df.drop( - columns=["year", "month", "day", "hour", "No", "cbwd", "Iws", "Is", "Ir"], + columns=[ + "year", + "month", + "day", + "hour", + "No", + "cbwd", + "Iws", + "Is", + "Ir", + ], inplace=True, ) df.sort_index(inplace=True) df = df.groupby( - ["station", df.index.get_level_values("datetime").floor("d")], group_keys=False + ["station", df.index.get_level_values("datetime").floor("d")], + group_keys=False, ).mean() return df -def add_holes(df: pd.DataFrame, ratio_masked: float, mean_size: int) -> pd.DataFrame: - """ - Creates holes in a dataset with no missing value, starting from `df`. Only used in the - documentation to design examples. +def add_holes( + df: pd.DataFrame, ratio_masked: float, mean_size: int +) -> pd.DataFrame: + """Create holes in a dataset with no missing value, starting from `df`. + + Only used in the documentation to design examples. Parameters ---------- @@ -319,10 +376,12 @@ def add_holes(df: pd.DataFrame, ratio_masked: float, mean_size: int) -> pd.DataF ratio_masked : float Targeted global proportion of nans added in the returned dataset + Returns ------- pd.DataFrame dataframe with missing values + """ groups = df.index.names.difference(["datetime", "date", "index", None]) if groups != []: @@ -334,10 +393,16 @@ def add_holes(df: pd.DataFrame, ratio_masked: float, mean_size: int) -> pd.DataF 1, ratio_masked=ratio_masked, subset=df.columns ) - generator.dict_probas_out = {column: 1 / mean_size for column in df.columns} - generator.dict_ratios = {column: 1 / len(df.columns) for column in df.columns} + generator.dict_probas_out = { + column: 1 / mean_size for column in df.columns + } + generator.dict_ratios = { + column: 1 / len(df.columns) for column in df.columns + } if generator.groups: - mask = df.groupby(groups, group_keys=False).apply(generator.generate_mask) + mask = df.groupby(groups, group_keys=False).apply( + generator.generate_mask + ) else: mask = generator.generate_mask(df) @@ -351,8 +416,10 @@ def get_data_corrupted( mean_size: int = 90, ratio_masked: float = 0.2, ) -> pd.DataFrame: - """ - Returns a dataframe with controled corruption optained from the source `name_data` + """Corrupt data. + + Return a dataframe with controlled corruption obtained + from the source `name_data`. Parameters ---------- @@ -362,10 +429,12 @@ def get_data_corrupted( Mean size of the holes to be generated using a geometric law ratio_masked: float Percent of missing data in each column in the output dataframe + Returns ------- pd.DataFrame Dataframe with missing values + """ df = get_data(name_data) df = add_holes(df, mean_size=mean_size, ratio_masked=ratio_masked) @@ -373,8 +442,7 @@ def get_data_corrupted( def add_station_features(df: pd.DataFrame) -> pd.DataFrame: - """ - Create a station feature in the dataset + """Create a station feature in the dataset. 
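# Illustrative sketch, not part of the patch: punching controlled holes into a
# small, fully observed, purely numeric frame with add_holes (defined above).
# The sizes and ratios are arbitrary; the exact hole pattern is random.
import numpy as np
import pandas as pd

from qolmat.utils.data import add_holes

df_full = pd.DataFrame(
    {"a": np.sin(np.arange(200) / 5.0), "b": np.arange(200, dtype=float)}
)
df_holes = add_holes(df_full, ratio_masked=0.2, mean_size=3)
print(df_holes.isna().mean())  # roughly 20% of each column is now missing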
Parameters ---------- @@ -385,6 +453,7 @@ def add_station_features(df: pd.DataFrame) -> pd.DataFrame: ------- pd.DataFrame dataframe with missing values + """ df = df.copy() stations = df.index.get_level_values("station") @@ -393,9 +462,10 @@ def add_station_features(df: pd.DataFrame) -> pd.DataFrame: return df -def add_datetime_features(df: pd.DataFrame, col_time: str = "datetime") -> pd.DataFrame: - """ - Create a seasonal feature in the dataset with a cosine function +def add_datetime_features( + df: pd.DataFrame, col_time: str = "datetime" +) -> pd.DataFrame: + """Create a seasonal feature in the dataset with a cosine function. Parameters ---------- @@ -408,11 +478,14 @@ def add_datetime_features(df: pd.DataFrame, col_time: str = "datetime") -> pd.Da ------- pd.DataFrame dataframe with missing values + """ df = df.copy() time = df.index.get_level_values(col_time).to_series() days_in_year = time.dt.year.apply( - lambda x: 366 if ((x % 4 == 0) and (x % 100 != 0)) or (x % 400 == 0) else 365 + lambda x: 366 + if ((x % 4 == 0) and (x % 100 != 0)) or (x % 400 == 0) + else 365 ) ratio = time.dt.dayofyear.values / days_in_year.values df["time_cos"] = np.cos(2 * np.pi * ratio) @@ -421,13 +494,30 @@ def add_datetime_features(df: pd.DataFrame, col_time: str = "datetime") -> pd.Da def convert_tsf_to_dataframe( - full_file_path_and_name, - replace_missing_vals_with="NaN", - value_column_name="series_value", + full_file_path_and_name: str, + replace_missing_vals_with: Union[str, float, int] = "NaN", + value_column_name: str = "series_value", ): + """Convert a .tsf file to a dataframe. + + Parameters + ---------- + full_file_path_and_name : str + Filename + replace_missing_vals_with : Union[str, float, int], optional + Replace missing values with, by default "NaN" + value_column_name : str, optional + Name of the column containing the values, by default "series_value" + + Returns + ------- + _type_ + _description_ + + """ col_names = [] col_types = [] - all_data = {} + all_data: Dict[str, List] = {} line_count = 0 found_data_tag = False found_data_section = False @@ -443,21 +533,29 @@ def convert_tsf_to_dataframe( line_content = line.split(" ") if line.startswith("@attribute"): if len(line_content) != 3: - raise Exception("Invalid meta-data specification.") + raise Exception( + "Invalid meta-data specification." + ) col_names.append(line_content[1]) col_types.append(line_content[2]) else: if len(line_content) != 2: - raise Exception("Invalid meta-data specification.") + raise Exception( + "Invalid meta-data specification." + ) else: if len(col_names) == 0: - raise Exception("Attribute section must come before data.") + raise Exception( + "Attribute section must come before data." + ) found_data_tag = True elif not line.startswith("#"): if len(col_names) == 0: - raise Exception(" Attribute section must come before data.") + raise Exception( + " Attribute section must come before data." + ) elif not found_data_tag: raise Exception("Missing @data tag.") else: @@ -472,25 +570,35 @@ def convert_tsf_to_dataframe( full_info = line.split(":") if len(full_info) != (len(col_names) + 1): - raise Exception("Missing attributes/values in series.") + raise Exception( + "Missing attributes/values in series." + ) series = full_info[len(full_info) - 1] - series = series.split(",") + series = series.split(",") # type: ignore if len(series) == 0: - raise Exception(" Missing values should be indicated with ? symbol") + raise Exception( + " Missing values should be indicated " + "with ? 
symbol" + ) numeric_series = [] for val in series: if val == "?": - numeric_series.append(replace_missing_vals_with) + numeric_series.append( + replace_missing_vals_with + ) else: - numeric_series.append(float(val)) + numeric_series.append(float(val)) # type: ignore - if numeric_series.count(replace_missing_vals_with) == len(numeric_series): + if numeric_series.count( + replace_missing_vals_with + ) == len(numeric_series): raise Exception( - "At least one numeric value should be there in a series." + "At least one numeric value should be " + "there in a series." ) all_series.append(pd.Series(numeric_series).array) @@ -500,9 +608,12 @@ def convert_tsf_to_dataframe( if col_types[i] == "numeric": att_val = int(full_info[i]) elif col_types[i] == "string": - att_val = str(full_info[i]) + att_val = str(full_info[i]) # type: ignore elif col_types[i] == "date": - att_val = datetime.strptime(full_info[i], "%Y-%m-%d %H-%M-%S") + att_val = datetime.strptime( + full_info[i], + "%Y-%m-%d %H-%M-%S", # type: ignore + ) else: raise Exception("Invalid attribute type.") diff --git a/qolmat/utils/exceptions.py b/qolmat/utils/exceptions.py index 513e843b..baddfb38 100644 --- a/qolmat/utils/exceptions.py +++ b/qolmat/utils/exceptions.py @@ -1,7 +1,11 @@ +"""Exceptions for qolmat package.""" + from typing import Any, List, Tuple, Type class PyTorchExtraNotInstalled(Exception): + """Raise when pytorch extra is not installed.""" + def __init__(self): super().__init__( """Please install torch xx.xx.xx @@ -10,6 +14,8 @@ def __init__(self): class SignalTooShort(Exception): + """Raise when the signal is too short.""" + def __init__(self, period: int, n_cols: int): super().__init__( f"""`period` must be smaller than the signals duration. @@ -18,6 +24,8 @@ def __init__(self, period: int, n_cols: int): class NoMissingValue(Exception): + """Raise an error when there is no missing value.""" + def __init__(self, subset_without_nans: List[str]): super().__init__( f"No missing value in the columns {subset_without_nans}! " @@ -26,47 +34,78 @@ def __init__(self, subset_without_nans: List[str]): class SubsetIsAString(Exception): + """Raise an error when the subset is a string.""" + def __init__(self, subset: Any): - super().__init__(f"Provided subset `{subset}` should be None or a list!") + super().__init__( + f"Provided subset `{subset}` should be None or a list!" + ) class NotDimension2(Exception): + """Raise an error when the matrix is not of dim 2.""" + def __init__(self, shape: Tuple[int, ...]): - super().__init__(f"Provided matrix is of shape {shape}, which is not of dimension 2!") + super().__init__( + f"Provided matrix is of shape {shape}, " + "which is not of dimension 2!" + ) class NotDataFrame(Exception): + """Raise an error when the input is not a dataframe.""" + def __init__(self, X_type: Type[Any]): - super().__init__(f"Input musr be a dataframe, not a {X_type}") + super().__init__(f"Input must be a dataframe, not a {X_type}") class NotEnoughSamples(Exception): + """Raise an error when there is no not enough samples.""" + def __init__(self, max_num_row: int, min_n_rows: int): super().__init__( - f"Not enough valid patterns found. Largest found pattern has {max_num_row} rows, when " + f"Not enough valid patterns found. " + f"Largest found pattern has {max_num_row} rows, when " f"they should have at least min_n_rows={min_n_rows}." 
) class EstimatorNotDefined(Exception): + """Raise an error when the estimator is not defined.""" + def __init__(self): - super().__init__("The underlying estimator should be defined beforehand!") + super().__init__( + "The underlying estimator should be defined beforehand!" + ) class SingleSample(Exception): + """Raise an error when there is a single sample.""" + def __init__(self): - super().__init__("""This imputer cannot be fitted on a single sample!""") + super().__init__( + """This imputer cannot be fitted on a single sample!""" + ) class IllConditioned(Exception): + """Raise an error when the covariance matrix is ill-conditioned.""" + def __init__(self, min_sv: float, min_std: float): super().__init__( - f"The covariance matrix is ill-conditioned, indicating high-colinearity: the smallest " - f"singular value of the data matrix is smaller than the threshold min_std ({min_sv} < " - f"{min_std}). Consider removing columns of decreasing the threshold." + f"The covariance matrix is ill-conditioned, " + "indicating high-colinearity: " + "the smallest singular value of the data matrix is smaller " + f"than the threshold min_std ({min_sv} < {min_std}). " + f"Consider removing columns of decreasing the threshold." ) class TypeNotHandled(Exception): + """Raise an error when the type is not handled.""" + def __init__(self, col: str, type_col: str): - super().__init__(f"The column `{col}` is of type `{type_col}`, which is not handled!") + super().__init__( + f"The column `{col}` is of type `{type_col}`, " + "which is not handled!" + ) diff --git a/qolmat/utils/plot.py b/qolmat/utils/plot.py index c6700e13..e9809425 100644 --- a/qolmat/utils/plot.py +++ b/qolmat/utils/plot.py @@ -1,18 +1,17 @@ -""" -Useful drawing functions -""" +"""Useful drawing functions.""" from __future__ import annotations -from typing import Dict, List, Any, Optional, Tuple, Union + +from typing import Any, Dict, List, Optional, Tuple, Union import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.ticker as plticker import numpy as np -from numpy.typing import NDArray import pandas as pd import scipy from mpl_toolkits.axes_grid1 import make_axes_locatable +from numpy.typing import NDArray plt.rcParams["axes.spines.right"] = False plt.rcParams["axes.spines.top"] = False @@ -23,18 +22,20 @@ tab10 = plt.get_cmap("tab10") -def plot_matrices(list_matrices: List[np.ndarray], title: Optional[str] = None) -> None: - """Plot RPCA matrices +def plot_matrices( + list_matrices: List[np.ndarray], title: Optional[str] = None +) -> None: + """Plot RPCA matrices. Parameters ---------- list_matrices : List[np.ndarray] - List containing, in the right order, the observations matrix, the low-rank matrix and the - sparse matrix + List containing, in the right order, the observations matrix, + the low-rank matrix and the sparse matrix title : Optional[str], optional if present, title of the saved figure, by default None - """ + """ suptitles = ["Observations", "Low-rank", "Sparse"] fig, ax = plt.subplots(1, 3, figsize=(10, 3)) @@ -62,21 +63,21 @@ def plot_signal( ylabel: Optional[str] = None, dates: Optional[List] = None, ) -> None: - """Plot RPCA results for time series + """Plot RPCA results for time series. 
Parameters ---------- list_signals : List[List] - List containing, in the right order, the observed time series, the cleaned signal and - the anomalies + List containing, in the right order, the observed time series, + the cleaned signal and the anomalies title : Optional[str], optional if present, title of the saved figure, by default None ylabel : Optional[str], optional ylabel, by default None dates : Optional[List], optional dates of the time series (xlabel), by default None - """ + """ suptitles = ["Observations", "Cleaned", "Anomalies"] colors = ["black", "darkblue", "crimson"] fontsize = 15 @@ -106,7 +107,7 @@ def plot_images( dims: Tuple[int, int], filename: Optional[str] = None, ) -> None: - """Plot multiple images in 3 columns for original, background and "foreground" + """Plot multiple images for original, background and "foreground". Parameters ---------- @@ -122,8 +123,8 @@ def plot_images( dimensions of the reduction filename : Optional[str], optional filename for saving figure, by default None - """ + """ f = plt.figure(figsize=(15, 10)) r = len(index_array) @@ -163,8 +164,7 @@ def make_ellipses( n_std: float = 2, color: Union[str, Any, Tuple[float, float, float]] = "None", ): - """ - Create a plot of the covariance confidence ellipse of *x* and *y*. + """Create a plot of the covariance confidence ellipse of *x* and *y*. Parameters ---------- @@ -186,16 +186,21 @@ def make_ellipses( Returns ------- matplotlib.patches.Ellipse - """ + """ pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1]) ell_radius_x = np.sqrt(1 + pearson) * 2.5 ell_radius_y = np.sqrt(1 - pearson) * 2.5 - ell = mpl.patches.Ellipse((0, 0), width=ell_radius_x, height=ell_radius_y, facecolor=color) + ell = mpl.patches.Ellipse( + (0, 0), width=ell_radius_x, height=ell_radius_y, facecolor=color + ) scale_x = np.sqrt(cov[0, 0]) * n_std scale_y = np.sqrt(cov[1, 1]) * n_std transf = ( - mpl.transforms.Affine2D().rotate_deg(45).scale(scale_x, scale_y).translate(mean_x, mean_y) + mpl.transforms.Affine2D() + .rotate_deg(45) + .scale(scale_x, scale_y) + .translate(mean_x, mean_y) ) ell.set_transform(transf + ax.transData) ax.add_patch(ell) @@ -211,8 +216,7 @@ def make_ellipses_from_data( n_std: float = 2, color: Union[str, Any, Tuple[float, float, float]] = "None", ): - """ - Create a plot of the covariance confidence ellipse of *x* and *y*. + """Create a plot of the covariance confidence ellipse of *x* and *y*. Parameters ---------- @@ -231,6 +235,7 @@ def make_ellipses_from_data( Returns ------- matplotlib.patches.Ellipse + """ if x.size != y.size: raise ValueError("x and y must be the same size") @@ -248,10 +253,14 @@ def compare_covariances( col_y: str, ax: mpl.axes.Axes, label: str = "", - color: Union[None, str, Tuple[float, float, float], Tuple[float, float, float, float]] = None, + color: Union[ + None, + str, + Tuple[float, float, float], + Tuple[float, float, float, float], + ] = None, ): - """ - Covariance plot: scatter plot with ellipses + """Covariance plot: scatter plot with ellipses. 
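# Illustrative sketch, not part of the patch: the ellipse geometry that
# make_ellipses (above) derives from a 2x2 covariance matrix, evaluated with
# NumPy only on a hypothetical covariance.
import numpy as np

cov = np.array([[2.0, 0.6], [0.6, 1.0]])
n_std = 2
pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
radius_x = np.sqrt(1 + pearson) * 2.5  # unit-ellipse radii before scaling
radius_y = np.sqrt(1 - pearson) * 2.5
scale_x = np.sqrt(cov[0, 0]) * n_std  # stretch applied after the 45 deg rotation
scale_y = np.sqrt(cov[1, 1]) * n_std
print(pearson, radius_x, radius_y, scale_x, scale_y)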
Parameters ---------- @@ -265,12 +274,26 @@ def compare_covariances( variable y, column's name of dataframe df2 to compare with ax : matplotlib.axes._subplots.AxesSubplot matplotlib ax handles + label: str + label of the plot + color: Union[None, str, Tuple[float, float, float], + Tuple[float, float, float, float]] + color of the ellipse + """ df1 = df_1.dropna() df2 = df_2.dropna() if color is None: color = tab10(0) - ax.scatter(df2[col_x], df2[col_y], marker=".", color=color, s=2, alpha=0.7, label="imputed") + ax.scatter( + df2[col_x], + df2[col_y], + marker=".", + color=color, + s=2, + alpha=0.7, + label="imputed", + ) ax.scatter( df1[col_x], df1[col_y], @@ -293,7 +316,9 @@ def multibar( colors: Any = None, decimals: float = 0, ): - """Create a multi-bar graph to represent the values of the different dataframe columns. + """Create a multi-bar graph. + + It represents the values of the different dataframe columns. Parameters ---------- @@ -307,8 +332,8 @@ def multibar( color in multibar plot, by default None decimals : float, optional the decimals numbers, by default 0 - """ + """ if ax is None: ax = plt.gca() if colors is None: @@ -346,8 +371,10 @@ def multibar( plt.legend(loc=(1, 0)) -def plot_imputations(df: pd.DataFrame, dict_df_imputed: Dict[str, pd.DataFrame]): - """Plot original and imputed dataframes for each imputers +def plot_imputations( + df: pd.DataFrame, dict_df_imputed: Dict[str, pd.DataFrame] +): + """Plot original and imputed dataframes for each imputers. Parameters ---------- @@ -355,6 +382,7 @@ def plot_imputations(df: pd.DataFrame, dict_df_imputed: Dict[str, pd.DataFrame]) original dataframe dict_df_imputed : Dict[str, pd.DataFrame] dictionnary of imputed dataframe for each imputers + """ n_columns = len(df.columns) n_imputers = len(dict_df_imputed) @@ -369,7 +397,9 @@ def plot_imputations(df: pd.DataFrame, dict_df_imputed: Dict[str, pd.DataFrame]) plt.plot(values_orig, ".", color="black", label="original") values_imp = df_imputed[col].copy() values_imp[values_orig.notna()] = np.nan - plt.plot(values_imp, ".", color=tab10(0), label=name_imputer, alpha=1) + plt.plot( + values_imp, ".", color=tab10(0), label=name_imputer, alpha=1 + ) plt.ylabel(col, fontsize=16) if i_plot % n_columns == 0: plt.legend(loc=[1, 0], fontsize=18) diff --git a/qolmat/utils/utils.py b/qolmat/utils/utils.py index ce8f7865..3f445e7f 100644 --- a/qolmat/utils/utils.py +++ b/qolmat/utils/utils.py @@ -1,23 +1,24 @@ -from typing import List, Optional, Tuple, Union -import warnings +"""Utils for qolmat package.""" + +from typing import List, Tuple, Union import numpy as np import pandas as pd - from numpy.typing import NDArray from sklearn.base import check_array -from qolmat.utils.exceptions import NotDimension2, SignalTooShort +from qolmat.utils.exceptions import NotDimension2 HyperValue = Union[int, float, str] def _get_numerical_features(df1: pd.DataFrame) -> List[str]: - """Get numerical features from dataframe + """Get numerical features from dataframe. Parameters ---------- df1 : pd.DataFrame + Input dataframe. Returns ------- @@ -28,6 +29,7 @@ def _get_numerical_features(df1: pd.DataFrame) -> List[str]: ------ Exception No numerical feature is found + """ cols_numerical = df1.select_dtypes(include=np.number).columns.tolist() if len(cols_numerical) == 0: @@ -37,11 +39,12 @@ def _get_numerical_features(df1: pd.DataFrame) -> List[str]: def _get_categorical_features(df1: pd.DataFrame) -> List[str]: - """Get categorical features from dataframe + """Get categorical features from dataframe. 
Parameters ---------- df1 : pd.DataFrame + Input dataframe. Returns ------- @@ -52,10 +55,12 @@ def _get_categorical_features(df1: pd.DataFrame) -> List[str]: ------ Exception No categorical feature is found - """ + """ cols_numerical = df1.select_dtypes(include=np.number).columns.tolist() - cols_categorical = [col for col in df1.columns.to_list() if col not in cols_numerical] + cols_categorical = [ + col for col in df1.columns.to_list() if col not in cols_numerical + ] if len(cols_categorical) == 0: raise Exception("No categorical feature is found.") else: @@ -63,9 +68,10 @@ def _get_categorical_features(df1: pd.DataFrame) -> List[str]: def _validate_input(X: NDArray) -> pd.DataFrame: - """ - Checks that the input X can be converted into a DataFrame, and returns the corresponding - dataframe. + """Calidate the input array. + + Checks that the input X can be converted into a DataFrame, + and returns the corresponding dataframe. Parameters ---------- @@ -75,8 +81,9 @@ def _validate_input(X: NDArray) -> pd.DataFrame: Returns ------- pd.DataFrame - Formatted dataframe, if the input had no column names then the dataframe columns are - integers + Formatted dataframe, if the input had no column names + then the dataframe columns are integers + """ check_array(X, force_all_finite="allow-nan", dtype=None) if not isinstance(X, pd.DataFrame): @@ -85,7 +92,7 @@ def _validate_input(X: NDArray) -> pd.DataFrame: raise ValueError if len(X_np.shape) == 1: X_np = X_np.reshape(-1, 1) - df = pd.DataFrame(X_np, columns=[i for i in range(X_np.shape[1])]) + df = pd.DataFrame(X_np, columns=list(range(X_np.shape[1]))) df = df.infer_objects() else: df = X @@ -103,7 +110,7 @@ def progress_bar( length: int = 100, fill: str = "█", ): - """Call in a loop to create terminal progress bar + """Call in a loop to create terminal progress bar. Parameters ---------- @@ -121,8 +128,11 @@ def progress_bar( character length of bar, by default 100 fill : str bar fill character, by default "█" + """ - percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) + percent = ("{0:." + str(decimals) + "f}").format( + 100 * (iteration / float(total)) + ) filled_length = int(length * iteration // total) bar = fill * filled_length + "-" * (length - filled_length) print(f"\r{prefix} |{bar}| {percent}% {suffix}", end="\r") @@ -131,7 +141,7 @@ def progress_bar( def acf(values: pd.Series, lag_max: int = 30) -> pd.Series: - """Correlation series of dataseries + """Correlation series of dataseries. Parameters ---------- @@ -144,6 +154,7 @@ def acf(values: pd.Series, lag_max: int = 30) -> pd.Series: ------- pd.Series correlation series of value + """ acf = pd.Series(0, index=range(lag_max)) for lag in range(lag_max): @@ -152,8 +163,7 @@ def acf(values: pd.Series, lag_max: int = 30) -> pd.Series: def impute_nans(M: NDArray, method: str = "zeros") -> NDArray: - """ - Impute the M's nan with the specified method + """Impute the M's nan with the specified method. 
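# Illustrative sketch, not part of the patch: column-wise NaN imputation with
# impute_nans using the "median" strategy on a small made-up array.
import numpy as np

from qolmat.utils.utils import impute_nans

M = np.array([[1.0, np.nan], [3.0, 2.0], [np.nan, 4.0]])
M_imputed = impute_nans(M, method="median")
print(M_imputed)  # NaNs replaced by each column's median (2.0 and 3.0 here)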
Parameters ---------- @@ -166,6 +176,7 @@ def impute_nans(M: NDArray, method: str = "zeros") -> NDArray: ------- NDArray Imputed Array + Raises ------ ValueError @@ -180,9 +191,13 @@ def impute_nans(M: NDArray, method: str = "zeros") -> NDArray: isna = np.isnan(values) nna = np.sum(isna) if method == "mean": - value_imputation = np.nanmean(M) if nna == n_rows else np.nanmean(values) + value_imputation = ( + np.nanmean(M) if nna == n_rows else np.nanmean(values) + ) elif method == "median": - value_imputation = np.nanmedian(M) if nna == n_rows else np.nanmedian(values) + value_imputation = ( + np.nanmedian(M) if nna == n_rows else np.nanmedian(values) + ) elif method == "zeros": value_imputation = 0 else: @@ -193,8 +208,7 @@ def impute_nans(M: NDArray, method: str = "zeros") -> NDArray: def linear_interpolation(X: NDArray) -> NDArray: - """ - Impute missing data with a linear interpolation, column-wise + """Impute missing data with a linear interpolation, column-wise. Parameters ---------- @@ -205,6 +219,7 @@ def linear_interpolation(X: NDArray) -> NDArray: ------- X_interpolated : NDArray imputed array, by linear interpolation + """ n_rows, n_cols = X.shape indices = np.arange(n_rows) @@ -224,12 +239,12 @@ def linear_interpolation(X: NDArray) -> NDArray: def fold_signal(X: NDArray, period: int) -> NDArray: - """ - Reshape a time series into a 2D-array + """Reshape a time series into a 2D-array. Parameters ---------- X : NDArray + Input array to be reshaped. period : int Period used to fold the signal of the 2D-array @@ -242,6 +257,7 @@ def fold_signal(X: NDArray, period: int) -> NDArray: ------ ValueError if X is not a 1D array + """ if len(X.shape) != 2: raise NotDimension2(X.shape) @@ -257,8 +273,20 @@ def fold_signal(X: NDArray, period: int) -> NDArray: def prepare_data(X: NDArray, period: int = 1) -> NDArray: - """ - Transform signal to 2D-array in case of 1D-array. + """Reshape a time series into a 2D-array. + + Parameters + ---------- + X : NDArray + Input array to be reshaped. + period : int, optional + Period used to fold the signal. Defaults to 1. + + Returns + ------- + NDArray + Reshaped array. + """ if len(X.shape) == 1: X = X.reshape(-1, 1) @@ -267,27 +295,43 @@ def prepare_data(X: NDArray, period: int = 1) -> NDArray: return X_fold -def get_shape_original(M: NDArray, shape: tuple) -> NDArray: +def get_shape_original(M: NDArray, shape: Tuple[int, int]) -> NDArray: """Shapes an output matrix from the RPCA algorithm into the original shape. Parameters ---------- M : NDArray Matrix to reshape - X : NDArray - Matrix of the desired shape + shape : Tuple[int, int] + Desired shape Returns ------- NDArray Reshaped matrix + """ - size = np.prod(shape) + size: int = int(np.prod(shape)) M_flat = M.flatten()[:size] return M_flat.reshape(shape) def create_lag_matrices(X: NDArray, p: int) -> Tuple[NDArray, NDArray]: + """Create lag matrices for the VAR(p). + + Parameters + ---------- + X : NDArray + Input matrix + p : int + Number of lags + + Returns + ------- + Tuple[NDArray, NDArray] + Z and Y + + """ n_rows, _ = X.shape n_rows_new = n_rows - p list_X_lag = [np.ones((n_rows_new, 1))] @@ -301,8 +345,184 @@ def create_lag_matrices(X: NDArray, p: int) -> Tuple[NDArray, NDArray]: def nan_mean_cov(X: NDArray) -> Tuple[NDArray, NDArray]: + """Compute mean and covariance matrix. 
+ + Parameters + ---------- + X : NDArray + Input matrix + + Returns + ------- + Tuple[NDArray, NDArray] + Means and covariance matrix + + """ _, n_variables = X.shape means = np.nanmean(X, axis=0) cov = np.ma.cov(np.ma.masked_invalid(X), rowvar=False).data cov = cov.reshape(n_variables, n_variables) return means, cov + + +def moy_p(V, weights): + """Compute the weighted mean of a vector, ignoring NaNs. + + Parameters + ---------- + V : array-like + Input vector with possible NaN values. + weights : array-like + Weights corresponding to each element in V. + + Returns + ------- + float + Weighted mean of non-NaN elements. + + """ + mask = ~np.isnan(V) + total_weight = np.sum(weights[mask]) + if total_weight == 0: + return 0.0 # or use np.finfo(float).eps for a small positive value + return np.sum(V[mask] * weights[mask]) / total_weight + + +def tab_disjonctif_NA(df): + """Create a disjunctive (one-hot encoded). + + Parameters + ---------- + df : DataFrame + Input DataFrame with categorical and numeric variables. + + Returns + ------- + DataFrame + Disjunctive table with one-hot encoding. + + """ # noqa: E501 + df_encoded_list = [] + for col in df.columns: + if df[col].dtype.name == "category" or df[col].dtype == object: + df[col] = df[col].astype("category") + # Include '__MISSING__' as a category if not already present + if "__MISSING__" not in df[col].cat.categories: + df[col] = df[col].cat.add_categories(["__MISSING__"]) + # Fill missing values with '__MISSING__' + df[col] = df[col].fillna("__MISSING__") + # One-hot encode the categorical variable + encoded = pd.get_dummies( + df[col], + prefix=col, + prefix_sep="_", + dummy_na=False, + dtype=float, + ) + df_encoded_list.append(encoded) + else: + # Numeric column; keep as is + df_encoded_list.append(df[[col]]) + # Concatenate all encoded columns + df_encoded = pd.concat(df_encoded_list, axis=1) + return df_encoded + + +def tab_disjonctif_prop(df, seed=None): + """Perform probabilistic imputation for categorical columns using observed + value distributions, without creating a separate missing category. + + Parameters + ---------- + df : DataFrame + DataFrame with categorical columns to impute. + seed : int, optional + Random seed for reproducibility. Default is None. + + Returns + ------- + DataFrame + Disjunctive coded DataFrame with missing values probabilistically + imputed. 
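# Illustrative sketch, not part of the patch: moy_p (above) computes a weighted
# mean while skipping NaNs, so the NaN entry below is simply ignored.
import numpy as np

from qolmat.utils.utils import moy_p

values = np.array([1.0, np.nan, 3.0])
weights = np.array([0.25, 0.50, 0.25])
print(moy_p(values, weights))  # (1 * 0.25 + 3 * 0.25) / 0.5 = 2.0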
+ + """ # noqa: D205 + if seed is not None: + np.random.seed(seed) + df = df.copy() + df_encoded_list = [] + for col in df.columns: + if df[col].dtype.name == "category" or df[col].dtype == object: + # Ensure categories are strings + df[col] = df[col].cat.rename_categories( + df[col].cat.categories.astype(str) + ) + observed = df[col][df[col].notna()] + categories = df[col].cat.categories.tolist() + # Get observed frequencies + freqs = observed.value_counts(normalize=True) + # Impute missing values based on observed frequencies + missing_indices = df[col][df[col].isna()].index + if len(missing_indices) > 0: + imputed_values = np.random.choice( + freqs.index, size=len(missing_indices), p=freqs.values + ) + df.loc[missing_indices, col] = imputed_values + # One-hot encode without creating missing category + encoded = pd.get_dummies( + df[col], + prefix=col, + prefix_sep="_", + dummy_na=False, + dtype=float, + ) + col_names = [f"{col}_{cat}" for cat in categories] + encoded = encoded.reindex(columns=col_names, fill_value=0.0) + df_encoded_list.append(encoded) + else: + df_encoded_list.append(df[[col]]) + df_encoded = pd.concat(df_encoded_list, axis=1) + return df_encoded + + +def find_category(df_original, tab_disj): + """Reconstruct the original categorical variables from the disjunctive. + + Parameters + ---------- + df_original : DataFrame + Original DataFrame with categorical variables. + tab_disj : DataFrame + Disjunctive table after imputation. + + Returns + ------- + DataFrame + Reconstructed DataFrame with imputed categorical variables. + + """ + df_reconstructed = df_original.copy() + start_idx = 0 + for col in df_original.columns: + if ( + df_original[col].dtype.name == "category" + or df_original[col].dtype == object + ): # noqa: E501 + categories = df_original[col].cat.categories.tolist() + if "__MISSING__" in categories: + missing_cat_index = categories.index("__MISSING__") + else: + missing_cat_index = None + num_categories = len(categories) + sub_tab = tab_disj.iloc[:, start_idx : start_idx + num_categories] + if missing_cat_index is not None: + sub_tab.iloc[:, missing_cat_index] = -np.inf + # Find the category with the maximum value for each row + max_indices = sub_tab.values.argmax(axis=1) + df_reconstructed[col] = [categories[idx] for idx in max_indices] + # Replace '__MISSING__' back to NaN + df_reconstructed[col].replace("__MISSING__", np.nan, inplace=True) + start_idx += num_categories + else: + # For numeric variables, keep as is + start_idx += 1 # Increment start_idx by 1 for numeric columns + return df_reconstructed \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 2cf67a1c..00000000 --- a/setup.py +++ /dev/null @@ -1,91 +0,0 @@ -import codecs - -from setuptools import find_packages, setup - -DISTNAME = "qolmat" -VERSION = "0.1.8" -DESCRIPTION = "A Python library for optimal data imputation." 
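# Illustrative sketch, not part of the patch: one-hot encoding with an explicit
# "__MISSING__" level via tab_disjonctif_NA, then mapping a disjunctive table back
# to labels with find_category (both defined above). In practice find_category is
# applied to an imputed disjunctive table; the data below are made up. Note that,
# as written, tab_disjonctif_NA modifies the categorical columns of `df` in place.
import numpy as np
import pandas as pd

from qolmat.utils.utils import find_category, tab_disjonctif_NA

df = pd.DataFrame(
    {"color": pd.Categorical(["red", np.nan, "blue"]), "size": [1.0, 2.0, 3.0]}
)
tab = tab_disjonctif_NA(df)
print(list(tab.columns))  # one 0/1 column per colour level plus "__MISSING__"
df_back = find_category(df, tab)
print(df_back)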
-LONG_DESCRIPTION_CONTENT_TYPE = "text/x-rst" -with codecs.open("README.rst", encoding="utf-8-sig") as f: - LONG_DESCRIPTION = f.read() - - -LICENSE = "new BSD" -MAINTAINER = "Julien ROUSSEL, Anh Khoa NGO HO, Charles-Henri PRAT, Guillaume SAËS" -MAINTAINER_EMAIL = ( - "jroussel@quantmetry.com, " - "akngoho@quantmetry.com, " - "chprat@quantmetry.com, " - "gsaes@quantmetry.com, " - "mabidi.quantmetry.com" -) -URL = "https://github.com/Quantmetry/qolmat" -DOWNLOAD_URL = "https://pypi.org/project/qolmat/#files" -PROJECT_URLS = { - "Bug Tracker": "https://github.com/Quantmetry/qolmat", - "Documentation": "https://qolmat.readthedocs.io/en/latest/", - "Source Code": "https://github.com/Quantmetry/qolmat", -} - -PYTHON_REQUIRES = ">=3.8" -PACKAGES = find_packages() -INSTALL_REQUIRES = [ - "category_encoders", - "dcor>=0.6", - "hyperopt", - "numpy>=1.21", - "packaging", - "pandas>=1.3", - "scikit-learn", - "scipy", - "statsmodels>=0.14", - "typing-extensions", -] -EXTRAS_REQUIRE = { - "tests": ["flake8", "mypy", "pandas", "pytest", "pytest-cov", "typed-ast"], - "docs": [ - "numpydoc", - "sphinx", - "sphinx-gallery", - "sphinx_rtd_theme", - ], - "pytorch": [ - "torch==2.0.1", - ], -} - -CLASSIFIERS = [ - "Intended Audience :: Science/Research", - "Intended Audience :: Developers", - "License :: OSI Approved", - "Topic :: Software Development", - "Topic :: Scientific/Engineering", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX", - "Operating System :: Unix", - "Operating System :: MacOS", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", -] - -PACKAGE_DATA = {"qolmat": ["data/beijing.csv", "data/conductors.csv"]} - -setup( - name=DISTNAME, - version=VERSION, - license=LICENSE, - description=DESCRIPTION, - long_description=LONG_DESCRIPTION, - long_description_content_type=LONG_DESCRIPTION_CONTENT_TYPE, - url=URL, - download_url=DOWNLOAD_URL, - project_urls=PROJECT_URLS, - packages=PACKAGES, - python_requires=PYTHON_REQUIRES, - install_requires=INSTALL_REQUIRES, - extras_require=EXTRAS_REQUIRE, - classifiers=CLASSIFIERS, - zip_safe=False, - package_data=PACKAGE_DATA, -) diff --git a/tests/analysis/test_holes_characterization.py b/tests/analysis/test_holes_characterization.py index c794b94e..a77ecbb1 100644 --- a/tests/analysis/test_holes_characterization.py +++ b/tests/analysis/test_holes_characterization.py @@ -11,7 +11,9 @@ @pytest.fixture def mcar_df() -> pd.DataFrame: rng = np.random.default_rng(42) - matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200) + matrix = rng.multivariate_normal( + mean=[0, 0], cov=[[1, 0], [0, 1]], size=200 + ) df = pd.DataFrame(data=matrix, columns=["Column_1", "Column_2"]) hole_gen = UniformHoleGenerator( n_splits=1, random_state=42, subset=["Column_2"], ratio_masked=0.2 @@ -23,7 +25,9 @@ def mcar_df() -> pd.DataFrame: @pytest.fixture def mar_hm_df() -> pd.DataFrame: rng = np.random.default_rng(42) - matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200) + matrix = rng.multivariate_normal( + mean=[0, 0], cov=[[1, 0], [0, 1]], size=200 + ) quantile_95 = norm.ppf(0.975) df = pd.DataFrame(matrix, columns=["Column_1", "Column_2"]) @@ -37,7 +41,9 @@ def mar_hm_df() -> pd.DataFrame: @pytest.fixture def mar_hc_df() -> pd.DataFrame: rng = np.random.default_rng(42) - matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200) + matrix = rng.multivariate_normal( + mean=[0, 0], cov=[[1, 0], [0, 1]], size=200 + 
) quantile_95 = norm.ppf(0.975) df = pd.DataFrame(matrix, columns=["Column_1", "Column_2"]) @@ -49,7 +55,8 @@ def mar_hc_df() -> pd.DataFrame: @pytest.mark.parametrize( - "df_input, expected", [("mcar_df", True), ("mar_hm_df", False), ("mar_hc_df", True)] + "df_input, expected", + [("mcar_df", True), ("mar_hm_df", False), ("mar_hc_df", True)], ) def test_little_mcar_test(df_input: pd.DataFrame, expected: bool, request): mcar_test_little = LittleTest(random_state=42) diff --git a/tests/benchmark/test_comparator.py b/tests/benchmark/test_comparator.py index bddb29a3..02971bbb 100644 --- a/tests/benchmark/test_comparator.py +++ b/tests/benchmark/test_comparator.py @@ -1,8 +1,8 @@ -import pytest +from unittest.mock import MagicMock, patch + import numpy as np import pandas as pd -from unittest.mock import patch, MagicMock from qolmat.benchmark.comparator import Comparator generator_holes_mock = MagicMock() @@ -20,7 +20,9 @@ imputer_mock = MagicMock() expected_get_errors = pd.Series( [1.0, 1.0, 1.0, 1.0], - index=pd.MultiIndex.from_tuples([("mae", "A"), ("mae", "B"), ("mse", "A"), ("mse", "B")]), + index=pd.MultiIndex.from_tuples( + [("mae", "A"), ("mae", "B"), ("mse", "A"), ("mse", "B")] + ), ) @@ -28,10 +30,14 @@ def test_get_errors(mock_get_metric): df_origin = pd.DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}) df_imputed = pd.DataFrame({"A": [1, 2, 4], "B": [4, 5, 7]}) - df_mask = pd.DataFrame({"A": [False, False, True], "B": [False, False, True]}) + df_mask = pd.DataFrame( + {"A": [False, False, True], "B": [False, False, True]} + ) - mock_get_metric.return_value = lambda df_origin, df_imputed, df_mask: pd.Series( - [1.0, 1.0], index=["A", "B"] + mock_get_metric.return_value = ( + lambda df_origin, df_imputed, df_mask: pd.Series( + [1.0, 1.0], index=["A", "B"] + ) ) errors = comparator.get_errors(df_origin, df_imputed, df_mask) pd.testing.assert_series_equal(errors, expected_get_errors) @@ -65,7 +71,10 @@ def test_compare(mock_evaluate_errors_sample): errors_imputer1 = pd.Series([0.1, 0.2], index=["mae", "mse"]) errors_imputer2 = pd.Series([0.3, 0.4], index=["mae", "mse"]) - mock_evaluate_errors_sample.side_effect = [errors_imputer1, errors_imputer2] + mock_evaluate_errors_sample.side_effect = [ + errors_imputer1, + errors_imputer2, + ] df_errors = comparator.compare(df_test) assert mock_evaluate_errors_sample.call_count == 2 diff --git a/tests/benchmark/test_hyperparameters.py b/tests/benchmark/test_hyperparameters.py index 5c6ff85a..cf5b567d 100644 --- a/tests/benchmark/test_hyperparameters.py +++ b/tests/benchmark/test_hyperparameters.py @@ -1,20 +1,24 @@ -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union +import hyperopt as ho import numpy as np import pandas as pd -import pytest from qolmat.benchmark import hyperparameters -from qolmat.benchmark.hyperparameters import HyperValue # from hyperparameters import HyperValue -from qolmat.benchmark.missing_patterns import _HoleGenerator, EmpiricalHoleGenerator -from qolmat.imputations.imputers import _Imputer, ImputerRpcaNoisy - -import hyperopt as ho +from qolmat.benchmark.missing_patterns import ( + EmpiricalHoleGenerator, + _HoleGenerator, +) +from qolmat.imputations.imputers import ImputerRpcaNoisy, _Imputer -df_origin = pd.DataFrame({"col1": [0, np.nan, 2, 4, np.nan], "col2": [-1, np.nan, 0.5, 1, 1.5]}) -df_imputed = pd.DataFrame({"col1": [0, 1, 2, 3.5, 4], "col2": [-1.5, 0, 1.5, 2, 1.5]}) +df_origin = pd.DataFrame( + {"col1": [0, np.nan, 2, 4, np.nan], "col2": [-1, 
np.nan, 0.5, 1, 1.5]} +) +df_imputed = pd.DataFrame( + {"col1": [0, 1, 2, 3.5, 4], "col2": [-1.5, 0, 1.5, 2, 1.5]} +) df_mask = pd.DataFrame( { "col1": [False, False, True, False, False], @@ -24,7 +28,9 @@ df_corrupted = df_origin.copy() df_corrupted[df_mask] = np.nan -imputer_rpca = ImputerRpcaNoisy(tau=2, random_state=42, columnwise=True, period=1) +imputer_rpca = ImputerRpcaNoisy( + tau=2, random_state=42, columnwise=True, period=1 +) dict_imputers_rpca = {"rpca": imputer_rpca} generator_holes = EmpiricalHoleGenerator(n_splits=1, ratio_masked=0.5) dict_config_opti = { @@ -41,27 +47,38 @@ class ImputerTest(_Imputer): + """Group tests for Imputer.""" + def __init__( self, groups: Tuple[str, ...] = (), random_state: Union[None, int, np.random.RandomState] = None, value: float = 0, ) -> None: - super().__init__(groups=groups, columnwise=True, random_state=random_state) + """Init function.""" + super().__init__( + groups=groups, columnwise=True, random_state=random_state + ) self.value = value - def _transform_element(self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0): + def _transform_element( + self, df: pd.DataFrame, col: str = "__all__", ngroup: int = 0 + ): df_out = df.copy() df_out = df_out.fillna(self.value) return df_out class HoleGeneratorTest(_HoleGenerator): + """Group tests for HoleGenerator.""" + def __init__(self, mask: pd.Series, subset: Optional[List[str]] = None): + """Init HoleGenerator.""" super().__init__(n_splits=1, subset=subset) self.mask = mask def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: + """Generate mask.""" df_out = X.copy() for col in df_out: df_out[col] = self.mask @@ -69,19 +86,27 @@ def generate_mask(self, X: pd.DataFrame) -> pd.DataFrame: def test_hyperparameters_get_objective() -> None: + """Test get_objective.""" imputer = ImputerTest() - generator = HoleGeneratorTest(pd.Series([False, False, True, True]), subset=["some_col"]) + generator = HoleGeneratorTest( + pd.Series([False, False, True, True]), subset=["some_col"] + ) metric = "mse" names_hyperparams = ["value"] df = pd.DataFrame({"some_col": [np.nan, 0, 3, 5]}) - fun_obj = hyperparameters.get_objective(imputer, df, generator, metric, names_hyperparams) + fun_obj = hyperparameters.get_objective( + imputer, df, generator, metric, names_hyperparams + ) assert fun_obj([4]) == 1 assert fun_obj([0]) == (3**2 + 5**2) / 2 def test_hyperparameters_optimize(): + """Test optimize.""" imputer = ImputerTest() - generator = HoleGeneratorTest(pd.Series([False, False, True, True]), subset=["some_col"]) + generator = HoleGeneratorTest( + pd.Series([False, False, True, True]), subset=["some_col"] + ) metric = "mse" dict_config_opti = {"value": ho.hp.uniform("value", 0, 10)} df = pd.DataFrame({"some_col": [np.nan, 0, 3, 5]}) diff --git a/tests/benchmark/test_metrics.py b/tests/benchmark/test_metrics.py index 0c768054..26fa0f7c 100644 --- a/tests/benchmark/test_metrics.py +++ b/tests/benchmark/test_metrics.py @@ -2,12 +2,11 @@ # # Evaluation metrics # # ###################### -from math import exp import numpy as np -from numpy import random as npr import pandas as pd import pytest import scipy +from numpy import random as npr from qolmat.benchmark import metrics from qolmat.utils.exceptions import NotEnoughSamples @@ -16,9 +15,13 @@ {"col1": [0, np.nan, 2, 3, np.nan], "col2": [-1, np.nan, 0.5, 1, 1.5]} ) -df_complete = pd.DataFrame({"col1": [0, 2, 2, 3, 4], "col2": [-1, -2, 0.5, 1, 1.5]}) +df_complete = pd.DataFrame( + {"col1": [0, 2, 2, 3, 4], "col2": [-1, -2, 0.5, 1, 1.5]} +) -df_imputed = 
pd.DataFrame({"col1": [0, 1, 2, 3.5, 4], "col2": [-1.5, 0, 1.5, 2, 1.5]}) +df_imputed = pd.DataFrame( + {"col1": [0, 1, 2, 3.5, 4], "col2": [-1.5, 0, 1.5, 2, 1.5]} +) df_mask = pd.DataFrame( { @@ -31,7 +34,9 @@ @pytest.mark.parametrize("df1", [df_incomplete]) @pytest.mark.parametrize("df2", [df_imputed]) @pytest.mark.parametrize("df_mask", [df_mask]) -def test_mean_squared_error(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> None: +def test_mean_squared_error( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> None: assert metrics.mean_squared_error(df1, df1, df_mask).equals( pd.Series([0.0, 0.0], index=["col1", "col2"]) ) @@ -59,7 +64,9 @@ def test_root_mean_squared_error( @pytest.mark.parametrize("df1", [df_incomplete]) @pytest.mark.parametrize("df2", [df_imputed]) @pytest.mark.parametrize("df_mask", [df_mask]) -def test_mean_absolute_error(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> None: +def test_mean_absolute_error( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> None: assert metrics.mean_absolute_error(df1, df1, df_mask).equals( pd.Series([0.0, 0.0], index=["col1", "col2"]) ) @@ -90,9 +97,9 @@ def test_mean_absolute_percentage_error( def test_weighted_mean_absolute_percentage_error( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> None: - assert metrics.weighted_mean_absolute_percentage_error(df1, df1, df_mask).equals( - pd.Series([0.0, 0.0], index=["col1", "col2"]) - ) + assert metrics.weighted_mean_absolute_percentage_error( + df1, df1, df_mask + ).equals(pd.Series([0.0, 0.0], index=["col1", "col2"])) result = metrics.weighted_mean_absolute_percentage_error(df1, df2, df_mask) expected = pd.Series([0.1, 1.0], index=["col1", "col2"]) np.testing.assert_allclose(result, expected, atol=1e-3) @@ -101,7 +108,9 @@ def test_weighted_mean_absolute_percentage_error( @pytest.mark.parametrize("df1", [df_incomplete]) @pytest.mark.parametrize("df2", [df_imputed]) @pytest.mark.parametrize("df_mask", [df_mask]) -def test_accuracy(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> None: +def test_accuracy( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> None: result = metrics.accuracy(df1, df1, df_mask) expected = pd.Series([1.0, 1.0], index=["col1", "col2"]) pd.testing.assert_series_equal(result, expected) @@ -113,17 +122,23 @@ def test_accuracy(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) - @pytest.mark.parametrize("df1", [df_incomplete]) @pytest.mark.parametrize("df2", [df_imputed]) @pytest.mark.parametrize("df_mask", [df_mask]) -def test_wasserstein_distance(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> None: +def test_wasserstein_distance( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> None: dist = metrics.dist_wasserstein(df1, df1, df_mask, method="columnwise") assert dist.equals(pd.Series([0.0, 0.0], index=["col1", "col2"])) dist = metrics.dist_wasserstein(df1, df2, df_mask, method="columnwise") - assert dist.round(3).equals(pd.Series([0.250, 0.833], index=["col1", "col2"])) + assert dist.round(3).equals( + pd.Series([0.250, 0.833], index=["col1", "col2"]) + ) @pytest.mark.parametrize("df1", [df_incomplete]) @pytest.mark.parametrize("df2", [df_imputed]) @pytest.mark.parametrize("df_mask", [df_mask]) -def test_kl_divergence(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> None: +def test_kl_divergence( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> None: result = 
metrics.kl_divergence(df1, df1, df_mask, method="columnwise") expected = pd.Series([0.0, 0.0], index=["col1", "col2"]) pd.testing.assert_series_equal(result, expected, atol=1e-3) @@ -133,7 +148,9 @@ def test_kl_divergence(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFra pd.testing.assert_series_equal(result, expected, atol=1e-3) df_nonan = df1.notna() - result = metrics.kl_divergence(df1, df2, df_nonan, method="gaussian", min_n_rows=2) + result = metrics.kl_divergence( + df1, df2, df_nonan, method="gaussian", min_n_rows=2 + ) expected = pd.Series([1.029], index=["All"]) pd.testing.assert_series_equal(result, expected, atol=1e-3) @@ -190,26 +207,38 @@ def test_sum_pairwise_distances( @pytest.mark.parametrize("df1", [df_incomplete]) @pytest.mark.parametrize("df2", [df_imputed]) @pytest.mark.parametrize("df_mask", [df_mask]) -def test_sum_energy_distances(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame) -> None: +def test_sum_energy_distances( + df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame +) -> None: sum_distances_df1 = np.sum( scipy.spatial.distance.cdist( - df1[df_mask].fillna(0.0), df1[df_mask].fillna(0.0), metric="cityblock" + df1[df_mask].fillna(0.0), + df1[df_mask].fillna(0.0), + metric="cityblock", ) ) sum_distances_df2 = np.sum( scipy.spatial.distance.cdist( - df2[df_mask].fillna(0.0), df2[df_mask].fillna(0.0), metric="cityblock" + df2[df_mask].fillna(0.0), + df2[df_mask].fillna(0.0), + metric="cityblock", ) ) sum_distances_df1_df2 = np.sum( scipy.spatial.distance.cdist( - df1[df_mask].fillna(0.0), df2[df_mask].fillna(0.0), metric="cityblock" + df1[df_mask].fillna(0.0), + df2[df_mask].fillna(0.0), + metric="cityblock", ) ) - energy_distance_scipy = 2 * sum_distances_df1_df2 - sum_distances_df1 - sum_distances_df2 + energy_distance_scipy = ( + 2 * sum_distances_df1_df2 - sum_distances_df1 - sum_distances_df2 + ) energy_distance_qolmat = metrics.sum_energy_distances(df1, df2, df_mask) - assert energy_distance_qolmat.equals(pd.Series(energy_distance_scipy, index=["All"])) + assert energy_distance_qolmat.equals( + pd.Series(energy_distance_scipy, index=["All"]) + ) @pytest.mark.parametrize("df1", [df_incomplete]) @@ -218,20 +247,23 @@ def test_sum_energy_distances(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd. 
def test_mean_difference_correlation_matrix_numerical_features( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> None: - assert metrics.mean_difference_correlation_matrix_numerical_features(df1, df1, df_mask).equals( - pd.Series([0.0, 0.0], index=["col1", "col2"]) - ) + assert metrics.mean_difference_correlation_matrix_numerical_features( + df1, df1, df_mask + ).equals(pd.Series([0.0, 0.0], index=["col1", "col2"])) assert metrics.mean_difference_correlation_matrix_numerical_features( df1, df1, df_mask, False ).equals(pd.Series([0.0, 0.0], index=["col1", "col2"])) - assert metrics.mean_difference_correlation_matrix_numerical_features(df1, df2, df_mask).equals( - pd.Series([0.0, 0.0], index=["col1", "col2"]) - ) + assert metrics.mean_difference_correlation_matrix_numerical_features( + df1, df2, df_mask + ).equals(pd.Series([0.0, 0.0], index=["col1", "col2"])) df_incomplete_cat = pd.DataFrame( - {"col1": ["a", np.nan, "a", "b", np.nan], "col2": ["c", np.nan, "d", "b", "d"]} + { + "col1": ["a", np.nan, "a", "b", np.nan], + "col2": ["c", np.nan, "d", "b", "d"], + } ) df_imputed_cat = pd.DataFrame( @@ -279,7 +311,10 @@ def test_mean_difference_correlation_matrix_categorical_features( df_incomplete_cat_num = pd.DataFrame( - {"col1": ["a", np.nan, "a", "b", np.nan], "col2": [-1, np.nan, 0.5, 1, 1.5]} + { + "col1": ["a", np.nan, "a", "b", np.nan], + "col2": [-1, np.nan, 0.5, 1, 1.5], + } ) df_imputed_cat_num = pd.DataFrame( @@ -287,7 +322,10 @@ def test_mean_difference_correlation_matrix_categorical_features( ) df_mask_cat_num = pd.DataFrame( - {"col1": [True, False, True, True, False], "col2": [True, False, True, True, False]} + { + "col1": [True, False, True, True, False], + "col2": [True, False, True, True, False], + } ) @@ -318,7 +356,9 @@ def test_exception_raise_different_shapes( df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFrame ) -> None: with pytest.raises(Exception): - metrics.mean_difference_correlation_matrix_numerical_features(df1, df2, df_mask) + metrics.mean_difference_correlation_matrix_numerical_features( + df1, df2, df_mask + ) with pytest.raises(Exception): metrics.frechet_distance_base(df1, df2) @@ -332,7 +372,9 @@ def test_exception_raise_no_numerical_column_found( with pytest.raises(Exception): metrics.kolmogorov_smirnov_test(df1, df2, df_mask) with pytest.raises(Exception): - metrics.mean_difference_correlation_matrix_numerical_features(df1, df2, df_mask) + metrics.mean_difference_correlation_matrix_numerical_features( + df1, df2, df_mask + ) @pytest.mark.parametrize("df1", [df_incomplete]) @@ -346,7 +388,10 @@ def test_exception_raise_no_categorical_column_found( df_incomplete_cat_num_bad = pd.DataFrame( - {"col1": ["a", np.nan, "c", "b", np.nan], "col2": [-1, np.nan, 0.5, 0.5, 1.5]} + { + "col1": ["a", np.nan, "c", "b", np.nan], + "col2": [-1, np.nan, 0.5, 0.5, 1.5], + } ) @@ -376,14 +421,19 @@ def test_pattern_based_weighted_mean_metric( rng = npr.default_rng(123) -df_gauss1 = pd.DataFrame(rng.multivariate_normal([0, 0], [[1, 0.2], [0.2, 2]], size=100)) -df_gauss2 = pd.DataFrame(rng.multivariate_normal([0, 1], [[1, 0.2], [0.2, 2]], size=100)) +df_gauss1 = pd.DataFrame( + rng.multivariate_normal([0, 0], [[1, 0.2], [0.2, 2]], size=100) +) +df_gauss2 = pd.DataFrame( + rng.multivariate_normal([0, 1], [[1, 0.2], [0.2, 2]], size=100) +) df_mask_gauss = pd.DataFrame(np.full_like(df_gauss1, True)) def test_pattern_mae_comparison(mocker) -> None: - - mock_metric = mocker.patch("qolmat.benchmark.metrics.accuracy_1D", return_value=0) + mock_metric = 
mocker.patch( + "qolmat.benchmark.metrics.accuracy_1D", return_value=0 + ) df_nonan = df_incomplete.notna() metrics.pattern_based_weighted_mean_metric( diff --git a/tests/benchmark/test_missing_patterns.py b/tests/benchmark/test_missing_patterns.py index 0fa06e69..4cd29455 100644 --- a/tests/benchmark/test_missing_patterns.py +++ b/tests/benchmark/test_missing_patterns.py @@ -4,7 +4,9 @@ from qolmat.benchmark import missing_patterns as mp -df_complet = pd.DataFrame({"col1": [i for i in range(100)], "col2": [2 * i for i in range(100)]}) +df_complet = pd.DataFrame( + {"col1": list(range(100)), "col2": [2 * i for i in range(100)]} +) df_incomplet = df_complet.copy() df_incomplet.iloc[99, :] = np.nan @@ -20,9 +22,15 @@ df_incomplet_group.index = df_incomplet_group.index.set_names("group") list_generators = { - "geo": mp.GeometricHoleGenerator(n_splits=2, ratio_masked=0.1, random_state=42), - "unif": mp.UniformHoleGenerator(n_splits=2, ratio_masked=0.1, random_state=42), - "multi": mp.MultiMarkovHoleGenerator(n_splits=2, ratio_masked=0.1, random_state=42), + "geo": mp.GeometricHoleGenerator( + n_splits=2, ratio_masked=0.1, random_state=42 + ), + "unif": mp.UniformHoleGenerator( + n_splits=2, ratio_masked=0.1, random_state=42 + ), + "multi": mp.MultiMarkovHoleGenerator( + n_splits=2, ratio_masked=0.1, random_state=42 + ), "group": mp.GroupedHoleGenerator( n_splits=2, ratio_masked=0.1, random_state=42, groups=("group",) ), @@ -38,7 +46,9 @@ (df_incomplet_group, list_generators["group"]), ], ) -def test_SamplerHoleGenerator_split(df: pd.DataFrame, generator: mp._HoleGenerator) -> None: +def test_SamplerHoleGenerator_split( + df: pd.DataFrame, generator: mp._HoleGenerator +) -> None: mask = generator.split(df)[0] col1_holes = mask["col1"].sum() col2_holes = mask["col2"].sum() @@ -57,7 +67,9 @@ def test_SamplerHoleGenerator_split(df: pd.DataFrame, generator: mp._HoleGenerat (df_incomplet_group, list_generators["group"]), ], ) -def test_SamplerHoleGenerator_reproducible(df: pd.DataFrame, generator: mp._HoleGenerator) -> None: +def test_SamplerHoleGenerator_reproducible( + df: pd.DataFrame, generator: mp._HoleGenerator +) -> None: generator.random_state = 42 mask1 = generator.split(df)[0] generator.random_state = 43 @@ -81,7 +93,9 @@ def test_SamplerHoleGenerator_reproducible(df: pd.DataFrame, generator: mp._Hole def test_SamplerHoleGenerator_without_real_nans( df: pd.DataFrame, generator: mp._HoleGenerator ) -> None: - real_nan = np.random.choice([True, False], size=df.size, p=[0.4, 0.6]).reshape(100, 2) + real_nan = np.random.choice( + [True, False], size=df.size, p=[0.4, 0.6] + ).reshape(100, 2) df[real_nan] = np.nan mask = generator.split(df)[0] @@ -92,5 +106,9 @@ def test_SamplerHoleGenerator_without_real_nans( loc_real_nans_col2 = np.where(df["col2"].isna())[0] loc_mask_col2 = np.where(mask["col2"])[0] - np.testing.assert_allclose(len(set(loc_real_nans_col1) & set(loc_mask_col1)), 0) - np.testing.assert_allclose(len(set(loc_real_nans_col2) & set(loc_mask_col2)), 0) + np.testing.assert_allclose( + len(set(loc_real_nans_col1) & set(loc_mask_col1)), 0 + ) + np.testing.assert_allclose( + len(set(loc_real_nans_col2) & set(loc_mask_col2)), 0 + ) diff --git a/tests/imputations/rpca/test_rpca.py b/tests/imputations/rpca/test_rpca.py index 1430dc4e..34672ef2 100644 --- a/tests/imputations/rpca/test_rpca.py +++ b/tests/imputations/rpca/test_rpca.py @@ -1,32 +1,21 @@ from typing import Tuple + import numpy as np -import pandas as pd -import pytest from numpy.typing import NDArray -from pytest_mock.plugin 
import MockerFixture -from qolmat.imputations.rpca.rpca import RPCA - -# X_incomplete = np.array([[1, np.nan], [4, 2], [np.nan, 4]]) - -# X_exp_nrows_1_prepare_data = np.array([1.0, np.nan, 4.0, 2.0, np.nan, 4.0]) -# X_exp_nrows_6_prepare_data = np.concatenate( -# [X_incomplete.reshape(-1, 6).flatten(), np.ones((1, 94)).flatten() * np.nan] -# ) - -# period = 100 -# max_iter = 256 -# mu = 0.5 -# tau = 0.5 -# lam = 1 +from qolmat.imputations.rpca.rpca import RPCA class RPCAMock(RPCA): + """Mock for RPCA.""" + def __init__(self): + """Mock for init RPCA.""" super().__init__() self.Q = None def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]: + """Mock for decompose function.""" self.call_count = 1 return D, D diff --git a/tests/imputations/rpca/test_rpca_noisy.py b/tests/imputations/rpca/test_rpca_noisy.py index 78f62e41..d20aeaba 100644 --- a/tests/imputations/rpca/test_rpca_noisy.py +++ b/tests/imputations/rpca/test_rpca_noisy.py @@ -4,7 +4,6 @@ import pytest from numpy.typing import NDArray -from qolmat.imputations.rpca import rpca_utils from qolmat.imputations.rpca.rpca_noisy import RpcaNoisy from qolmat.utils import utils from qolmat.utils.data import generate_artificial_ts @@ -57,7 +56,9 @@ def test_check_cost_function_minimized_warning( ): """Test warning when the cost function is not minimized.""" with pytest.warns(UserWarning): - RpcaNoisy()._check_cost_function_minimized(obs, lr, ano, omega, lam, tau) + RpcaNoisy()._check_cost_function_minimized( + obs, lr, ano, omega, lam, tau + ) @pytest.mark.parametrize( @@ -85,7 +86,9 @@ def test_check_cost_function_minimized_no_warning( ): """Test no warning when the cost function is minimized.""" with warnings.catch_warnings(record=True) as record: - RpcaNoisy()._check_cost_function_minimized(obs, lr, ano, omega, lam, tau) + RpcaNoisy()._check_cost_function_minimized( + obs, lr, ano, omega, lam, tau + ) assert len(record) == 0 @@ -108,7 +111,9 @@ def test_rpca_decompose_rpca_shape(norm: str): rank = 2 rpca = RpcaNoisy(rank=rank, norm=norm) Omega = ~np.isnan(X_test) - M_result, A_result, L_result, Q_result = rpca.decompose_with_basis(X_test, Omega) + M_result, A_result, L_result, Q_result = rpca.decompose_with_basis( + X_test, Omega + ) n_rows, n_cols = X_test.shape assert M_result.shape == (n_rows, n_cols) assert A_result.shape == (n_rows, n_cols) @@ -143,7 +148,9 @@ def test_rpca_noisy_zero_tau(X: NDArray, lam: float, X_interpolated: NDArray): "X, tau, X_interpolated", [(X_incomplete, 0.4, X_interpolated), (X_incomplete, 2.4, X_interpolated)], ) -def test_rpca_noisy_zero_lambda(X: NDArray, tau: float, X_interpolated: NDArray): +def test_rpca_noisy_zero_lambda( + X: NDArray, tau: float, X_interpolated: NDArray +): """Test RPCA noisy results if lambda equals zero.""" rpca = RpcaNoisy(tau=tau, lam=0, norm="L2") Omega = ~np.isnan(X) @@ -154,7 +161,9 @@ def test_rpca_noisy_zero_lambda(X: NDArray, tau: float, X_interpolated: NDArray) def test_rpca_noisy_decompose_rpca(synthetic_temporal_data): """Test RPCA noisy results for time series data. - Check if the cost function is smaller at the end than at the start.""" + + Check if the cost function is smaller at the end than at the start. 
+ """ signal = synthetic_temporal_data period = 100 tau = 1 @@ -166,24 +175,27 @@ def test_rpca_noisy_decompose_rpca(synthetic_temporal_data): low_rank_init = D anomalies_init = np.zeros(D.shape) - cost_init = RpcaNoisy.cost_function(D, low_rank_init, anomalies_init, Omega, tau, lam) + cost_init = RpcaNoisy.cost_function( + D, low_rank_init, anomalies_init, Omega, tau, lam + ) - X_result, A_result, _, _ = RpcaNoisy.minimise_loss(D, Omega, rank, tau, lam) - cost_result = RpcaNoisy.cost_function(D, X_result, A_result, Omega, tau, lam) + X_result, A_result, _, _ = RpcaNoisy.minimise_loss( + D, Omega, rank, tau, lam + ) + cost_result = RpcaNoisy.cost_function( + D, X_result, A_result, Omega, tau, lam + ) assert cost_result <= cost_init - # assert np.linalg.norm(X_input_rpca, "nuc") >= 1 / 2 * np.linalg.norm( - # X_input_rpca - X_result.reshape(period, -1) - A_result.reshape(period, -1), - # "fro", - # ) ** 2 + tau * np.linalg.norm(X_result.reshape(period, -1), "nuc") + lam * np.sum( - # np.abs(A_result.reshape(period, -1)) - # ) +def test_rpca_noisy_temporal_signal_temporal_regularisations( + synthetic_temporal_data, +): + """Test RPCA noisy results for TS data with temporal regularisations. -def test_rpca_noisy_temporal_signal_temporal_regularisations(synthetic_temporal_data): - """Test RPCA noisy results for time series data with temporal regularisations. - Check if the cost function is smaller at the end than at the start.""" + Check if the cost function is smaller at the end than at the start. + """ signal = synthetic_temporal_data period = 10 tau = 1 diff --git a/tests/imputations/rpca/test_rpca_pcp.py b/tests/imputations/rpca/test_rpca_pcp.py index c7ab69e5..de997d90 100644 --- a/tests/imputations/rpca/test_rpca_pcp.py +++ b/tests/imputations/rpca/test_rpca_pcp.py @@ -85,6 +85,7 @@ def test_rpca_rpca_pcp_get_params_scale(X: NDArray): @pytest.mark.parametrize("X, mu", [(X_complete, small_mu)]) def test_rpca_rpca_pcp_zero_lambda_small_mu(X: NDArray, mu: float): """Test RPCA PCP results if lambda equals zero. + The problem is ill-conditioned and the result depends on the parameter mu; case when mu is small. """ @@ -98,6 +99,7 @@ def test_rpca_rpca_pcp_zero_lambda_small_mu(X: NDArray, mu: float): @pytest.mark.parametrize("X, mu", [(X_complete, large_mu)]) def test_rpca_rpca_pcp_zero_lambda_large_mu(X: NDArray, mu: float): """Test RPCA PCP results if lambda equals zero. + The problem is ill-conditioned and the result depends on the parameter mu; case when mu is large. """ @@ -120,7 +122,9 @@ def test_rpca_rpca_pcp_large_lambda_small_mu(X: NDArray, mu: float): def test_rpca_temporal_signal(synthetic_temporal_data): """Test RPCA PCP results for time series data. - Check if the cost function is smaller at the end than at the start.""" + + Check if the cost function is smaller at the end than at the start. 
+ """ signal = synthetic_temporal_data period = 100 lam = 0.1 @@ -130,6 +134,6 @@ def test_rpca_temporal_signal(synthetic_temporal_data): Omega = ~np.isnan(D) D_interpolated = utils.linear_interpolation(D) X_result, A_result = rpca.decompose(D, Omega) - assert np.linalg.norm(D_interpolated, "nuc") >= np.linalg.norm(X_result, "nuc") + lam * np.sum( - np.abs(A_result) - ) + assert np.linalg.norm(D_interpolated, "nuc") >= np.linalg.norm( + X_result, "nuc" + ) + lam * np.sum(np.abs(A_result)) diff --git a/tests/imputations/rpca/test_rpca_utils.py b/tests/imputations/rpca/test_rpca_utils.py index 775c9d98..120bff83 100644 --- a/tests/imputations/rpca/test_rpca_utils.py +++ b/tests/imputations/rpca/test_rpca_utils.py @@ -1,14 +1,14 @@ import numpy as np -from numpy.typing import NDArray import pytest +from numpy.typing import NDArray + from qolmat.imputations.rpca.rpca_utils import ( approx_rank, + l1_norm, soft_thresholding, svd_thresholding, - l1_norm, toeplitz_matrix, ) -from qolmat.utils.utils import fold_signal X_incomplete = np.array( [ @@ -20,7 +20,9 @@ ] ) -X_complete = np.array([[1, 7, 4, 4], [5, 2, 4, 4], [-3, 3, 3, 3], [2, -1, 5, 5], [2, 1, 5, 5]]) +X_complete = np.array( + [[1, 7, 4, 4], [5, 2, 4, 4], [-3, 3, 3, 3], [2, -1, 5, 5], [2, 1, 5, 5]] +) @pytest.mark.parametrize("X", [X_complete]) diff --git a/tests/imputations/test_em_sampler.py b/tests/imputations/test_em_sampler.py index 832737dc..21e2ffd0 100644 --- a/tests/imputations/test_em_sampler.py +++ b/tests/imputations/test_em_sampler.py @@ -1,22 +1,29 @@ from typing import List, Literal + import numpy as np import pytest +import scipy from numpy.typing import NDArray from scipy import linalg -import scipy from sklearn.datasets import make_spd_matrix -from qolmat.utils import utils - from qolmat.imputations import em_sampler -from qolmat.utils.exceptions import IllConditioned +from qolmat.utils import utils np.random.seed(42) A: NDArray = np.array([[3, 1, 0], [1, 1, 0], [0, 0, 1]], dtype=float) -A_inverse: NDArray = np.array([[0.5, -0.5, 0], [-0.5, 1.5, 0], [0, 0, 1]], dtype=float) +A_inverse: NDArray = np.array( + [[0.5, -0.5, 0], [-0.5, 1.5, 0], [0, 0, 1]], dtype=float +) X_missing = np.array( - [[1, np.nan, 1], [2, np.nan, 3], [1, 4, np.nan], [-1, 2, 1], [1, 1, np.nan]], + [ + [1, np.nan, 1], + [2, np.nan, 3], + [1, 4, np.nan], + [-1, 2, 1], + [1, 1, np.nan], + ], dtype=float, ) mask: NDArray = np.isnan(X_missing) @@ -40,7 +47,6 @@ def generate_multinormal_predefined_mean_cov(d=3, n=500): mask[ind, j] = True X_missing = X.copy() X_missing[mask] = np.nan - # return {"mean": mean, "covariance": covariance, "X": X, "X_missing": X_missing} return X, X_missing, mean, covariance @@ -93,16 +99,20 @@ def test_gradient_conjugue( """Test the conjugate gradient algorithm.""" X_first_guess = utils.impute_nans(X_missing) X_result = em_sampler._conjugate_gradient(A, X_first_guess, mask) - X_expected = np.array([[1, -1, 1], [2, -2, 3], [1, 4, 0], [-1, 2, 1], [1, 1, 0]], dtype=float) + X_expected = np.array( + [[1, -1, 1], [2, -2, 3], [1, 4, 0], [-1, 2, 1], [1, 1, 0]], dtype=float + ) - assert np.sum(X_result * (X_result @ A)) <= np.sum(X_first_guess * (X_first_guess @ A)) + assert np.sum(X_result * (X_result @ A)) <= np.sum( + X_first_guess * (X_first_guess @ A) + ) assert np.allclose(X_missing[~mask], X_result[~mask]) assert ((X_result @ A)[mask] == 0).all() np.testing.assert_allclose(X_result, X_expected, atol=1e-5) def test_get_lag_p(): - """Test if it can retrieve the lag p""" + """Test if it can retrieve the lag p.""" X, _, _, _ = 
generate_varp_process(d=3, n=1000, p=2) varpem = em_sampler.VARpEM() varpem.fit(X) @@ -120,7 +130,8 @@ def test_fit_calls(mocker, X_missing: NDArray) -> None: """Test number of calls of some methods in MultiNormalEM.""" max_iter_em = 3 mock_sample_ou = mocker.patch( - "qolmat.imputations.em_sampler.MultiNormalEM._sample_ou", return_value=X_missing + "qolmat.imputations.em_sampler.MultiNormalEM._sample_ou", + return_value=X_missing, ) mock_maximize_likelihood = mocker.patch( "qolmat.imputations.em_sampler.MultiNormalEM._maximize_likelihood", @@ -152,7 +163,11 @@ def test_fit_calls(mocker, X_missing: NDArray) -> None: @pytest.mark.parametrize( "means, covs, logliks", [ - ([np.array([1, 2, 3, 3])] * 15, [np.array([1, 2, 3, 3])] * 15, [1] * 15), + ( + [np.array([1, 2, 3, 3])] * 15, + [np.array([1, 2, 3, 3])] * 15, + [1] * 15, + ), ( [np.array([1, 2, 3, 3])] * 15, [np.random.uniform(low=0, high=100, size=(1, 4))[0]] * 15, @@ -180,7 +195,7 @@ def test_em_sampler_check_convergence_true( em.dict_criteria_stop["means"] = means em.dict_criteria_stop["covs"] = covs em.dict_criteria_stop["logliks"] = logliks - assert em._check_convergence() == True + assert em._check_convergence() @pytest.mark.parametrize( @@ -197,7 +212,7 @@ def test_em_sampler_check_convergence_false( em.dict_criteria_stop["means"] = means em.dict_criteria_stop["covs"] = covs em.dict_criteria_stop["logliks"] = logliks - assert em._check_convergence() == True + assert em._check_convergence() @pytest.mark.parametrize( @@ -231,7 +246,9 @@ def test_sample_ou_2d(model): assert abs(mean_est - mean_theo) < np.sqrt(var_theo / n_samples) * q_alpha ratio_inf = scipy.stats.chi2.ppf(alpha / 2, n_samples) / (n_samples - 1) - ratio_sup = scipy.stats.chi2.ppf(1 - alpha / 2, n_samples) / (n_samples - 1) + ratio_sup = scipy.stats.chi2.ppf(1 - alpha / 2, n_samples) / ( + n_samples - 1 + ) ratio = var_est / var_theo @@ -261,7 +278,7 @@ def test_varem_sampler_check_convergence_true( em.dict_criteria_stop["B"] = list_B em.dict_criteria_stop["S"] = list_S em.dict_criteria_stop["logliks"] = logliks - assert em._check_convergence() == True + assert em._check_convergence() @pytest.mark.parametrize( @@ -278,12 +295,14 @@ def test_varem_sampler_check_convergence_false( em.dict_criteria_stop["B"] = list_B em.dict_criteria_stop["S"] = list_S em.dict_criteria_stop["logliks"] = logliks - assert em._check_convergence() == True + assert em._check_convergence() def test_illconditioned_multinormalem() -> None: """Test that data with colinearity raises an exception.""" - X = np.array([[1, np.nan, 8, 1], [3, 1, 4, 2], [2, 3, np.nan, 1]], dtype=float) + X = np.array( + [[1, np.nan, 8, 1], [3, 1, 4, 2], [2, 3, np.nan, 1]], dtype=float + ) model = em_sampler.MultiNormalEM() with pytest.warns(UserWarning): _ = model.fit_transform(X) @@ -293,7 +312,7 @@ def test_illconditioned_multinormalem() -> None: def test_no_more_nan_multinormalem() -> None: - """Test there are no more missing values after the MultiNormalEM algorithm.""" + """Test there are no more missing values after the MultiNormalEM algo.""" X = np.array([[1, np.nan], [3, 1], [np.nan, 3]], dtype=float) model = em_sampler.MultiNormalEM() X_imp = model.fit_transform(X) @@ -310,9 +329,11 @@ def test_no_more_nan_varpem() -> None: assert np.sum(np.isnan(X_imputed)) == 0 -def test_fit_parameters_multinormalem(): - """Test the fit MultiNormalEM provides good parameters estimates (no imputation).""" - X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov(d=2, n=10000) +def 
test_fit_parameters_multinormalem_no_imputation(): + """Test fit MultiNormalEM provides good parameters estimates.""" + X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov( + d=2, n=10000 + ) em = em_sampler.MultiNormalEM() em.fit_parameters(X) np.testing.assert_allclose(em.means, mean, atol=1e-1) @@ -320,8 +341,10 @@ def test_fit_parameters_multinormalem(): def test_mean_covariance_multinormalem(): - """Test the MultiNormalEM provides good mean and covariance estimations.""" - X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov(d=2, n=1000) + """Test MultiNormalEM provides good mean and covariance estimations.""" + X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov( + d=2, n=1000 + ) em = em_sampler.MultiNormalEM() X_imputed = em.fit_transform(X_missing) @@ -333,11 +356,14 @@ def test_mean_covariance_multinormalem(): np.testing.assert_allclose(em.means, mean, rtol=1e-1, atol=1e-1) np.testing.assert_allclose(em.cov, covariance, rtol=1e-1, atol=1e-1) np.testing.assert_allclose(mean_imputed, mean, rtol=1e-1, atol=1e-1) - np.testing.assert_allclose(covariance_imputed, covariance, rtol=1e-1, atol=1e-1) + np.testing.assert_allclose( + covariance_imputed, covariance, rtol=1e-1, atol=1e-1 + ) def test_multinormal_em_minimize_llik(): - X, X_missing, mean, covariance = generate_multinormal_predefined_mean_cov(d=2, n=1000) + """Test that the loglikelihood of the imputed data is lower.""" + X, X_missing, _, _ = generate_multinormal_predefined_mean_cov(d=2, n=1000) imputer = em_sampler.MultiNormalEM(method="mle", random_state=11) X_imputed = imputer.fit_transform(X_missing) llikelihood_imputed = imputer.get_loglikelihood(X_imputed) @@ -354,6 +380,7 @@ def test_multinormal_em_minimize_llik(): @pytest.mark.parametrize("method", ["sample", "mle"]) def test_multinormal_em_fit_transform(method: Literal["mle", "sample"]): + """Test fit_transform method returns the same result as the fit method.""" imputer = em_sampler.MultiNormalEM(method=method, random_state=11) X = X_missing.copy() result = imputer.fit_transform(X) @@ -390,7 +417,9 @@ def test_parameters_after_imputation_varpem(p: int): def test_varpem_fit_transform(): imputer = em_sampler.VARpEM(method="mle", random_state=11) - X = np.array([[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]]) + X = np.array( + [[1, 1, 1, 1], [np.nan, np.nan, 3, 2], [1, 2, 2, 1], [2, 2, 2, 2]] + ) result = imputer.fit_transform(X) assert result.shape == X.shape np.testing.assert_allclose(result[~np.isnan(X)], X[~np.isnan(X)]) @@ -439,12 +468,6 @@ def test_pretreatment_temporal(em): np.testing.assert_allclose(mask_result, mask_expected) -# X_missing = np.array( -# [[1, np.nan, 1], [2, np.nan, 3], [1, 4, np.nan], [-1, 2, 1], [1, 1, np.nan]], -# dtype=float, -# ) - - @pytest.mark.parametrize( "em", [ diff --git a/tests/imputations/test_imputers.py b/tests/imputations/test_imputers.py index bea18d9a..5069f0bd 100644 --- a/tests/imputations/test_imputers.py +++ b/tests/imputations/test_imputers.py @@ -5,26 +5,33 @@ import pytest from sklearn.ensemble import ExtraTreesRegressor from sklearn.linear_model import LinearRegression -from sklearn.utils.estimator_checks import check_estimator, parametrize_with_checks -from qolmat.benchmark.hyperparameters import HyperValue +from sklearn.utils.estimator_checks import ( + parametrize_with_checks, +) +from qolmat.benchmark.hyperparameters import HyperValue from qolmat.imputations import imputers -df_complete = pd.DataFrame({"col1": [0, 1, 2, 3, 4], "col2": 
[-1, 0, 0.5, 1, 1.5]}) +df_complete = pd.DataFrame( + {"col1": [0, 1, 2, 3, 4], "col2": [-1, 0, 0.5, 1, 1.5]} +) df_incomplete = pd.DataFrame( {"col1": [0, np.nan, 2, 3, np.nan], "col2": [-1, np.nan, 0.5, np.nan, 1.5]} ) df_mixed = pd.DataFrame( - {"col1": [0, np.nan, 2, 3, np.nan], "col2": ["a", np.nan, "b", np.nan, "b"]} + { + "col1": [0, np.nan, 2, 3, np.nan], + "col2": ["a", np.nan, "b", np.nan, "b"], + } ) df_timeseries = pd.DataFrame( pd.DataFrame( { - "col1": [i for i in range(20)], - "col2": [0, np.nan, 2, np.nan, 2] + [i for i in range(5, 20)], + "col1": list(range(20)), + "col2": [0, np.nan, 2, np.nan, 2] + list(range(5, 20)), }, index=pd.date_range("2023-04-17", periods=20, freq="D"), ) @@ -80,14 +87,18 @@ def test_hyperparameters_get_hyperparameters() -> None: } -@pytest.mark.parametrize("col, expected", [("col1", expected1), ("col2", expected2)]) +@pytest.mark.parametrize( + "col, expected", [("col1", expected1), ("col2", expected2)] +) def test_hyperparameters_get_hyperparameters_modified( col: str, expected: Dict[str, HyperValue] ) -> None: imputer = imputers.ImputerRpcaNoisy() for key, val in hyperparams_global.items(): setattr(imputer, key, val) - imputer.imputer_params = tuple(set(imputer.imputer_params) | set(hyperparams_global.keys())) + imputer.imputer_params = tuple( + set(imputer.imputer_params) | set(hyperparams_global.keys()) + ) hyperparams = imputer.get_hyperparams(col) assert hyperparams == expected @@ -105,7 +116,9 @@ def test_hyperparameters_get_hyperparameters_modified( @pytest.mark.parametrize( "df", [pd.DataFrame({"col1": [np.nan, np.nan, np.nan], "col2": [1, 2, 3]})] ) -def test_Imputer_fit_transform_on_nan_column(df: pd.DataFrame, imputer: imputers._Imputer) -> None: +def test_Imputer_fit_transform_on_nan_column( + df: pd.DataFrame, imputer: imputers._Imputer +) -> None: np.testing.assert_raises(ValueError, imputer.fit_transform, df) @@ -130,7 +143,9 @@ def test_fit_transform_on_grouped(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) @pytest.mark.parametrize("df_oracle", [df_complete]) -def test_ImputerOracle_fit_transform(df: pd.DataFrame, df_oracle: pd.DataFrame) -> None: +def test_ImputerOracle_fit_transform( + df: pd.DataFrame, df_oracle: pd.DataFrame +) -> None: imputer = imputers.ImputerOracle() imputer.set_solution(df_oracle) result = imputer.fit_transform(df) @@ -142,7 +157,9 @@ def test_ImputerOracle_fit_transform(df: pd.DataFrame, df_oracle: pd.DataFrame) def test_ImputerSimple_mean_fit_transform(df: pd.DataFrame) -> None: imputer = imputers.ImputerSimple(strategy="mean") result = imputer.fit_transform(df) - expected = pd.DataFrame({"col1": [0, 5 / 3, 2, 3, 5 / 3], "col2": ["a", "b", "b", "b", "b"]}) + expected = pd.DataFrame( + {"col1": [0, 5 / 3, 2, 3, 5 / 3], "col2": ["a", "b", "b", "b", "b"]} + ) pd.testing.assert_frame_equal(result, expected) @@ -150,7 +167,9 @@ def test_ImputerSimple_mean_fit_transform(df: pd.DataFrame) -> None: def test_ImputerSimple_median_fit_transform(df: pd.DataFrame) -> None: imputer = imputers.ImputerSimple() result = imputer.fit_transform(df) - expected = pd.DataFrame({"col1": [0.0, 2.0, 2.0, 3.0, 2.0], "col2": ["a", "b", "b", "b", "b"]}) + expected = pd.DataFrame( + {"col1": [0.0, 2.0, 2.0, 3.0, 2.0], "col2": ["a", "b", "b", "b", "b"]} + ) pd.testing.assert_frame_equal(result, expected) @@ -158,7 +177,9 @@ def test_ImputerSimple_median_fit_transform(df: pd.DataFrame) -> None: def test_ImputerSimple_mode_fit_transform(df: pd.DataFrame) -> None: imputer = 
imputers.ImputerSimple(strategy="most_frequent") result = imputer.fit_transform(df) - expected = pd.DataFrame({"col1": [0.0, 0.0, 2.0, 3.0, 0.0], "col2": ["a", "b", "b", "b", "b"]}) + expected = pd.DataFrame( + {"col1": [0.0, 0.0, 2.0, 3.0, 0.0], "col2": ["a", "b", "b", "b", "b"]} + ) pd.testing.assert_frame_equal(result, expected) @@ -174,7 +195,9 @@ def test_ImputerShuffle_fit_transform1(df: pd.DataFrame) -> None: def test_ImputerShuffle_fit_transform2(df: pd.DataFrame) -> None: imputer = imputers.ImputerShuffle(random_state=42) result = imputer.fit_transform(df) - expected = pd.DataFrame({"col1": [0, 3, 2, 3, 0], "col2": [-1, 1.5, 0.5, 1.5, 1.5]}) + expected = pd.DataFrame( + {"col1": [0, 3, 2, 3, 0], "col2": [-1, 1.5, 0.5, 1.5, 1.5]} + ) np.testing.assert_allclose(result, expected) @@ -182,7 +205,9 @@ def test_ImputerShuffle_fit_transform2(df: pd.DataFrame) -> None: def test_ImputerLOCF_fit_transform(df: pd.DataFrame) -> None: imputer = imputers.ImputerLOCF() result = imputer.fit_transform(df) - expected = pd.DataFrame({"col1": [0, 0, 2, 3, 3], "col2": [-1, -1, 0.5, 0.5, 1.5]}) + expected = pd.DataFrame( + {"col1": [0, 0, 2, 3, 3], "col2": [-1, -1, 0.5, 0.5, 1.5]} + ) np.testing.assert_allclose(result, expected) @@ -190,7 +215,9 @@ def test_ImputerLOCF_fit_transform(df: pd.DataFrame) -> None: def test_ImputerNOCB_fit_transform(df: pd.DataFrame) -> None: imputer = imputers.ImputerNOCB() result = imputer.fit_transform(df) - expected = pd.DataFrame({"col1": [0, 2, 2, 3, 3], "col2": [-1, 0.5, 0.5, 1.5, 1.5]}) + expected = pd.DataFrame( + {"col1": [0, 2, 2, 3, 3], "col2": [-1, 0.5, 0.5, 1.5, 1.5]} + ) np.testing.assert_allclose(result, expected) @@ -198,7 +225,9 @@ def test_ImputerNOCB_fit_transform(df: pd.DataFrame) -> None: def test_ImputerInterpolation_fit_transform(df: pd.DataFrame) -> None: imputer = imputers.ImputerInterpolation() result = imputer.fit_transform(df) - expected = pd.DataFrame({"col1": [0, 1, 2, 3, 3], "col2": [-1, -0.25, 0.5, 1, 1.5]}) + expected = pd.DataFrame( + {"col1": [0, 1, 2, 3, 3], "col2": [-1, -0.25, 0.5, 1, 1.5]} + ) np.testing.assert_allclose(result, expected) @@ -208,8 +237,8 @@ def test_ImputerResiduals_fit_transform(df: pd.DataFrame) -> None: result = imputer.fit_transform(df) expected = pd.DataFrame( { - "col1": [i for i in range(20)], - "col2": [0, 0.953, 2, 2.061, 2] + [i for i in range(5, 20)], + "col1": list(range(20)), + "col2": [0, 0.953, 2, 2.061, 2] + list(range(5, 20)), }, index=pd.date_range("2023-04-17", periods=20, freq="D"), ) @@ -262,14 +291,18 @@ def test_ImputerRegressor_fit_transform(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_timeseries]) def test_ImputerRpcaNoisy_fit_transform(df: pd.DataFrame) -> None: - imputer = imputers.ImputerRpcaNoisy(columnwise=False, max_iterations=100, tau=1, lam=0.3) + imputer = imputers.ImputerRpcaNoisy( + columnwise=False, max_iterations=100, tau=1, lam=0.3 + ) df_omega = df.notna() df_result = imputer.fit_transform(df) np.testing.assert_allclose(df_result[df_omega], df[df_omega]) assert df_result.notna().all().all() -index_grouped = pd.MultiIndex.from_product([["a", "b"], range(4)], names=["group", "date"]) +index_grouped = pd.MultiIndex.from_product( + [["a", "b"], range(4)], names=["group", "date"] +) dict_values = { "col1": [0, np.nan, 0, np.nan, 1, 1, 1, 1], "col2": [1, 1, 1, 1, 2, 2, 2, 2], @@ -319,6 +352,8 @@ def test_models_fit_transform_grouped(imputer): imputers.ImputerEM(), ] ) -def test_sklearn_compatible_estimator(estimator: imputers._Imputer, check: Any) -> None: +def 
test_sklearn_compatible_estimator( + estimator: imputers._Imputer, check: Any +) -> None: """Check compatibility with sklearn, using sklearn estimator checks API.""" check(estimator) diff --git a/tests/imputations/test_imputers_diffusions.py b/tests/imputations/test_imputers_diffusions.py index 18363175..40215091 100644 --- a/tests/imputations/test_imputers_diffusions.py +++ b/tests/imputations/test_imputers_diffusions.py @@ -1,10 +1,11 @@ +from typing import Any + import numpy as np import pandas as pd import pytest - -from typing import Any - -from sklearn.utils.estimator_checks import check_estimator, parametrize_with_checks +from sklearn.utils.estimator_checks import ( + parametrize_with_checks, +) from qolmat.benchmark import metrics from qolmat.imputations import imputers, imputers_pytorch @@ -82,7 +83,9 @@ def test_TabDDPM_fit(df: pd.DataFrame) -> None: ) model = ddpms.TabDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64) - model = model.fit(df, batch_size=2, epochs=2, x_valid=df, print_valid=False) + model = model.fit( + df, batch_size=2, epochs=2, x_valid=df, print_valid=False + ) df_imputed = model.predict(df) @@ -94,7 +97,6 @@ def test_TabDDPM_fit(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) def test_TabDDPM_process_data(df: pd.DataFrame) -> None: - model = ddpms.TabDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64) arr_processed, arr_mask, _ = model._process_data(df, is_training=True) @@ -104,11 +106,14 @@ def test_TabDDPM_process_data(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) def test_TabDDPM_process_reversely_data(df: pd.DataFrame) -> None: - model = ddpms.TabDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64) - model = model.fit(df, batch_size=2, epochs=2, x_valid=df, print_valid=False) + model = model.fit( + df, batch_size=2, epochs=2, x_valid=df, print_valid=False + ) - arr_processed, arr_mask, list_indices = model._process_data(df, is_training=False) + arr_processed, arr_mask, list_indices = model._process_data( + df, is_training=False + ) df_imputed = model._process_reversely_data(arr_processed, df, list_indices) np.testing.assert_array_equal(df.shape, df_imputed.shape) @@ -118,11 +123,16 @@ def test_TabDDPM_process_reversely_data(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) def test_TabDDPM_q_sample(df: pd.DataFrame) -> None: - model = ddpms.TabDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64) - model = model.fit(df, batch_size=2, epochs=2, x_valid=df, print_valid=False) + model = model.fit( + df, batch_size=2, epochs=2, x_valid=df, print_valid=False + ) - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + device = ( + torch.device("cuda") + if torch.cuda.is_available() + else torch.device("cpu") + ) ts_data_noised, ts_noise = model._q_sample( x=torch.ones(2, 5, dtype=torch.float).to(device), @@ -135,7 +145,9 @@ def test_TabDDPM_q_sample(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) def test_TabDDPM_eval(df: pd.DataFrame) -> None: - model = ddpms.TabDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64, is_clip=True) + model = ddpms.TabDDPM( + num_noise_steps=10, num_blocks=1, dim_embedding=64, is_clip=True + ) model = model.fit( df, batch_size=2, @@ -156,7 +168,9 @@ def test_TabDDPM_eval(df: pd.DataFrame) -> None: list(df.index), ) - np.testing.assert_array_equal(list(scores.keys()), ["mean_absolute_error", "dist_wasserstein"]) + np.testing.assert_array_equal( + list(scores.keys()), 
["mean_absolute_error", "dist_wasserstein"] + ) @pytest.mark.parametrize("df", [df_incomplete]) @@ -191,8 +205,12 @@ def test_TabDDPM_predict(df: pd.DataFrame) -> None: } ) - model = ddpms.TabDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64, is_clip=True) - model = model.fit(df, batch_size=2, epochs=2, x_valid=df, print_valid=False) + model = ddpms.TabDDPM( + num_noise_steps=10, num_blocks=1, dim_embedding=64, is_clip=True + ) + model = model.fit( + df, batch_size=2, epochs=2, x_valid=df, print_valid=False + ) df_imputed = model.predict(df) @@ -216,7 +234,12 @@ def test_TsDDPM_fit(df: pd.DataFrame) -> None: model = ddpms.TsDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64) model = model.fit( - df, batch_size=2, epochs=2, x_valid=df, print_valid=False, index_datetime="datetime" + df, + batch_size=2, + epochs=2, + x_valid=df, + print_valid=False, + index_datetime="datetime", ) df_imputed = model.predict(df) @@ -229,10 +252,16 @@ def test_TsDDPM_fit(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) def test_TsDDPM_process_data(df: pd.DataFrame) -> None: - - model = ddpms.TsDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=False) + model = ddpms.TsDDPM( + num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=False + ) model = model.fit( - df, batch_size=2, epochs=2, x_valid=df, print_valid=False, index_datetime="datetime" + df, + batch_size=2, + epochs=2, + x_valid=df, + print_valid=False, + index_datetime="datetime", ) arr_processed, arr_mask, _ = model._process_data(df, is_training=True) @@ -240,9 +269,16 @@ def test_TsDDPM_process_data(df: pd.DataFrame) -> None: np.testing.assert_array_equal(arr_processed.shape, [5, 1, 5]) np.testing.assert_array_equal(arr_mask.shape, [5, 1, 5]) - model = ddpms.TsDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=True) + model = ddpms.TsDDPM( + num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=True + ) model = model.fit( - df, batch_size=2, epochs=2, x_valid=df, print_valid=False, index_datetime="datetime" + df, + batch_size=2, + epochs=2, + x_valid=df, + print_valid=False, + index_datetime="datetime", ) arr_processed, arr_mask, _ = model._process_data(df, is_training=True) @@ -253,25 +289,42 @@ def test_TsDDPM_process_data(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) def test_TsDDPM_process_reversely_data(df: pd.DataFrame) -> None: - - model = ddpms.TsDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=False) + model = ddpms.TsDDPM( + num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=False + ) model = model.fit( - df, batch_size=2, epochs=2, x_valid=df, print_valid=False, index_datetime="datetime" + df, + batch_size=2, + epochs=2, + x_valid=df, + print_valid=False, + index_datetime="datetime", ) - arr_processed, arr_mask, list_indices = model._process_data(df, is_training=False) + arr_processed, arr_mask, list_indices = model._process_data( + df, is_training=False + ) df_imputed = model._process_reversely_data(arr_processed, df, list_indices) np.testing.assert_array_equal(df.shape, df_imputed.shape) np.testing.assert_array_equal(df.index, df_imputed.index) np.testing.assert_array_equal(df.columns, df_imputed.columns) - model = ddpms.TsDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=True) + model = ddpms.TsDDPM( + num_noise_steps=10, num_blocks=1, dim_embedding=64, is_rolling=True + ) model = model.fit( - df, batch_size=2, epochs=2, x_valid=df, print_valid=False, 
index_datetime="datetime" + df, + batch_size=2, + epochs=2, + x_valid=df, + print_valid=False, + index_datetime="datetime", ) - arr_processed, arr_mask, list_indices = model._process_data(df, is_training=False) + arr_processed, arr_mask, list_indices = model._process_data( + df, is_training=False + ) df_imputed = model._process_reversely_data(arr_processed, df, list_indices) np.testing.assert_array_equal(df.shape, df_imputed.shape) @@ -281,12 +334,20 @@ def test_TsDDPM_process_reversely_data(df: pd.DataFrame) -> None: @pytest.mark.parametrize("df", [df_incomplete]) def test_TsDDPM_q_sample(df: pd.DataFrame) -> None: - model = ddpms.TsDDPM(num_noise_steps=10, num_blocks=1, dim_embedding=64) model = model.fit( - df, batch_size=2, epochs=2, x_valid=df, print_valid=False, index_datetime="datetime" + df, + batch_size=2, + epochs=2, + x_valid=df, + print_valid=False, + index_datetime="datetime", + ) + device = ( + torch.device("cuda") + if torch.cuda.is_available() + else torch.device("cpu") ) - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") ts_data_noised, ts_noise = model._q_sample( x=torch.ones(2, 1, 5, dtype=torch.float).to(device), @@ -299,9 +360,13 @@ def test_TsDDPM_q_sample(df: pd.DataFrame) -> None: @parametrize_with_checks( [ - imputers_pytorch.ImputerDiffusion(model=ddpms.TabDDPM(), batch_size=1, epochs=1), + imputers_pytorch.ImputerDiffusion( + model=ddpms.TabDDPM(), batch_size=1, epochs=1 + ), ] ) -def test_sklearn_compatible_estimator(estimator: imputers._Imputer, check: Any) -> None: +def test_sklearn_compatible_estimator( + estimator: imputers._Imputer, check: Any +) -> None: """Check compatibility with sklearn, using sklearn estimator checks API.""" check(estimator) diff --git a/tests/imputations/test_imputers_pytorch.py b/tests/imputations/test_imputers_pytorch.py index a6146291..0704114c 100644 --- a/tests/imputations/test_imputers_pytorch.py +++ b/tests/imputations/test_imputers_pytorch.py @@ -1,7 +1,6 @@ import numpy as np import pandas as pd import pytest -import torch from qolmat.imputations import imputers_pytorch from qolmat.utils.exceptions import PyTorchExtraNotInstalled @@ -29,7 +28,9 @@ def test_ImputerRegressorPyTorch_fit_transform(df: pd.DataFrame) -> None: nn.manual_seed(42) if nn.cuda.is_available(): nn.cuda.manual_seed(42) - estimator = imputers_pytorch.build_mlp(input_dim=2, list_num_neurons=[64, 32]) + estimator = imputers_pytorch.build_mlp( + input_dim=2, list_num_neurons=[64, 32] + ) imputer = imputers_pytorch.ImputerRegressorPyTorch( estimator=estimator, handler_nan="column", epochs=10 ) @@ -55,30 +56,3 @@ def test_ImputerRegressorPyTorch_fit_transform(df: pd.DataFrame) -> None: } ) np.testing.assert_allclose(result, expected, atol=1e-3) - - -# @pytest.mark.parametrize("df", [df_incomplete]) -# def test_imputers_pytorch_Autoencoder(df: pd.DataFrame) -> None: -# input = df.values.shape[1] -# latent = 4 -# encoder, decoder = imputers_pytorch.build_autoencoder_example( -# input_dim=input, -# latent_dim=latent, -# output_dim=input, -# list_num_neurons=[4 * latent, 2 * latent], -# ) -# autoencoder = imputers_pytorch.ImputerAutoencoder( -# encoder, decoder, epochs=10, lamb=0.01, max_iterations=5, random_state=42 -# ) -# result = autoencoder.fit_transform(df) -# print(result) -# expected = pd.DataFrame( -# { -# "col1": [22.315, 15, 22.496, 23, 33], -# "col2": [69, 76, 74, 80, 78], -# "col3": [174, 166, 182, 177, 174.218], -# "col4": [9, 12, 11, 12, 8], -# "col5": [93, 75, 62.308, 12, 62.449], -# } -# ) -# 
np.testing.assert_allclose(result, expected, atol=1e-3) diff --git a/tests/imputations/test_preprocessing.py b/tests/imputations/test_preprocessing.py index 30b55bd3..a05fffdb 100644 --- a/tests/imputations/test_preprocessing.py +++ b/tests/imputations/test_preprocessing.py @@ -1,15 +1,12 @@ import numpy as np import pandas as pd import pytest -from sklearn.compose import make_column_selector as selector - -from sklearn.pipeline import Pipeline from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import mean_squared_error -from sklearn.utils.estimator_checks import check_estimator -from sklearn.utils.validation import check_X_y, check_array from sklearn.model_selection import train_test_split -from sklearn.compose import ColumnTransformer +from sklearn.pipeline import Pipeline +from sklearn.utils.estimator_checks import check_estimator + from qolmat.imputations.preprocessing import ( BinTransformer, MixteHGBM, @@ -83,7 +80,9 @@ def test_fit_transform_BinTransformer(bin_transformer): def test_transform_BinTransformer(bin_transformer): bin_transformer.dict_df_bins_ = { - 0: pd.DataFrame({"value": [1, 2, 3, 4, 5], "min": [-np.inf, 1.5, 2.5, 3.5, 4.5]}) + 0: pd.DataFrame( + {"value": [1, 2, 3, 4, 5], "min": [-np.inf, 1.5, 2.5, 3.5, 4.5]} + ) } bin_transformer.feature_names_in_ = pd.Index([0]) bin_transformer.n_features_in_ = 1 @@ -100,7 +99,9 @@ def test_fit_transform_with_dataframes_BinTransformer(bin_transformer): def test_transform_with_dataframes_BinTransformer(bin_transformer): bin_transformer.dict_df_bins_ = { - 0: pd.DataFrame({"value": [1, 2, 3, 4, 5], "min": [0.5, 1.5, 2.5, 3.5, 4.5]}) + 0: pd.DataFrame( + {"value": [1, 2, 3, 4, 5], "min": [0.5, 1.5, 2.5, 3.5, 4.5]} + ) } bin_transformer.feature_names_in_ = pd.Index(["0"]) bin_transformer.n_features_in_ = 1 @@ -126,7 +127,9 @@ def test_inverse_transform_OneHotEncoderProjector(encoder): df_back = encoder.inverse_transform(df_dum) pd.testing.assert_frame_equal(df, df_back) - df_dum_perturbated = df_dum + np.random.uniform(-0.5, 0.5, size=df_dum.shape) + df_dum_perturbated = df_dum + np.random.uniform( + -0.5, 0.5, size=df_dum.shape + ) df_back = encoder.inverse_transform(df_dum_perturbated) pd.testing.assert_frame_equal(df, df_back) @@ -137,16 +140,22 @@ def test_inverse_transform_OneHotEncoderProjector(encoder): class DummyTransformer(TransformerMixin, BaseEstimator): + """Dummy transformer for testing.""" + def fit(self, X, y=None): + """Fit function.""" return self def transform(self, X): + """Transform function.""" return X def fit_transform(self, X, y=None): + """Fit and transform function.""" return self.fit(X, y).transform(X) def inverse_transform(self, X, y=None): + """Inverse transform function.""" return X diff --git a/tests/imputations/test_softimpute.py b/tests/imputations/test_softimpute.py index e8c3dff0..b85025da 100644 --- a/tests/imputations/test_softimpute.py +++ b/tests/imputations/test_softimpute.py @@ -1,4 +1,3 @@ -from typing import Any import numpy as np import pytest from numpy.typing import NDArray @@ -10,16 +9,16 @@ X_non_regression_test = np.array( [[1, 2, np.nan, 4], [1, 5, 3, np.nan], [4, 2, 3, 2], [1, 1, 5, 4]] ) -X_expected = np.array([[1, 2, 2.9066, 4], [1, 5, 3, 2.1478], [4, 2, 3, 2], [1, 1, 5, 4]]) +X_expected = np.array( + [[1, 2, 2.9066, 4], [1, 5, 3, 2.1478], [4, 2, 3, 2], [1, 1, 5, 4]] +) tau = 1 max_iterations = 30 random_state = 50 def test_initialized_default() -> None: - """Test that initialization does not crash and - has default parameters - """ + """Test that 
initialization does not crash and has default parameters.""" model = softimpute.SoftImpute() assert model.period == 1 assert model.rank is None @@ -27,9 +26,7 @@ def test_initialized_custom() -> None: - """Test that initialization does not crash and - has custom parameters - """ + """Test that initialization does not crash and has custom parameters.""" model = softimpute.SoftImpute(period=2, rank=10) assert model.period == 2 assert model.rank == 10 @@ -38,13 +35,17 @@ @pytest.mark.parametrize("X", [X]) def test_soft_impute_decompose(X: NDArray) -> None: - """Test fit instance and decomposition is computed""" + """Test fit instance and decomposition is computed.""" tau = 1 model = softimpute.SoftImpute(tau=tau) Omega = ~np.isnan(X) X_imputed = np.where(Omega, X, 0) - cost_all_in_M = model.cost_function(X, X_imputed, np.full_like(X, 0), Omega, tau) - cost_all_in_A = model.cost_function(X, np.full_like(X, 0), X_imputed, Omega, tau) + cost_all_in_M = model.cost_function( + X, X_imputed, np.full_like(X, 0), Omega, tau + ) + cost_all_in_A = model.cost_function( + X, np.full_like(X, 0), X_imputed, Omega, tau + ) M, A = model.decompose(X, Omega) cost_final = model.cost_function(X, M, A, Omega, tau) assert isinstance(model, softimpute.SoftImpute) @@ -56,12 +57,9 @@ def test_soft_impute_decompose(X: NDArray) -> None: assert cost_final < cost_all_in_A -# tests/imputations/test_imputers.py::test_sklearn_compatible_estimator - - @pytest.mark.parametrize("X", [X]) def test_soft_impute_convergence(X: NDArray) -> None: - """Test type of the check convergence""" + """Test type of the check convergence.""" model = softimpute.SoftImpute() M = model.random_state.uniform(size=(10, 20)) U, D, V = np.linalg.svd(M, full_matrices=False) @@ -70,31 +68,14 @@ def test_soft_impute_convergence(X: NDArray) -> None: def test_soft_impute_convergence_with_none() -> None: - """Test check type None and raise error""" + """Test that invalid convergence inputs raise an error.""" model = softimpute.SoftImpute() with pytest.raises(ValueError): _ = model._check_convergence( - None, + np.array([1]), np.array([1]), np.array([1]), np.array([1]), np.array([1]), np.array([1]), ) - - -# @pytest.mark.parametrize( -# "X, X_expected, tau, max_iterations, random_state", -# [(X_non_regression_test, X_expected, tau, max_iterations, random_state)], -# ) -# def test_soft_impute_non_regression( -# X: NDArray, X_expected: NDArray, tau: float, max_iterations: int, random_state: int -# ) -> None: -# """Non regression test""" -# model = softimpute.SoftImpute( -# tau=tau, max_iterations=max_iterations, random_state=random_state -# ) -# Omega = ~np.isnan(X) -# M, A = model.decompose(X, Omega) -# X_result = M + A -# np.testing.assert_allclose(X_result, X_expected, rtol=1e-3, atol=1e-3) diff --git a/tests/utils/test_algebra.py b/tests/utils/test_algebra.py index 45a508c8..ae6a6ae4 100644 --- a/tests/utils/test_algebra.py +++ b/tests/utils/test_algebra.py @@ -1,7 +1,8 @@ import numpy as np -from sympy import diag +import pytest from qolmat.utils import algebra +from qolmat.utils.algebra import svdtriplet def test_frechet_distance_exact(): @@ -12,7 +13,9 @@ means2 = np.array([0, -1, 1]) cov2 = np.eye(3, 3) - expected = np.sum((means2 - means1) ** 2) + np.sum((np.sqrt(stds) - 1) ** 2) + expected = np.sum((means2 - means1) ** 2) + np.sum( + (np.sqrt(stds) - 1) ** 2 + ) expected /= 3 result = algebra.frechet_distance_exact(means1, cov1, means2, cov2)
np.testing.assert_almost_equal(result, expected, decimal=3) @@ -26,6 +29,118 @@ def test_kl_divergence_gaussian_exact(): means2 = np.array([0, -1, 1]) cov2 = np.eye(3, 3) - expected = (np.sum(stds**2 - np.log(stds**2) - 1 + (means2 - means1) ** 2)) / 2 + expected = ( + np.sum(stds**2 - np.log(stds**2) - 1 + (means2 - means1) ** 2) + ) / 2 result = algebra.kl_divergence_gaussian_exact(means1, cov1, means2, cov2) np.testing.assert_almost_equal(result, expected, decimal=3) + +def test_svdtriplet_known_matrix(): + """Test svdtriplet on a known matrix without weights.""" + X = np.array([[3, 1], [1, 3]]) + expected_singular_values = np.array([4, 2]) + expected_U = np.array([[0.7071, -0.7071], + [0.7071, 0.7071]]) + expected_V = np.array([[0.7071, 0.7071], + [0.7071, -0.7071]]) + # Call svdtriplet without weights + s, U, V = svdtriplet(X, row_w=None, ncp=2) + # Compare singular values + np.testing.assert_almost_equal(s, expected_singular_values, decimal=3) + np.testing.assert_almost_equal(np.abs(U), np.abs(expected_U), decimal=3) + np.testing.assert_almost_equal(np.abs(V), np.abs(expected_V), decimal=3) + +def test_svdtriplet_with_row_weights(): + """Test svdtriplet with row weights.""" + X = np.array([[1, 2], [3, 4], [5, 6]]) + row_w = np.array([0.2, 0.5, 0.3]) + # Manually compute the weighted X + X_weighted = X * np.sqrt(row_w)[:, None] + U_expected, s_expected, Vt_expected = np.linalg.svd(X_weighted, + full_matrices=False) + V_expected = Vt_expected.T + # Call svdtriplet with weights + s, U, V = svdtriplet(X, row_w=row_w, ncp=2) + # Rescale U_expected by dividing by sqrt(row_w) + U_expected /= np.sqrt(row_w)[:, None] + # Compare singular values + np.testing.assert_allclose(s, s_expected[:2], atol=1e-6) + # Compare U and V (up to sign) + np.testing.assert_allclose(np.abs(U), np.abs(U_expected[:, :2]), atol=1e-6) + np.testing.assert_allclose(np.abs(V), np.abs(V_expected[:, :2]), atol=1e-6) + +def test_svdtriplet_ncp_limit(): + """Test svdtriplet with ncp less than the full rank.""" + X = np.random.rand(5, 3) + ncp = 2 + s, U, V = svdtriplet(X, ncp=ncp) + # Check the dimensions + assert s.shape == (ncp,) + assert U.shape == (X.shape[0], ncp) + assert V.shape == (X.shape[1], ncp) + # Reconstruct X approximation + X_approx = U @ np.diag(s) @ V.T + # Check that the approximation is close to X + # Note: With reduced ncp, approximation won't be exact + assert X_approx.shape == X.shape + s_full, U_full, V_full = svdtriplet(X) + X_full = U_full @ np.diag(s_full) @ V_full.T + error_ncp = np.linalg.norm(X - X_approx) + error_full = np.linalg.norm(X - X_full) + assert error_ncp >= error_full + +def test_svdtriplet_row_weights_none(): + """Test svdtriplet with default row weights.""" + X = np.random.rand(4, 4) + s_default, U_default, V_default = svdtriplet(X) + # Manually set uniform weights + row_w = np.ones(X.shape[0]) / X.shape[0] + s_manual, U_manual, V_manual = svdtriplet(X, row_w=row_w) + # Compare results + np.testing.assert_allclose(s_default, s_manual, atol=1e-6) + np.testing.assert_allclose(U_default, U_manual, atol=1e-6) + np.testing.assert_allclose(V_default, V_manual, atol=1e-6) + +def test_svdtriplet_zero_matrix(): + """Test svdtriplet on a zero matrix.""" + X = np.zeros((3, 3)) + s, U, V = svdtriplet(X) + # Singular values should be zero + expected_s = np.zeros(3) + np.testing.assert_array_equal(s, expected_s) + # U and V should be orthogonal matrices + np.testing.assert_allclose(U.T @ U, np.eye(3), atol=1e-6) + np.testing.assert_allclose(V.T @ V, np.eye(3), atol=1e-6) + +def
test_svdtriplet_non_square_matrix(): + """Test svdtriplet on a non-square matrix.""" + X = np.random.rand(6, 4) + s, U, V = svdtriplet(X) + # Check dimensions + assert U.shape == (6, 4) + assert s.shape == (4,) + assert V.shape == (4, 4) + # Reconstruct X + X_reconstructed = U @ np.diag(s) @ V.T + np.testing.assert_allclose(X, X_reconstructed, atol=1e-6) + +def test_svdtriplet_large_ncp(): + """Test svdtriplet with ncp larger than possible.""" + X = np.random.rand(5, 3) + ncp = 10 # Larger than min(n_samples - 1, n_features) + s, U, V = svdtriplet(X, ncp=ncp) + expected_ncp = min(5 - 1, 3) + assert s.shape == (expected_ncp,) + assert U.shape == (5, expected_ncp) + assert V.shape == (3, expected_ncp) + +def test_svdtriplet_negative_weights(): + """Test svdtriplet with negative row weights (should raise an error).""" + X = np.random.rand(4, 4) + row_w = np.array([0.25, -0.25, 0.5, 0.5]) # Negative weight + with pytest.raises(ValueError): + s, U, V = svdtriplet(X, row_w=row_w) + + + + diff --git a/tests/utils/test_data.py b/tests/utils/test_data.py index 40ee120a..713ff611 100644 --- a/tests/utils/test_data.py +++ b/tests/utils/test_data.py @@ -1,19 +1,40 @@ import datetime import os +from unittest.mock import MagicMock, patch import numpy as np import pandas as pd import pytest from pytest_mock.plugin import MockerFixture -from unittest.mock import MagicMock, patch + from qolmat.utils import data columns = ["station", "date", "year", "month", "day", "hour", "a", "b", "wd"] df_beijing_raw = pd.DataFrame( [ ["Beijing", datetime.datetime(2013, 3, 1), 2013, 3, 1, 0, 1, 2, "NW"], - ["Beijing", datetime.datetime(2013, 3, 1), 2014, 3, 1, 0, 3, np.nan, "NW"], - ["Beijing", datetime.datetime(2013, 3, 1), 2015, 3, 1, 0, np.nan, 6, "NW"], + [ + "Beijing", + datetime.datetime(2013, 3, 1), + 2014, + 3, + 1, + 0, + 3, + np.nan, + "NW", + ], + [ + "Beijing", + datetime.datetime(2013, 3, 1), + 2015, + 3, + 1, + 0, + np.nan, + 6, + "NW", + ], ], columns=columns, ) @@ -71,7 +92,13 @@ [2.0, 5.0, 4.0, 1.0, 4.0], [3.0, 6.0, 3.0, 4.0, 6.0], ], - columns=["T1 rain", "T2 preasure", "T3 temperature", "T4 humidity", "T5 sun"], + columns=[ + "T1 rain", + "T2 preasure", + "T3 temperature", + "T4 humidity", + "T5 sun", + ], index=pd.date_range(start="2010-01-01", periods=3, freq="1D"), ) @@ -222,7 +249,9 @@ def test_get_dataframes_in_folder(mock_convert_tsf, mock_read_csv, mock_walk): mock_walk.return_value = [("/fakepath", ("subfolder",), ("file.csv",))] result_csv = data.get_dataframes_in_folder("/fakepath", ".csv") assert len(result_csv) == 1 - mock_read_csv.assert_called_once_with(os.path.join("/fakepath", "file.csv")) + mock_read_csv.assert_called_once_with( + os.path.join("/fakepath", "file.csv") + ) pd.testing.assert_frame_equal(result_csv[0], df_conductor) mock_read_csv.reset_mock() @@ -230,7 +259,9 @@ def test_get_dataframes_in_folder(mock_convert_tsf, mock_read_csv, mock_walk): mock_walk.return_value = [("/fakepath", ("subfolder",), ("file.tsf",))] result_tsf = data.get_dataframes_in_folder("/fakepath", ".tsf") assert len(result_tsf) == 1 - mock_convert_tsf.assert_called_once_with(os.path.join("/fakepath", "file.tsf")) + mock_convert_tsf.assert_called_once_with( + os.path.join("/fakepath", "file.tsf") + ) pd.testing.assert_frame_equal(result_tsf[0], df_beijing) mock_read_csv.assert_called() @@ -238,14 +269,18 @@ def test_get_dataframes_in_folder(mock_convert_tsf, mock_read_csv, mock_walk): @patch("numpy.random.normal") @patch("numpy.random.choice") @patch("numpy.random.standard_exponential") -def 
-def test_generate_artificial_ts(mock_standard_exponential, mock_choice, mock_normal):
+def test_generate_artificial_ts(
+    mock_standard_exponential, mock_choice, mock_normal
+):
     n_samples = 100
     periods = [10, 20]
     amp_anomalies = 1.0
     ratio_anomalies = 0.1
     amp_noise = 0.1
-    mock_standard_exponential.return_value = np.ones(int(n_samples * ratio_anomalies))
+    mock_standard_exponential.return_value = np.ones(
+        int(n_samples * ratio_anomalies)
+    )
     mock_choice.return_value = np.arange(int(n_samples * ratio_anomalies))
     mock_normal.return_value = np.zeros(n_samples)
@@ -274,11 +309,20 @@ def test_generate_artificial_ts(mock_standard_exponential, mock_choice, mock_nor
         ("Bug", None),
     ],
 )
-def test_data_get_data(name_data: str, df: pd.DataFrame, mocker: MockerFixture) -> None:
-    mock_download = mocker.patch("qolmat.utils.data.download_data_from_zip", return_value=[df])
-    mock_read = mocker.patch("qolmat.utils.data.read_csv_local", return_value=df)
+def test_data_get_data(
+    name_data: str, df: pd.DataFrame, mocker: MockerFixture
+) -> None:
+    mock_download = mocker.patch(
+        "qolmat.utils.data.download_data_from_zip", return_value=[df]
+    )
+    mock_read = mocker.patch(
+        "qolmat.utils.data.read_csv_local", return_value=df
+    )
     mock_read_dl = mocker.patch("pandas.read_csv", return_value=df)
-    mocker.patch("qolmat.utils.data.preprocess_data_beijing", return_value=df_preprocess_beijing)
+    mocker.patch(
+        "qolmat.utils.data.preprocess_data_beijing",
+        return_value=df_preprocess_beijing,
+    )
     mocker.patch("pandas.read_parquet", return_value=df_sncf)
     try:
@@ -346,7 +390,9 @@ def test_preprocess_data_beijing(df: pd.DataFrame) -> None:
     assert result_df.index.names == ["station", "datetime"]
     assert all(result_df.index.get_level_values("station") == "Beijing")
     assert len(result_df) == 1
-    assert np.isclose(result_df.loc[(("Beijing"),), "pm2.5"], 176.66666666666666)
+    assert np.isclose(
+        result_df.loc[(("Beijing"),), "pm2.5"], 176.66666666666666
+    )
 @pytest.mark.parametrize("df", [df_preprocess_offline])
@@ -363,7 +409,9 @@ def test_data_add_holes(df: pd.DataFrame) -> None:
         ("Beijing", df_beijing),
     ],
 )
-def test_data_get_data_corrupted(name_data: str, df: pd.DataFrame, mocker: MockerFixture) -> None:
+def test_data_get_data_corrupted(
+    name_data: str, df: pd.DataFrame, mocker: MockerFixture
+) -> None:
     mock_get = mocker.patch("qolmat.utils.data.get_data", return_value=df)
     df_out = data.get_data_corrupted(name_data)
     assert mock_get.call_count == 1
@@ -395,5 +443,7 @@ def test_data_add_datetime_features(df: pd.DataFrame) -> None:
     result = data.add_datetime_features(df)
     pd.testing.assert_index_equal(result.index, df.index)
     assert result.columns.tolist() == columns_out
-    pd.testing.assert_frame_equal(result.drop(columns=["time_cos", "time_sin"]), df)
+    pd.testing.assert_frame_equal(
+        result.drop(columns=["time_cos", "time_sin"]), df
+    )
     assert (result["time_cos"] ** 2 + result["time_sin"] ** 2 == 1).all()
diff --git a/tests/utils/test_exceptions.py b/tests/utils/test_exceptions.py
index e9e10b7a..e0703c7f 100644
--- a/tests/utils/test_exceptions.py
+++ b/tests/utils/test_exceptions.py
@@ -1,4 +1,3 @@
-import pytest
 from qolmat.utils import exceptions
diff --git a/tests/utils/test_plot.py b/tests/utils/test_plot.py
index 5c45e72e..aadbaf7f 100644
--- a/tests/utils/test_plot.py
+++ b/tests/utils/test_plot.py
@@ -1,13 +1,14 @@
 from typing import Any, List, Tuple
-import matplotlib as mpl
+
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import pytest
 import scipy.sparse
-from qolmat.utils import plot
 from pytest_mock.plugin import MockerFixture
+from qolmat.utils import plot
+
 plt.switch_backend("Agg")
 np.random.seed(42)
@@ -30,12 +31,16 @@
 df1 = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
 df2 = pd.DataFrame({"x": [2, 3, 4], "y": [5, 6, 7]})
 dict_df_imputed = {
-    "Imputer1": pd.DataFrame({"A": [2, 3, np.nan], "B": [5, np.nan, 7], "C": [np.nan, 8, 9]})
+    "Imputer1": pd.DataFrame(
+        {"A": [2, 3, np.nan], "B": [5, np.nan, 7], "C": [np.nan, 8, 9]}
+    )
 }
 @pytest.mark.parametrize("list_matrices", [list_matrices])
-def test_utils_plot_plot_matrices(list_matrices: List[np.ndarray], mocker: MockerFixture) -> None:
+def test_utils_plot_plot_matrices(
+    list_matrices: List[np.ndarray], mocker: MockerFixture
+) -> None:
     mocker.patch("matplotlib.pyplot.savefig")
     mocker.patch("matplotlib.pyplot.show")
     plot.plot_matrices(list_matrices=list_matrices, title="title")
@@ -45,7 +50,9 @@ def test_utils_plot_plot_matrices(list_matrices: List[np.ndarray], mocker: Mocke
 @pytest.mark.parametrize("list_signals", [list_signals])
-def test_utils_plot_plot_signal(list_signals: List[List[Any]], mocker: MockerFixture) -> None:
+def test_utils_plot_plot_signal(
+    list_signals: List[List[Any]], mocker: MockerFixture
+) -> None:
     mocker.patch("matplotlib.pyplot.savefig")
     mocker.patch("matplotlib.pyplot.show")
     plot.plot_signal(list_signals=list_signals, ylabel="ylabel", title="title")
@@ -54,7 +61,9 @@ def test_utils_plot_plot_signal(list_signals: List[List[Any]], mocker: MockerFix
     plt.close("all")
-@pytest.mark.parametrize("M, A, E, index_array, dims", [(M, A, E, [0, 1, 2], (10, 10))])
+@pytest.mark.parametrize(
+    "M, A, E, index_array, dims", [(M, A, E, [0, 1, 2], (10, 10))]
+)
 def test__utils_plot_plot_images(
     M: np.ndarray,
     A: np.ndarray,
@@ -72,7 +81,9 @@ def test__utils_plot_plot_images(
 @pytest.mark.parametrize("X", [X])
-def test_utils_plot_make_ellipses_from_data(X: np.ndarray, mocker: MockerFixture):
+def test_utils_plot_make_ellipses_from_data(
+    X: np.ndarray, mocker: MockerFixture
+):
     mocker.patch("matplotlib.pyplot.show")
     ax = plt.gca()
     plot.make_ellipses_from_data(X[1], X[2], ax, color="blue")
@@ -93,7 +104,9 @@ def test_utils_plot_compare_covariances(
 @pytest.mark.parametrize("df", [df])
 @pytest.mark.parametrize("orientation", ["horizontal", "vertical"])
-def test_utils_plot_multibar(df: pd.DataFrame, orientation: str, mocker: MockerFixture):
+def test_utils_plot_multibar(
+    df: pd.DataFrame, orientation: str, mocker: MockerFixture
+):
     mocker.patch("matplotlib.pyplot.show")
     plot.multibar(df, orientation=orientation)
     assert len(plt.gcf().get_axes()) > 0
diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
index 950d2bf0..4f048d10 100644
--- a/tests/utils/test_utils.py
+++ b/tests/utils/test_utils.py
@@ -1,20 +1,21 @@
 import sys
+from io import StringIO
+
 import numpy as np
-from numpy.typing import NDArray
 import pandas as pd
 import pytest
-from qolmat.utils import utils
-from pytest_mock.plugin import MockerFixture
-from io import StringIO
-
-from qolmat.utils.exceptions import NotDimension2, SignalTooShort
+from numpy.typing import NDArray
+from qolmat.utils import utils
+from qolmat.utils.exceptions import NotDimension2
 df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
 @pytest.mark.parametrize("iteration, total", [(1, 1)])
-def test_utils_utils_display_progress_bar(iteration: int, total: int, capsys) -> None:
+def test_utils_utils_display_progress_bar(
+    iteration: int, total: int, capsys
+) -> None:
     captured_output = StringIO()
     sys.stdout = captured_output
     utils.progress_bar(
@@ -34,7 +35,9 @@ def test_utils_utils_display_progress_bar(iteration: int, total: int, capsys) ->
     assert output == output_expected
-@pytest.mark.parametrize("values, lag_max", [(pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]), 3)])
+@pytest.mark.parametrize(
+    "values, lag_max", [(pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]), 3)]
+)
 def test_utils_utils_acf(values, lag_max):
     result = utils.acf(values, lag_max)
     result_expected = pd.Series([1.0, 1.0, 1.0])
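Note on the expected values in the acf test above: the parametrized input [1.0, 2.0, 3.0, 4.0, 5.0] is a strictly linear series, so its Pearson autocorrelation is exactly 1.0 at every lag, which is why result_expected is a series of three ones for lag_max = 3. The sketch below reproduces that expectation with plain pandas; it assumes utils.acf returns Pearson autocorrelations over lag_max consecutive lags (an assumed convention, not qolmat's actual implementation), and for a linear series the result is the same whether the lags start at 0 or 1.

import pandas as pd

def acf_sketch(values: pd.Series, lag_max: int) -> pd.Series:
    # Pearson correlation of the series with a lag-shifted copy of itself,
    # computed on the overlapping part, for lags 1..lag_max.
    return pd.Series([values.autocorr(lag=lag) for lag in range(1, lag_max + 1)])

# A strictly linear series is perfectly correlated with every shift of itself:
print(acf_sketch(pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]), 3))  # 1.0, 1.0, 1.0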