3 changes: 3 additions & 0 deletions .gitignore
@@ -52,6 +52,9 @@ coverage.xml
.pytest_cache/
cover/

# Benchmarks
.asv/

# Translations
*.mo
*.pot
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
entry: poetry run ruff format
language: system
types: [python]
files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/).*
files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/|benchmarks/).*

- id: mypy
name: mypy check
68 changes: 68 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,68 @@
# PySATL-MPEST benchmarks

This directory contains the performance test suite for **PySATL-MPEST**, built using [Airspeed Velocity (asv)](https://asv.readthedocs.io/).
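
For orientation: asv discovers benchmark methods by prefix, so `time_*` methods are timed, while `track_*` methods report whatever value they return (this suite uses that mechanism to report peak memory). A minimal, self-contained illustration that is not part of this suite:

```python
class ExampleSuite:
    """Toy asv benchmark class illustrating the naming conventions."""

    def setup(self):
        # setup() runs before each benchmark method and is excluded from timings
        self.data = list(range(10_000))

    def time_sum(self):
        # `time_*` methods are timed by asv
        sum(self.data)

    def track_size(self):
        # `track_*` methods report the value they return
        return len(self.data)
```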

## Usage

Since this project uses Poetry, ensure dependencies are installed (including `asv`):

```
poetry install
```

### 1. Configure Machine
Before running benchmarks for the first time, register your machine's information.

```
# Make sure you are inside the poetry shell or use 'poetry run'
poetry run asv machine
```

### 2. Quick Run (Development Mode)
If you are actively developing and want to quickly check the performance of your current changes without creating a separate virtual environment or committing code, run the benchmarks against your local environment:

Note: This mode uses your current local environment. By default, results from `--quick` runs are not saved.
```
# Run benchmarks only once
poetry run asv run --python=same --quick

# Run all benchmarks
poetry run asv run --python=same

# Run only distributions benchmarks
poetry run asv run --python=same --bench bench_distributions

# Run only the DistributionMethods class inside bench_distributions
poetry run asv run --python=same --bench DistributionMethods
```

### 3. Regression Testing (Compare Branches)
To check whether your changes made things faster or slower, compare the performance of your current state (`HEAD`) against the base branch (`refactor/rework-arch` in the example below). Unlike `--quick` runs, these results are saved for later comparison.

```
# Compare HEAD (current state) vs refactor/rework-arch branch
poetry run asv continuous refactor/rework-arch HEAD
```
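
`asv continuous` accepts the same `--bench` filter as `asv run`, and `-f`/`--factor` sets the ratio above which a change is flagged (the exact flags may vary between asv versions):

```
# Only flag changes larger than 10%, limited to the core mixture benchmarks
poetry run asv continuous -f 1.1 --bench bench_core refactor/rework-arch HEAD
```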

### 4. Full History Run
Run the benchmarks across the commit history of the branches configured in `asv.conf.json`. asv creates a separate virtual environment for each commit, which is slower but gives accurate, reproducible results.
```
# Run all commits (might take a long time!)
poetry run asv run

# Run only recent changes (e.g., last 10 commits)
poetry run asv run HEAD~10..HEAD
```

### 5. Results

To browse the saved results in a graphical view and keep them for future comparison, generate the HTML report and serve it locally:
```
# 1. Generate HTML report
poetry run asv publish

# 2. Serve the report locally
poetry run asv preview
```
The report is served locally at `http://127.0.0.1:8080`; open that address in your browser.
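
If you only need a textual comparison of two saved result sets (for example, the base branch against your current commit), `asv compare` prints a summary table; the revisions below are just an example:

```
# Textual comparison of two revisions with saved results
poetry run asv compare refactor/rework-arch HEAD
```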
21 changes: 21 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,21 @@
{
"version": 1,
"project": "rework_pysatl_mpest",
"project_url": "https://github.com/PySATL/rework-pysatl-mpest",
"repo": "..",
"branches": ["refactor/rework-arch"],
"show_commit_url": "https://github.com/PySATL/rework-pysatl-mpest/commit/",
"environment_type": "virtualenv",
"install_command": ["python -m pip install -e ."],
"matrix": {
"req": {
"numpy": [],
"scipy": [],
"scikit-learn": []
}
},
"benchmark_dir": "benchmarks",
"env_dir": ".asv/env",
"results_dir": ".asv/results",
"html_dir": ".asv/html"
}
13 changes: 13 additions & 0 deletions benchmarks/benchmarks/__init__.py
@@ -0,0 +1,13 @@
"""
Benchmark suite for pysatl-mpest.
"""

__author__ = "Aleksandra Ri"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"


# TODO: Remove this after resolving runtime warnings
import warnings

warnings.simplefilter("ignore")
203 changes: 203 additions & 0 deletions benchmarks/benchmarks/bench_core.py
@@ -0,0 +1,203 @@
"""
Benchmarks for the core MixtureModel class.
"""

__author__ = "Aleksandra Ri"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"

from copy import copy

from .common import (
DTYPES_MAP,
GENERATE_SHAPES,
SAMPLE_SIZES,
Benchmark,
LibAdapter,
get_components,
measure_peak_memory,
)


class MixtureMethods(Benchmark):
"""
Benchmarks for MixtureModel computational methods.
"""

params = (
[10], # n_components
SAMPLE_SIZES, # n_samples
list(DTYPES_MAP.keys()), # dtype_name
)
param_names = ["n_components", "n_samples", "dtype_name"]

def setup(self, n_components, n_samples, dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", dtype, n_components)

self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
# Pre-generate data to avoid measuring generation time
self.X = self.mixture.generate(n_samples)

# --- Time Benchmarks ---

def time_pdf(self, n_components, n_samples, dtype_name):
self.mixture.pdf(self.X)

def time_lpdf(self, n_components, n_samples, dtype_name):
self.mixture.lpdf(self.X)

def time_loglikelihood(self, n_components, n_samples, dtype_name):
self.mixture.loglikelihood(self.X)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_pdf(self, n_components, n_samples, dtype_name):
self.mixture.pdf(self.X)

@measure_peak_memory
def track_peakmem_lpdf(self, n_components, n_samples, dtype_name):
self.mixture.lpdf(self.X)

@measure_peak_memory
def track_peakmem_loglikelihood(self, n_components, n_samples, dtype_name):
self.mixture.loglikelihood(self.X)


class MixtureScalability(Benchmark):
"""
Tests how MixtureModel scales with the number of components (K).
Critical for identifying O(K) vs O(K^2) bottlenecks.
"""

params = (
[2, 5, 20, 100], # n_components
[10000], # n_samples
)
param_names = ["n_components", "n_samples"]

def setup(self, n_components, n_samples):
# Create K distinct components
components = get_components("Normal", n_components=n_components)

self.mixture = LibAdapter.create_mixture(components=components)
self.X = self.mixture.generate(n_samples)

# --- Time Benchmarks ---

def time_pdf_scaling(self, n_components, n_samples):
self.mixture.pdf(self.X)

def time_lpdf_scaling(self, n_components, n_samples):
self.mixture.lpdf(self.X)

def time_loglikelihood_scaling(self, n_components, n_samples):
self.mixture.loglikelihood(self.X)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_pdf_scaling(self, n_components, n_samples):
self.mixture.pdf(self.X)

@measure_peak_memory
def track_peakmem_lpdf_scaling(self, n_components, n_samples):
self.mixture.lpdf(self.X)

@measure_peak_memory
def track_peakmem_loglikelihood_scaling(self, n_components, n_samples):
self.mixture.loglikelihood(self.X)


class MixtureGenerate(Benchmark):
"""
Benchmarks for MixtureModel sampling.
"""

params = (
[2, 5, 20, 100], # n_components
list(GENERATE_SHAPES.keys()), # shape_name
list(DTYPES_MAP.keys()), # dtype_name
)
param_names = ["n_components", "shape_name", "dtype_name"]

def setup(self, n_components, shape_name, dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", n_components=n_components)

self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
self.shape = GENERATE_SHAPES[shape_name]

# --- Time Benchmarks ---

def time_generate(self, n_components, shape_name, dtype_name):
self.mixture.generate(self.shape)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_generate(self, n_components, shape_name, dtype_name):
self.mixture.generate(self.shape)


class MixtureAstype(Benchmark):
"""
Benchmarks for casting MixtureModels.
"""

params = (
[2, 5, 20, 100], # n_components
list(DTYPES_MAP.keys()), # dtype_name
list(DTYPES_MAP.keys()), # conv_dtype_name
)
param_names = ["n_components", "dtype_name", "conv_dtype_name"]

def setup(self, n_components, dtype_name, conv_dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", dtype, n_components)

self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
self.conv_dtype = DTYPES_MAP[conv_dtype_name]

if not callable(getattr(self.mixture, "astype", None)):
raise NotImplementedError("Version does not support .astype")

# --- Time Benchmarks ---

def time_astype(self, n_components, dtype_name, conv_dtype_name):
self.mixture.astype(self.conv_dtype)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_astype(self, n_components, dtype_name, conv_dtype_name):
self.mixture.astype(self.conv_dtype)


class MixtureCopy(Benchmark):
"""
Benchmarks for object copying overhead.
Copying is used frequently when the Pipeline saves intermediate state.
"""

params = (
[2, 5, 20, 100], # n_components
list(DTYPES_MAP.keys()), # dtype_name
)
param_names = ["n_components", "dtype_name"]

def setup(self, n_components, dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", dtype, n_components)
self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)

# --- Time Benchmarks ---

def time_copy(self, n_components, dtype_name):
copy(self.mixture)

# --- Memory Benchmarks (Peak) ---
@measure_peak_memory
def track_peakmem_copy(self, n_components, dtype_name):
copy(self.mixture)
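
The helpers imported at the top of this file (`Benchmark`, `LibAdapter`, `get_components`, `measure_peak_memory`, ...) live in `benchmarks/benchmarks/common.py`, which is not included in this diff. As a rough illustration only, a `track_*`-friendly peak-memory decorator could be built on `tracemalloc`; the sketch below is an assumption about its behaviour, not the actual implementation:

```python
import functools
import tracemalloc


def measure_peak_memory(func):
    """Hypothetical sketch: make a track_* benchmark return peak memory in MiB."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        tracemalloc.start()
        try:
            func(*args, **kwargs)
            _, peak = tracemalloc.get_traced_memory()  # (current, peak) in bytes
        finally:
            tracemalloc.stop()
        # asv reports the number returned by a track_* method
        return peak / 2**20

    return wrapper
```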