3 changes: 3 additions & 0 deletions .gitignore
@@ -52,6 +52,9 @@ coverage.xml
.pytest_cache/
cover/

# Benchmarks
.asv/

# Translations
*.mo
*.pot
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
entry: poetry run ruff format
language: system
types: [python]
files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/).*
files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/|benchmarks/).*

- id: mypy
name: mypy check
68 changes: 68 additions & 0 deletions benchmarks/README.md
@@ -0,0 +1,68 @@
# PySATL-MPEST benchmarks

This directory contains the performance test suite for **PySATL-MPEST**, built using [Airspeed Velocity (asv)](https://asv.readthedocs.io/).
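
For orientation: asv discovers benchmark methods by prefix, so `time_*` methods are timed, while `track_*` methods report whatever value they return (this suite uses that mechanism to report peak memory). A minimal, self-contained illustration that is not part of this suite:

```python
class ExampleSuite:
    """Toy asv benchmark class illustrating the naming conventions."""

    def setup(self):
        # setup() runs before each benchmark method and is excluded from timings
        self.data = list(range(10_000))

    def time_sum(self):
        # `time_*` methods are timed by asv
        sum(self.data)

    def track_size(self):
        # `track_*` methods report the value they return
        return len(self.data)
```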

## Usage

Since this project uses Poetry, ensure dependencies are installed (including `asv`):

```
poetry install
```

### 1. Configure Machine
Before running benchmarks for the first time, register your machine's information.

```
# Make sure you are inside the poetry shell or use 'poetry run'
poetry run asv machine
```

### 2. Quick Run (Development Mode)
If you are actively developing and want to quickly check the performance of your current changes without creating a separate virtual environment or committing code, run the benchmarks against your local environment:

Note: This mode uses your current local environment. By default, results from `--quick` runs are not saved.
```
# Run benchmarks only once
poetry run asv run --python=same --quick

# Run all benchmarks
poetry run asv run --python=same

# Run only distributions benchmarks
poetry run asv run --python=same --bench bench_distributions

# Run only the DistributionMethods class inside bench_distributions
poetry run asv run --python=same --bench DistributionMethods
```

### 3. Regression Testing (Compare Branches)
To check whether your changes made things faster or slower, compare the performance of your current state (`HEAD`) against the base branch (`refactor/rework-arch` in the example below). Unlike `--quick` runs, these results are saved for later comparison.

```
# Compare HEAD (current state) vs refactor/rework-arch branch
poetry run asv continuous refactor/rework-arch HEAD
```
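
`asv continuous` accepts the same `--bench` filter as `asv run`, and `-f`/`--factor` sets the ratio above which a change is flagged (the exact flags may vary between asv versions):

```
# Only flag changes larger than 10%, limited to the core mixture benchmarks
poetry run asv continuous -f 1.1 --bench bench_core refactor/rework-arch HEAD
```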

### 4. Full History Run
Run the benchmarks across the commit history of the branches configured in `asv.conf.json`. asv creates a separate virtual environment for each commit, which is slower but gives accurate, reproducible results.
```
# Run all commits (might take a long time!)
poetry run asv run

# Run only recent changes (e.g., last 10 commits)
poetry run asv run HEAD~10..HEAD
```

### 5. Results

To browse the saved results in a graphical view and keep them for future comparison, generate the HTML report and serve it locally:
```
# 1. Generate HTML report
poetry run asv publish

# 2. Serve the report locally
poetry run asv preview
```
The report is served locally at `http://127.0.0.1:8080`; open that address in your browser.
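
If you only need a textual comparison of two saved result sets (for example, the base branch against your current commit), `asv compare` prints a summary table; the revisions below are just an example:

```
# Textual comparison of two revisions with saved results
poetry run asv compare refactor/rework-arch HEAD
```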
21 changes: 21 additions & 0 deletions benchmarks/asv.conf.json
@@ -0,0 +1,21 @@
{
"version": 1,
"project": "rework_pysatl_mpest",
"project_url": "https://github.com/PySATL/rework-pysatl-mpest",
"repo": "..",
"branches": ["refactor/rework-arch"],
"show_commit_url": "https://github.com/PySATL/rework-pysatl-mpest/commit/",
"environment_type": "virtualenv",
"install_command": ["python -m pip install -e ."],
"matrix": {
"req": {
"numpy": [],
"scipy": [],
"scikit-learn": []
}
},
"benchmark_dir": "benchmarks",
"env_dir": ".asv/env",
"results_dir": ".asv/results",
"html_dir": ".asv/html"
}
13 changes: 13 additions & 0 deletions benchmarks/benchmarks/__init__.py
@@ -0,0 +1,13 @@
"""
Benchmark suite for pysatl-mpest.
"""

__author__ = "Aleksandra Ri"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"


# TODO: Remove this after resolving runtime warnings
import warnings

warnings.simplefilter("ignore")
203 changes: 203 additions & 0 deletions benchmarks/benchmarks/bench_core.py
@@ -0,0 +1,203 @@
"""
Benchmarks for the core MixtureModel class.
"""

__author__ = "Aleksandra Ri"
__copyright__ = "Copyright (c) 2025 PySATL project"
__license__ = "SPDX-License-Identifier: MIT"

from copy import copy

from .common import (
DTYPES_MAP,
GENERATE_SHAPES,
SAMPLE_SIZES,
Benchmark,
LibAdapter,
get_components,
measure_peak_memory,
)


class MixtureMethods(Benchmark):
"""
Benchmarks for MixtureModel computational methods.
"""

params = (
[10], # n_components
SAMPLE_SIZES, # n_samples
list(DTYPES_MAP.keys()), # dtype_name
)
param_names = ["n_components", "n_samples", "dtype_name"]

def setup(self, n_components, n_samples, dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", dtype, n_components)

self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
# Pre-generate data to avoid measuring generation time
self.X = self.mixture.generate(n_samples)

# --- Time Benchmarks ---

def time_pdf(self, n_components, n_samples, dtype_name):
self.mixture.pdf(self.X)

def time_lpdf(self, n_components, n_samples, dtype_name):
self.mixture.lpdf(self.X)

def time_loglikelihood(self, n_components, n_samples, dtype_name):
self.mixture.loglikelihood(self.X)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_pdf(self, n_components, n_samples, dtype_name):
self.mixture.pdf(self.X)

@measure_peak_memory
def track_peakmem_lpdf(self, n_components, n_samples, dtype_name):
self.mixture.lpdf(self.X)

@measure_peak_memory
def track_peakmem_loglikelihood(self, n_components, n_samples, dtype_name):
self.mixture.loglikelihood(self.X)


class MixtureScalability(Benchmark):
"""
Tests how MixtureModel scales with the number of components (K).
Critical for identifying O(K) vs O(K^2) bottlenecks.
"""

params = (
[2, 5, 20, 100], # n_components
[10000], # n_samples
)
param_names = ["n_components", "n_samples"]

def setup(self, n_components, n_samples):
# Create K distinct components
components = get_components("Normal", n_components=n_components)

self.mixture = LibAdapter.create_mixture(components=components)
self.X = self.mixture.generate(n_samples)

# --- Time Benchmarks ---

def time_pdf_scaling(self, n_components, n_samples):
self.mixture.pdf(self.X)

def time_lpdf_scaling(self, n_components, n_samples):
self.mixture.lpdf(self.X)

def time_loglikelihood_scaling(self, n_components, n_samples):
self.mixture.loglikelihood(self.X)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_pdf_scaling(self, n_components, n_samples):
self.mixture.pdf(self.X)

@measure_peak_memory
def track_peakmem_lpdf_scaling(self, n_components, n_samples):
self.mixture.lpdf(self.X)

@measure_peak_memory
def track_peakmem_loglikelihood_scaling(self, n_components, n_samples):
self.mixture.loglikelihood(self.X)


class MixtureGenerate(Benchmark):
"""
Benchmarks for MixtureModel sampling.
"""

params = (
[2, 5, 20, 100], # n_components
list(GENERATE_SHAPES.keys()), # shape_name
list(DTYPES_MAP.keys()), # dtype_name
)
param_names = ["n_components", "shape_name", "dtype_name"]

def setup(self, n_components, shape_name, dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", n_components=n_components)

self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
self.shape = GENERATE_SHAPES[shape_name]

# --- Time Benchmarks ---

def time_generate(self, n_components, shape_name, dtype_name):
self.mixture.generate(self.shape)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_generate(self, n_components, shape_name, dtype_name):
self.mixture.generate(self.shape)


class MixtureAstype(Benchmark):
"""
Benchmarks for casting MixtureModels.
"""

params = (
[2, 5, 20, 100], # n_components
list(DTYPES_MAP.keys()), # dtype_name
list(DTYPES_MAP.keys()), # conv_dtype_name
)
param_names = ["n_components", "dtype_name", "conv_dtype_name"]

def setup(self, n_components, dtype_name, conv_dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", dtype, n_components)

self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
self.conv_dtype = DTYPES_MAP[conv_dtype_name]

if not callable(getattr(self.mixture, "astype", None)):
raise NotImplementedError("Version does not support .astype")

# --- Time Benchmarks ---

def time_astype(self, n_components, dtype_name, conv_dtype_name):
self.mixture.astype(self.conv_dtype)

# --- Memory Benchmarks (Peak) ---

@measure_peak_memory
def track_peakmem_astype(self, n_components, dtype_name, conv_dtype_name):
self.mixture.astype(self.conv_dtype)


class MixtureCopy(Benchmark):
"""
Benchmarks for object copying overhead.
Copying is used frequently when the Pipeline saves intermediate state.
"""

params = (
[2, 5, 20, 100], # n_components
list(DTYPES_MAP.keys()), # dtype_name
)
param_names = ["n_components", "dtype_name"]

def setup(self, n_components, dtype_name):
dtype = DTYPES_MAP[dtype_name]
components = get_components("Normal", dtype, n_components)
self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)

# --- Time Benchmarks ---

def time_copy(self, n_components, dtype_name):
copy(self.mixture)

# --- Memory Benchmarks (Peak) ---
@measure_peak_memory
def track_peakmem_copy(self, n_components, dtype_name):
copy(self.mixture)
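
The helpers imported at the top of this file (`Benchmark`, `LibAdapter`, `get_components`, `measure_peak_memory`, ...) live in `benchmarks/benchmarks/common.py`, which is not included in this diff. As a rough illustration only, a `track_*`-friendly peak-memory decorator could be built on `tracemalloc`; the sketch below is an assumption about its behaviour, not the actual implementation:

```python
import functools
import tracemalloc


def measure_peak_memory(func):
    """Hypothetical sketch: make a track_* benchmark return peak memory in MiB."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        tracemalloc.start()
        try:
            func(*args, **kwargs)
            _, peak = tracemalloc.get_traced_memory()  # (current, peak) in bytes
        finally:
            tracemalloc.stop()
        # asv reports the number returned by a track_* method
        return peak / 2**20

    return wrapper
```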