feat(benchmarks): support older library versions

xImoZA · xImoZA · commit 97375404cb83 · 2025-12-24T03:59:35.000+03:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
         entry: poetry run ruff format
         language: system
         types: [python]
-        files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/).*
+        files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/|benchmarks/).*
 
     -   id: mypy
         name: mypy check
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
@@ -4,9 +4,9 @@
     "project_url": "https://github.com/PySATL/rework-pysatl-mpest",
     "repo": "..",
     "branches": ["refactor/rework-arch"],
-    "show_commit_url": "https://github.com/PySATL/rework-pysatl-mpest/commits",
+    "show_commit_url": "https://github.com/PySATL/rework-pysatl-mpest/commit/",
     "environment_type": "virtualenv",
-    "install_command": ["python -m pip install ."],
+    "install_command": ["python -m pip install -e ."],
     "matrix": {
         "req": {
             "numpy": [],
diff --git a/benchmarks/benchmarks/bench_core.py b/benchmarks/benchmarks/bench_core.py
@@ -9,18 +9,18 @@
 import warnings
 from copy import copy
 
-from rework_pysatl_mpest.core import MixtureModel
-from .common import Benchmark, DTYPES_MAP, GENERATE_SHAPES, SAMPLE_SIZES, get_components
+from .common import DTYPES_MAP, GENERATE_SHAPES, SAMPLE_SIZES, Benchmark, LibAdapter, get_components
+
 
 class MixtureMethods(Benchmark):
     """
     Benchmarks for MixtureModel computational methods.
     """
 
     params = (
-        [10], # n_components
+        [10],  # n_components
         SAMPLE_SIZES,  # n_samples
-        list(DTYPES_MAP.keys())  # dtype_name
+        list(DTYPES_MAP.keys()),  # dtype_name
     )
     param_names = ["n_components", "n_samples", "dtype_name"]
 
@@ -31,7 +31,7 @@ def setup(self, n_components, n_samples, dtype_name):
         dtype = DTYPES_MAP[dtype_name]
         components = get_components("Normal", dtype, n_components)
 
-        self.mixture = MixtureModel(components=components, dtype=dtype)
+        self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
         # Pre-generate data to avoid measuring generation time
         self.X = self.mixture.generate(n_samples)
 
@@ -63,6 +63,7 @@ class MixtureScalability(Benchmark):
     Tests how MixtureModel scales with the number of components (K).
     Critical for identifying O(K) vs O(K^2) bottlenecks.
     """
+
     params = (
         [2, 5, 20, 100],  # n_components
         [10000],  # n_samples
@@ -73,7 +74,7 @@ def setup(self, n_components, n_samples):
         # Create K distinct components
         components = get_components("Normal", n_components=n_components)
 
-        self.mixture = MixtureModel(components)
+        self.mixture = LibAdapter.create_mixture(components=components)
         self.X = self.mixture.generate(n_samples)
 
     # --- Time Benchmarks ---
@@ -107,7 +108,7 @@ class MixtureGenerate(Benchmark):
     params = (
         [2, 5, 20, 100],  # n_components
         list(GENERATE_SHAPES.keys()),  # shape_name
-        list(DTYPES_MAP.keys())  # dtype_name
+        list(DTYPES_MAP.keys()),  # dtype_name
     )
     param_names = ["n_components", "shape_name", "dtype_name"]
 
@@ -118,7 +119,7 @@ def setup(self, n_components, shape_name, dtype_name):
         dtype = DTYPES_MAP[dtype_name]
         components = get_components("Normal", n_components=n_components)
 
-        self.mixture = MixtureModel(components=components, dtype=dtype)
+        self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
         self.shape = GENERATE_SHAPES[shape_name]
 
     # --- Time Benchmarks ---
@@ -140,17 +141,20 @@ class MixtureAstype(Benchmark):
     params = (
         [2, 5, 20, 100],  # n_components
         list(DTYPES_MAP.keys()),  # dtype_name
-        list(DTYPES_MAP.keys())  # conv_dtype_name
+        list(DTYPES_MAP.keys()),  # conv_dtype_name
     )
     param_names = ["n_components", "dtype_name", "conv_dtype_name"]
 
     def setup(self, n_components, dtype_name, conv_dtype_name):
         dtype = DTYPES_MAP[dtype_name]
         components = get_components("Normal", dtype, n_components)
 
-        self.mixture = MixtureModel(components=components, dtype=dtype)
+        self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
         self.conv_dtype = DTYPES_MAP[conv_dtype_name]
 
+        if not callable(getattr(self.mixture, "astype", None)):
+            raise NotImplementedError("Old version MixtureModel does not support .astype")
+
     # --- Time Benchmarks ---
 
     def time_astype(self, n_components, dtype_name, conv_dtype_name):
@@ -161,6 +165,7 @@ def time_astype(self, n_components, dtype_name, conv_dtype_name):
     def peakmem_astype(self, n_components, dtype_name, conv_dtype_name):
         self.mixture.astype(self.conv_dtype)
 
+
 class MixtureCopy(Benchmark):
     """
     Benchmarks for object copying overhead.
@@ -169,14 +174,14 @@ class MixtureCopy(Benchmark):
 
     params = (
         [2, 5, 20, 100],  # n_components
-        list(DTYPES_MAP.keys())  # dtype_name
+        list(DTYPES_MAP.keys()),  # dtype_name
     )
     param_names = ["n_components", "dtype_name"]
 
     def setup(self, n_components, dtype_name):
         dtype = DTYPES_MAP[dtype_name]
         components = get_components("Normal", dtype, n_components)
-        self.mixture = MixtureModel(components=components, dtype=dtype)
+        self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
 
     # --- Time Benchmarks ---
 
diff --git a/benchmarks/benchmarks/bench_distributions.py b/benchmarks/benchmarks/bench_distributions.py
@@ -9,8 +9,7 @@
 import warnings
 from copy import copy
 
-from .common import Benchmark, DISTRIBUTIONS, DTYPES_MAP, GENERATE_SHAPES, SAMPLE_SIZES, get_components, RNG_GENERATOR
-
+from .common import DISTRIBUTIONS, DTYPES_MAP, GENERATE_SHAPES, RNG_GENERATOR, SAMPLE_SIZES, Benchmark, get_components
 
 
 class DistributionMethods(Benchmark):
@@ -22,7 +21,7 @@ class DistributionMethods(Benchmark):
     params = (
         DISTRIBUTIONS,  # dist_name
         SAMPLE_SIZES,  # n_samples
-        list(DTYPES_MAP.keys())  # dtype_name
+        list(DTYPES_MAP.keys()),  # dtype_name
     )
     param_names = ["dist_name", "n_samples", "dtype_name"]
 
@@ -71,7 +70,7 @@ class DistributionGenerate(Benchmark):
     params = (
         DISTRIBUTIONS,  # dist_name
         list(GENERATE_SHAPES.keys()),  # shape_name
-        list(DTYPES_MAP.keys())  # dtype_name
+        list(DTYPES_MAP.keys()),  # dtype_name
     )
     param_names = ["dist_name", "shape_name", "dtype_name"]
 
@@ -102,7 +101,7 @@ class DistributionAstype(Benchmark):
     params = (
         DISTRIBUTIONS,  # dist_name
         list(DTYPES_MAP.keys()),  # dtype_name
-        list(DTYPES_MAP.keys())  # conv_dtype_name
+        list(DTYPES_MAP.keys()),  # conv_dtype_name
     )
     param_names = ["dist_name", "dtype_name", "conv_dtype_name"]
 
@@ -111,6 +110,9 @@ def setup(self, dist_name, dtype_name, conv_dtype_name):
         self.dist = get_components(dist_name, dtype, 1)[0]
         self.conv_dtype = DTYPES_MAP[conv_dtype_name]
 
+        if not callable(getattr(self.dist, "astype", None)):
+            raise NotImplementedError(f"Old version {dist_name} does not support .astype")
+
     # --- Time Benchmarks ---
 
     def time_astype(self, dist_name, dtype_name, conv_dtype_name):
@@ -130,7 +132,7 @@ class DistributionCopy(Benchmark):
 
     params = (
         DISTRIBUTIONS,  # dist_name
-        list(DTYPES_MAP.keys())  # dtype_name
+        list(DTYPES_MAP.keys()),  # dtype_name
     )
     param_names = ["dist_name", "dtype_name"]
 
diff --git a/benchmarks/benchmarks/bench_estimators.py b/benchmarks/benchmarks/bench_estimators.py
@@ -10,14 +10,18 @@
 from copy import copy
 
 import numpy as np
-
-from rework_pysatl_mpest.core import MixtureModel
 from rework_pysatl_mpest.estimators import ECM
-from rework_pysatl_mpest.estimators.iterative import ExpectationStep, PipelineState, OptimizationBlock, \
-    MaximizationStrategy, MaximizationStep
+from rework_pysatl_mpest.estimators.iterative import (
+    ExpectationStep,
+    MaximizationStep,
+    MaximizationStrategy,
+    OptimizationBlock,
+    PipelineState,
+)
 from rework_pysatl_mpest.estimators.iterative.breakpointers import StepBreakpointer
 from rework_pysatl_mpest.optimizers import ScipyNelderMead
-from .common import Benchmark, DTYPES_MAP, SAMPLE_SIZES, get_components, DISTRIBUTIONS, RNG_GENERATOR
+
+from .common import DISTRIBUTIONS, DTYPES_MAP, RNG_GENERATOR, SAMPLE_SIZES, Benchmark, LibAdapter, get_components
 
 
 class StepOverhead(Benchmark):
@@ -34,8 +38,8 @@ class StepOverhead(Benchmark):
         DISTRIBUTIONS,  # dist_name
         [2],  # n_components
         SAMPLE_SIZES,  # n_samples
-        list(DTYPES_MAP.keys()), # dtype_name
-        [True, False]  # is_soft
+        list(DTYPES_MAP.keys()),  # dtype_name
+        [True, False],  # is_soft
     )
     param_names = ["dist_name", "n_components", "n_samples", "dtype_name", "is_soft"]
 
@@ -52,17 +56,15 @@ def setup(self, dist_name, n_components, n_samples, dtype_name, is_soft):
                 comp.fix_param("shape")
                 comp.fix_param("loc")
 
-        self.mix_analytical = MixtureModel(self.comps_analytical, dtype=dtype)
+        self.mix_analytical = LibAdapter.create_mixture(components=self.comps_analytical, dtype=dtype)
         self.X_analytical = self.mix_analytical.generate(n_samples)
 
         # --- Pipeline Components ---
         self.e_step = ExpectationStep(is_soft=is_soft)
 
         # Setup States
         # 1. State ready for E-step
-        self.state_analytical_for_E = PipelineState(
-            self.X_analytical, None, None, copy(self.mix_analytical), None
-        )
+        self.state_analytical_for_E = PipelineState(self.X_analytical, None, None, copy(self.mix_analytical), None)
 
         # 2. State ready for M-step (Pre-calculate H)
         self.state_analytical_for_M = self.e_step.run(
@@ -103,11 +105,11 @@ class ECMAnalyticalCleanWithStepBreakpointer(Benchmark):
     """
 
     params = (
-        ["Normal", "Exponential", "Pareto", "Weibull"], # dist_name
-        [2], # n_components
-        [5], # max_steps
-        SAMPLE_SIZES, # n_samples
-        list(DTYPES_MAP.keys()) # dtype_name
+        ["Normal", "Exponential", "Pareto", "Weibull"],  # dist_name
+        [2],  # n_components
+        [5],  # max_steps
+        SAMPLE_SIZES,  # n_samples
+        list(DTYPES_MAP.keys()),  # dtype_name
     )
     param_names = ["dist_name", "n_components", "max_steps", "n_samples", "dtype_name"]
 
@@ -120,24 +122,22 @@ def setup(self, dist_name, n_components, max_steps, n_samples, dtype_name):
 
         dtype = DTYPES_MAP[dtype_name]
         true_comps = get_components(dist_name, dtype, n_components)
-        self.X = MixtureModel(true_comps).generate(n_samples)
+        self.X = LibAdapter.create_mixture(components=true_comps).generate(n_samples)
 
         start_comps = copy(true_comps)
         for comp in start_comps:
-            new_params = np.asarray(comp.get_params_vector(comp.params), dtype=dtype) + np.ones(len(comp.params), dtype=dtype)
+            new_params = np.asarray(comp.get_params_vector(comp.params), dtype=dtype) + np.ones(
+                len(comp.params), dtype=dtype
+            )
             comp.set_params_from_vector(comp.params, new_params)
             if dist_name == "Weibull":
                 comp.fix_param("shape")
                 comp.fix_param("loc")
 
-        self.start_mixture = MixtureModel(start_comps, dtype=dtype)
+        self.start_mixture = LibAdapter.create_mixture(components=start_comps, dtype=dtype)
 
         # Configure ECM to run for a fixed small number of steps to measure throughput
-        self.ecm = ECM(
-            breakpointers=[StepBreakpointer(max_steps=max_steps)],
-            pruners=[],
-            optimizer=ScipyNelderMead()
-        )
+        self.ecm = ECM(breakpointers=[StepBreakpointer(max_steps=max_steps)], pruners=[], optimizer=ScipyNelderMead())
 
     def time_fit(self, dist_name, n_components, max_steps, n_samples, dtype_name):
         self.ecm.fit(self.X, self.start_mixture)
@@ -152,10 +152,10 @@ class ECMAnalyticalOverflow(Benchmark):
     """
 
     params = (
-        ["Normal", "Exponential", "Pareto", "Weibull"], # dist_name
+        ["Normal", "Exponential", "Pareto", "Weibull"],  # dist_name
         [2],  # n_components
-        SAMPLE_SIZES, # n_samples
-        ["float16"] # dtype_name
+        SAMPLE_SIZES,  # n_samples
+        ["float16"],  # dtype_name
     )
     param_names = ["dist_name", "n_components", "n_samples", "dtype_name"]
     timeout = 300.0
@@ -177,14 +177,10 @@ def setup(self, dist_name, n_components, n_samples, dtype_name):
                 comp.fix_param("shape")
                 comp.fix_param("loc")
 
-        self.start_mix = MixtureModel(start_comps, dtype=dtype)
+        self.start_mix = LibAdapter.create_mixture(components=start_comps, dtype=dtype)
 
         # Run only 1 step to trigger the error immediately and measure recovery overhead
-        self.ecm = ECM(
-            breakpointers=[StepBreakpointer(max_steps=1)],
-            pruners=[],
-            optimizer=ScipyNelderMead()
-        )
+        self.ecm = ECM(breakpointers=[StepBreakpointer(max_steps=1)], pruners=[], optimizer=ScipyNelderMead())
 
     def time_fit_restart(self, dist_name, n_components, n_samples, dtype_name):
         with warnings.catch_warnings():
diff --git a/benchmarks/benchmarks/common.py b/benchmarks/benchmarks/common.py