Skip to content

Commit 9737540

Browse files
committed
feat(benchmarks): support older library versions
1 parent: 74eb20b · commit: 9737540

File tree

6 files changed, with 147 additions and 84 deletions.

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ repos:
2424
entry: poetry run ruff format
2525
language: system
2626
types: [python]
27-
files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/).*
27+
files: ^(mpest/|rework_pysatl_mpest/|experimental_env/|tests/|rework_tests/|benchmarks/).*
2828

2929
- id: mypy
3030
name: mypy check

benchmarks/asv.conf.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,9 @@
44
"project_url": "https://github.com/PySATL/rework-pysatl-mpest",
55
"repo": "..",
66
"branches": ["refactor/rework-arch"],
7-
"show_commit_url": "https://github.com/PySATL/rework-pysatl-mpest/commits",
7+
"show_commit_url": "https://github.com/PySATL/rework-pysatl-mpest/commits/refactor/rework-arch/",
88
"environment_type": "virtualenv",
9-
"install_command": ["python -m pip install ."],
9+
"install_command": ["python -m pip install -e ."],
1010
"matrix": {
1111
"req": {
1212
"numpy": [],

benchmarks/benchmarks/bench_core.py

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -9,18 +9,18 @@
99
import warnings
1010
from copy import copy
1111

12-
from rework_pysatl_mpest.core import MixtureModel
13-
from .common import Benchmark, DTYPES_MAP, GENERATE_SHAPES, SAMPLE_SIZES, get_components
12+
from .common import DTYPES_MAP, GENERATE_SHAPES, SAMPLE_SIZES, Benchmark, LibAdapter, get_components
13+
1414

1515
class MixtureMethods(Benchmark):
1616
"""
1717
Benchmarks for MixtureModel computational methods.
1818
"""
1919

2020
params = (
21-
[10], # n_components
21+
[10], # n_components
2222
SAMPLE_SIZES, # n_samples
23-
list(DTYPES_MAP.keys()) # dtype_name
23+
list(DTYPES_MAP.keys()), # dtype_name
2424
)
2525
param_names = ["n_components", "n_samples", "dtype_name"]
2626

@@ -31,7 +31,7 @@ def setup(self, n_components, n_samples, dtype_name):
3131
dtype = DTYPES_MAP[dtype_name]
3232
components = get_components("Normal", dtype, n_components)
3333

34-
self.mixture = MixtureModel(components=components, dtype=dtype)
34+
self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
3535
# Pre-generate data to avoid measuring generation time
3636
self.X = self.mixture.generate(n_samples)
3737

@@ -63,6 +63,7 @@ class MixtureScalability(Benchmark):
6363
Tests how MixtureModel scales with the number of components (K).
6464
Critical for identifying O(K) vs O(K^2) bottlenecks.
6565
"""
66+
6667
params = (
6768
[2, 5, 20, 100], # n_components
6869
[10000], # n_samples
@@ -73,7 +74,7 @@ def setup(self, n_components, n_samples):
7374
# Create K distinct components
7475
components = get_components("Normal", n_components=n_components)
7576

76-
self.mixture = MixtureModel(components)
77+
self.mixture = LibAdapter.create_mixture(components=components)
7778
self.X = self.mixture.generate(n_samples)
7879

7980
# --- Time Benchmarks ---
@@ -107,7 +108,7 @@ class MixtureGenerate(Benchmark):
107108
params = (
108109
[2, 5, 20, 100], # n_components
109110
list(GENERATE_SHAPES.keys()), # shape_name
110-
list(DTYPES_MAP.keys()) # dtype_name
111+
list(DTYPES_MAP.keys()), # dtype_name
111112
)
112113
param_names = ["n_components", "shape_name", "dtype_name"]
113114

@@ -118,7 +119,7 @@ def setup(self, n_components, shape_name, dtype_name):
118119
dtype = DTYPES_MAP[dtype_name]
119120
components = get_components("Normal", n_components=n_components)
120121

121-
self.mixture = MixtureModel(components=components, dtype=dtype)
122+
self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
122123
self.shape = GENERATE_SHAPES[shape_name]
123124

124125
# --- Time Benchmarks ---
@@ -140,17 +141,20 @@ class MixtureAstype(Benchmark):
140141
params = (
141142
[2, 5, 20, 100], # n_components
142143
list(DTYPES_MAP.keys()), # dtype_name
143-
list(DTYPES_MAP.keys()) # conv_dtype_name
144+
list(DTYPES_MAP.keys()), # conv_dtype_name
144145
)
145146
param_names = ["n_components", "dtype_name", "conv_dtype_name"]
146147

147148
def setup(self, n_components, dtype_name, conv_dtype_name):
148149
dtype = DTYPES_MAP[dtype_name]
149150
components = get_components("Normal", dtype, n_components)
150151

151-
self.mixture = MixtureModel(components=components, dtype=dtype)
152+
self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
152153
self.conv_dtype = DTYPES_MAP[conv_dtype_name]
153154

155+
if not callable(getattr(self.mixture, "astype", None)):
156+
raise NotImplementedError("Old version MixtureModel does not support .astype")
157+
154158
# --- Time Benchmarks ---
155159

156160
def time_astype(self, n_components, dtype_name, conv_dtype_name):
@@ -161,6 +165,7 @@ def time_astype(self, n_components, dtype_name, conv_dtype_name):
161165
def peakmem_astype(self, n_components, dtype_name, conv_dtype_name):
162166
self.mixture.astype(self.conv_dtype)
163167

168+
164169
class MixtureCopy(Benchmark):
165170
"""
166171
Benchmarks for object copying overhead.
@@ -169,14 +174,14 @@ class MixtureCopy(Benchmark):
169174

170175
params = (
171176
[2, 5, 20, 100], # n_components
172-
list(DTYPES_MAP.keys()) # dtype_name
177+
list(DTYPES_MAP.keys()), # dtype_name
173178
)
174179
param_names = ["n_components", "dtype_name"]
175180

176181
def setup(self, n_components, dtype_name):
177182
dtype = DTYPES_MAP[dtype_name]
178183
components = get_components("Normal", dtype, n_components)
179-
self.mixture = MixtureModel(components=components, dtype=dtype)
184+
self.mixture = LibAdapter.create_mixture(components=components, dtype=dtype)
180185

181186
# --- Time Benchmarks ---
182187

benchmarks/benchmarks/bench_distributions.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,7 @@
99
import warnings
1010
from copy import copy
1111

12-
from .common import Benchmark, DISTRIBUTIONS, DTYPES_MAP, GENERATE_SHAPES, SAMPLE_SIZES, get_components, RNG_GENERATOR
13-
12+
from .common import DISTRIBUTIONS, DTYPES_MAP, GENERATE_SHAPES, RNG_GENERATOR, SAMPLE_SIZES, Benchmark, get_components
1413

1514

1615
class DistributionMethods(Benchmark):
@@ -22,7 +21,7 @@ class DistributionMethods(Benchmark):
2221
params = (
2322
DISTRIBUTIONS, # dist_name
2423
SAMPLE_SIZES, # n_samples
25-
list(DTYPES_MAP.keys()) # dtype_name
24+
list(DTYPES_MAP.keys()), # dtype_name
2625
)
2726
param_names = ["dist_name", "n_samples", "dtype_name"]
2827

@@ -71,7 +70,7 @@ class DistributionGenerate(Benchmark):
7170
params = (
7271
DISTRIBUTIONS, # dist_name
7372
list(GENERATE_SHAPES.keys()), # shape_name
74-
list(DTYPES_MAP.keys()) # dtype_name
73+
list(DTYPES_MAP.keys()), # dtype_name
7574
)
7675
param_names = ["dist_name", "shape_name", "dtype_name"]
7776

@@ -102,7 +101,7 @@ class DistributionAstype(Benchmark):
102101
params = (
103102
DISTRIBUTIONS, # dist_name
104103
list(DTYPES_MAP.keys()), # dtype_name
105-
list(DTYPES_MAP.keys()) # conv_dtype_name
104+
list(DTYPES_MAP.keys()), # conv_dtype_name
106105
)
107106
param_names = ["dist_name", "dtype_name", "conv_dtype_name"]
108107

@@ -111,6 +110,9 @@ def setup(self, dist_name, dtype_name, conv_dtype_name):
111110
self.dist = get_components(dist_name, dtype, 1)[0]
112111
self.conv_dtype = DTYPES_MAP[conv_dtype_name]
113112

113+
if not callable(getattr(self.dist, "astype", None)):
114+
raise NotImplementedError(f"Old version {dist_name} does not support .astype")
115+
114116
# --- Time Benchmarks ---
115117

116118
def time_astype(self, dist_name, dtype_name, conv_dtype_name):
@@ -130,7 +132,7 @@ class DistributionCopy(Benchmark):
130132

131133
params = (
132134
DISTRIBUTIONS, # dist_name
133-
list(DTYPES_MAP.keys()) # dtype_name
135+
list(DTYPES_MAP.keys()), # dtype_name
134136
)
135137
param_names = ["dist_name", "dtype_name"]
136138

benchmarks/benchmarks/bench_estimators.py

Lines changed: 29 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,18 @@
1010
from copy import copy
1111

1212
import numpy as np
13-
14-
from rework_pysatl_mpest.core import MixtureModel
1513
from rework_pysatl_mpest.estimators import ECM
16-
from rework_pysatl_mpest.estimators.iterative import ExpectationStep, PipelineState, OptimizationBlock, \
17-
MaximizationStrategy, MaximizationStep
14+
from rework_pysatl_mpest.estimators.iterative import (
15+
ExpectationStep,
16+
MaximizationStep,
17+
MaximizationStrategy,
18+
OptimizationBlock,
19+
PipelineState,
20+
)
1821
from rework_pysatl_mpest.estimators.iterative.breakpointers import StepBreakpointer
1922
from rework_pysatl_mpest.optimizers import ScipyNelderMead
20-
from .common import Benchmark, DTYPES_MAP, SAMPLE_SIZES, get_components, DISTRIBUTIONS, RNG_GENERATOR
23+
24+
from .common import DISTRIBUTIONS, DTYPES_MAP, RNG_GENERATOR, SAMPLE_SIZES, Benchmark, LibAdapter, get_components
2125

2226

2327
class StepOverhead(Benchmark):
@@ -34,8 +38,8 @@ class StepOverhead(Benchmark):
3438
DISTRIBUTIONS, # dist_name
3539
[2], # n_components
3640
SAMPLE_SIZES, # n_samples
37-
list(DTYPES_MAP.keys()), # dtype_name
38-
[True, False] # is_soft
41+
list(DTYPES_MAP.keys()), # dtype_name
42+
[True, False], # is_soft
3943
)
4044
param_names = ["dist_name", "n_components", "n_samples", "dtype_name", "is_soft"]
4145

@@ -52,17 +56,15 @@ def setup(self, dist_name, n_components, n_samples, dtype_name, is_soft):
5256
comp.fix_param("shape")
5357
comp.fix_param("loc")
5458

55-
self.mix_analytical = MixtureModel(self.comps_analytical, dtype=dtype)
59+
self.mix_analytical = LibAdapter.create_mixture(components=self.comps_analytical, dtype=dtype)
5660
self.X_analytical = self.mix_analytical.generate(n_samples)
5761

5862
# --- Pipeline Components ---
5963
self.e_step = ExpectationStep(is_soft=is_soft)
6064

6165
# Setup States
6266
# 1. State ready for E-step
63-
self.state_analytical_for_E = PipelineState(
64-
self.X_analytical, None, None, copy(self.mix_analytical), None
65-
)
67+
self.state_analytical_for_E = PipelineState(self.X_analytical, None, None, copy(self.mix_analytical), None)
6668

6769
# 2. State ready for M-step (Pre-calculate H)
6870
self.state_analytical_for_M = self.e_step.run(
@@ -103,11 +105,11 @@ class ECMAnalyticalCleanWithStepBreakpointer(Benchmark):
103105
"""
104106

105107
params = (
106-
["Normal", "Exponential", "Pareto", "Weibull"], # dist_name
107-
[2], # n_components
108-
[5], # max_steps
109-
SAMPLE_SIZES, # n_samples
110-
list(DTYPES_MAP.keys()) # dtype_name
108+
["Normal", "Exponential", "Pareto", "Weibull"], # dist_name
109+
[2], # n_components
110+
[5], # max_steps
111+
SAMPLE_SIZES, # n_samples
112+
list(DTYPES_MAP.keys()), # dtype_name
111113
)
112114
param_names = ["dist_name", "n_components", "max_steps", "n_samples", "dtype_name"]
113115

@@ -120,24 +122,22 @@ def setup(self, dist_name, n_components, max_steps, n_samples, dtype_name):
120122

121123
dtype = DTYPES_MAP[dtype_name]
122124
true_comps = get_components(dist_name, dtype, n_components)
123-
self.X = MixtureModel(true_comps).generate(n_samples)
125+
self.X = LibAdapter.create_mixture(components=true_comps).generate(n_samples)
124126

125127
start_comps = copy(true_comps)
126128
for comp in start_comps:
127-
new_params = np.asarray(comp.get_params_vector(comp.params), dtype=dtype) + np.ones(len(comp.params), dtype=dtype)
129+
new_params = np.asarray(comp.get_params_vector(comp.params), dtype=dtype) + np.ones(
130+
len(comp.params), dtype=dtype
131+
)
128132
comp.set_params_from_vector(comp.params, new_params)
129133
if dist_name == "Weibull":
130134
comp.fix_param("shape")
131135
comp.fix_param("loc")
132136

133-
self.start_mixture = MixtureModel(start_comps, dtype=dtype)
137+
self.start_mixture = LibAdapter.create_mixture(components=start_comps, dtype=dtype)
134138

135139
# Configure ECM to run for a fixed small number of steps to measure throughput
136-
self.ecm = ECM(
137-
breakpointers=[StepBreakpointer(max_steps=max_steps)],
138-
pruners=[],
139-
optimizer=ScipyNelderMead()
140-
)
140+
self.ecm = ECM(breakpointers=[StepBreakpointer(max_steps=max_steps)], pruners=[], optimizer=ScipyNelderMead())
141141

142142
def time_fit(self, dist_name, n_components, max_steps, n_samples, dtype_name):
143143
self.ecm.fit(self.X, self.start_mixture)
@@ -152,10 +152,10 @@ class ECMAnalyticalOverflow(Benchmark):
152152
"""
153153

154154
params = (
155-
["Normal", "Exponential", "Pareto", "Weibull"], # dist_name
155+
["Normal", "Exponential", "Pareto", "Weibull"], # dist_name
156156
[2], # n_components
157-
SAMPLE_SIZES, # n_samples
158-
["float16"] # dtype_name
157+
SAMPLE_SIZES, # n_samples
158+
["float16"], # dtype_name
159159
)
160160
param_names = ["dist_name", "n_components", "n_samples", "dtype_name"]
161161
timeout = 300.0
@@ -177,14 +177,10 @@ def setup(self, dist_name, n_components, n_samples, dtype_name):
177177
comp.fix_param("shape")
178178
comp.fix_param("loc")
179179

180-
self.start_mix = MixtureModel(start_comps, dtype=dtype)
180+
self.start_mix = LibAdapter.create_mixture(components=start_comps, dtype=dtype)
181181

182182
# Run only 1 step to trigger the error immediately and measure recovery overhead
183-
self.ecm = ECM(
184-
breakpointers=[StepBreakpointer(max_steps=1)],
185-
pruners=[],
186-
optimizer=ScipyNelderMead()
187-
)
183+
self.ecm = ECM(breakpointers=[StepBreakpointer(max_steps=1)], pruners=[], optimizer=ScipyNelderMead())
188184

189185
def time_fit_restart(self, dist_name, n_components, n_samples, dtype_name):
190186
with warnings.catch_warnings():

0 commit comments

Comments
 (0)