18
18
from sklearn .metrics import pairwise_distances
19
19
from sklearn .utils .validation import check_random_state , validate_data
20
20
21
- from radius_clustering .utils ._emos import py_emos_main
22
- from radius_clustering .utils ._mds_approx import solve_mds
21
+ from .algorithms import clustering_approx , clustering_exact
23
22
24
23
DIR_PATH = os .path .dirname (os .path .realpath (__file__ ))
25
24
@@ -53,20 +52,23 @@ class RadiusClustering(ClusterMixin, BaseEstimator):
53
52
54
53
.. note::
55
54
The `random_state_` attribute is not used when the `manner` is set to "exact".
55
+
56
+ .. versionchanged:: 2.0.0
57
+ The `RadiusClustering` class has been refactored.
58
+ Clustering algorithms are now separated into their own module
59
+ (`algorithms.py`) to improve maintainability and extensibility.
56
60
57
61
.. versionadded:: 1.3.0
58
- The *random_state* parameter was added to allow reproducibility in
59
- the approximate method.
62
+
63
+ - The *random_state* parameter was added to allow reproducibility in the approximate method.
64
+
65
+ - The `radius` parameter replaces the `threshold` parameter for setting the dissimilarity threshold for better clarity and consistency.
60
66
61
67
.. versionchanged:: 1.3.0
62
68
All publicly accessible attributes are now suffixed with an underscore
63
69
(e.g., `centers_`, `labels_`).
64
70
This is particularly useful for compatibility with scikit-learn's API.
65
71
66
- .. versionadded:: 1.3.0
67
- The `radius` parameter replaces the `threshold` parameter for setting
68
- the dissimilarity threshold for better clarity and consistency.
69
-
70
72
.. deprecated:: 1.3.0
71
73
The `threshold` parameter is deprecated. Use `radius` instead.
72
74
Will be removed in a future version.
@@ -243,7 +245,7 @@ def fit_predict(self, X: np.ndarray, y: None = None, metric: str | callable = "e
243
245
labels : array, shape (n_samples,)
244
246
The cluster labels for each point in X.
245
247
"""
246
- self .fit (X )
248
+ self .fit (X , metric = metric )
247
249
return self .labels_
248
250
249
251
def _clustering (self ):
@@ -252,75 +254,15 @@ def _clustering(self):
252
254
"""
253
255
n = self .X_checked_ .shape [0 ]
254
256
if self .manner != "exact" and self .manner != "approx" :
255
- print (f"Invalid manner: { self .manner } . Defaulting to 'approx'." )
256
257
raise ValueError ("Invalid manner. Choose either 'exact' or 'approx'." )
257
258
if self .manner == "exact" :
258
- self ._clustering_exact ( n )
259
+ self .centers_ , self . mds_exec_time_ = clustering_exact ( n , self . edges_ , self . nb_edges_ )
259
260
else :
260
- self ._clustering_approx (n )
261
-
262
- def _clustering_exact (self , n : int ) -> None :
263
- """
264
- Perform exact MDS clustering.
265
-
266
- Parameters:
267
- -----------
268
- n : int
269
- The number of points in the dataset.
270
-
271
- Notes:
272
- ------
273
- This function uses the EMOS algorithm to solve the MDS problem.
274
- See: [jiang]_ for more details.
275
- """
276
- self .centers_ , self .mds_exec_time_ = py_emos_main (
277
- self .edges_ .flatten (), n , self .nb_edges_
278
- )
279
- self .centers_ .sort () # Sort the centers to ensure consistent order
280
-
281
- def _clustering_approx (self , n : int ) -> None :
282
- """
283
- Perform approximate MDS clustering.
284
- This method uses a pretty trick to set the seed for
285
- the random state of the C++ code of the MDS solver.
286
-
287
- .. tip::
288
- The random state is used to ensure reproducibility of the results
289
- when using the approximate method.
290
- If `random_state` is None, a default value of 42 is used.
291
-
292
- .. important::
293
- :collapsible: closed
294
- The trick to set the random state is:
295
- 1. Use the `check_random_state` function to get a `RandomState` singleton
296
- instance, set up with the provided `random_state`.
297
- 2. Use the `randint` method of the `RandomState` instance to generate a
298
- random integer.
299
- 3. Use this random integer as the seed for the C++ code of the MDS solver.
300
-
301
- This ensures that the seed passed to the C++ code is always an integer,
302
- which is required by the MDS solver, and allows for
303
- reproducibility of the results.
304
-
305
- Parameters:
306
- -----------
307
- n : int
308
- The number of points in the dataset.
309
-
310
- Notes:
311
- ------
312
- This function uses the approximation method to solve the MDS problem.
313
- See [casado]_ for more details.
314
- """
315
- if self .random_state is None :
316
- self .random_state = 42
317
- self .random_state_ = check_random_state (self .random_state )
318
- seed = self .random_state_ .randint (np .iinfo (np .int32 ).max )
319
- result = solve_mds (
320
- n , self .edges_ .flatten ().astype (np .int32 ), self .nb_edges_ , seed
321
- )
322
- self .centers_ = sorted ([x for x in result ["solution_set" ]])
323
- self .mds_exec_time_ = result ["Time" ]
261
+ if self .random_state is None :
262
+ self .random_state = 42
263
+ self .random_state_ = check_random_state (self .random_state )
264
+ seed = self .random_state_ .randint (np .iinfo (np .int32 ).max )
265
+ self .centers_ , self .mds_exec_time_ = clustering_approx (n , self .edges_ , self .nb_edges_ , seed )
324
266
325
267
def _compute_effective_radius (self ):
326
268
"""
0 commit comments