Skip to content

Commit 37c4460

Browse files
committed
linting + changelog
1 parent 6e33df4 commit 37c4460

File tree

5 files changed

+102
-38
lines changed

5 files changed

+102
-38
lines changed

.coverage

0 Bytes
Binary file not shown.

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Changelog
2+
3+
## [1.2.3] - 2025-06-18
4+
5+
### Added
6+
7+
- Full test coverage for the entire codebase.
8+
- Badge for test coverage in the README.
9+
- Added `radius` parameter to the `RadiusClustering` class, allowing users to specify the radius for clustering.
10+
11+
### Deprecated
12+
13+
- Deprecated the `threshold` parameter in the `RadiusClustering` class. Use `radius` instead.
14+
15+
### Changed
16+
17+
- Updated all the attributes in the `RadiusClustering` class to fit `scikit-learn` standards and conventions.
18+
- Updated the test cases to reflect the changes in the `RadiusClustering` class.
19+
- Updated README and documentation to reflect the new `radius` parameter and the deprecation of `threshold`.

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,14 +120,14 @@ exclude = [
120120

121121
# Same as Black.
122122
line-length = 88
123-
indent-width = 4
123+
target-version = "py310"
124124

125125
[tool.ruff.lint]
126126
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
127127
# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
128128
# McCabe complexity (`C901`) by default.
129-
select = ["E", "F"]
130-
ignore = []
129+
select = ["E", "F", "W", "I"]
130+
ignore = ["E203", "E731", "E741"]
131131

132132
# Allow fix for all enabled rules (when `--fix` is provided).
133133
fixable = ["ALL"]

setup.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import platform
2-
from setuptools import setup, Extension
3-
from Cython.Build import cythonize
2+
43
import numpy as np
4+
from Cython.Build import cythonize
5+
from setuptools import Extension, setup
56

67
SYSTEM = platform.system()
78
CPU = platform.processor()
@@ -21,7 +22,10 @@
2122
extensions = [
2223
Extension(
2324
"radius_clustering.utils._emos",
24-
["src/radius_clustering/utils/emos.pyx", "src/radius_clustering/utils/main-emos.c"],
25+
[
26+
"src/radius_clustering/utils/emos.pyx",
27+
"src/radius_clustering/utils/main-emos.c"
28+
],
2529
include_dirs=[np.get_include(), "src/radius_clustering/utils"],
2630
extra_compile_args=C_COMPILE_ARGS,
2731
),

src/radius_clustering/radius_clustering.py

Lines changed: 73 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
"""
1010

1111
import os
12+
import warnings
13+
1214
import numpy as np
13-
from sklearn.metrics import pairwise_distances
1415
from sklearn.base import BaseEstimator, ClusterMixin
15-
from sklearn.utils.validation import check_array, validate_data, check_random_state
16+
from sklearn.metrics import pairwise_distances
17+
from sklearn.utils.validation import check_random_state, validate_data
1618

1719
from radius_clustering.utils._emos import py_emos_main
1820
from radius_clustering.utils._mds_approx import solve_mds
@@ -21,7 +23,7 @@
2123

2224

2325
class RadiusClustering(ClusterMixin, BaseEstimator):
24-
"""
26+
r"""
2527
Radius Clustering algorithm.
2628
2729
This class implements clustering based on the Minimum Dominating Set (MDS) problem.
@@ -46,29 +48,52 @@ class RadiusClustering(ClusterMixin, BaseEstimator):
4648
The maximum distance between any point and its assigned cluster center.
4749
random_state\_ : int | None
4850
The random state used for reproducibility. If None, no random state is set.
49-
51+
5052
.. note::
5153
The `random_state_` attribute is not used when the `manner` is set to "exact".
52-
54+
5355
.. versionadded:: 1.3.0
54-
The *random_state* parameter was added to allow reproducibility in the approximate method.
56+
The *random_state* parameter was added to allow reproducibility in
57+
the approximate method.
5558
5659
.. versionchanged:: 1.3.0
57-
All publicly accessible attributes are now suffixed with an underscore (e.g., `centers_`, `labels_`).
60+
All publicly accessible attributes are now suffixed with an underscore
61+
(e.g., `centers_`, `labels_`).
5862
This is particularly useful for compatibility with scikit-learn's API.
59-
60-
.. versionchanged:: 1.3.0
61-
The `threshold` parameter was renamed to `radius` to better reflect its purpose.
63+
64+
.. versionadded:: 1.3.0
65+
The `radius` parameter replaces the `threshold` parameter for setting
66+
the dissimilarity threshold for better clarity and consistency.
67+
68+
.. deprecated:: 1.3.0
69+
The `threshold` parameter is deprecated. Use `radius` instead.
70+
Will be removed in a future version.
6271
"""
6372

6473
_estimator_type = "clusterer"
6574

66-
def __init__(self, manner: str ="approx", radius: float =0.5, random_state: int | None = None) -> None:
75+
def __init__(
76+
self,
77+
manner: str = "approx",
78+
radius: float = 0.5,
79+
threshold=None,
80+
random_state: int | None = None,
81+
) -> None:
82+
if threshold is not None:
83+
warnings.warn(
84+
"The 'threshold' parameter is deprecated and"
85+
" will be removed in a future version."
86+
" Please use 'radius' instead.",
87+
DeprecationWarning,
88+
stacklevel=2,
89+
)
90+
radius = threshold
91+
self.threshold = threshold # For backward compatibility
6792
self.manner = manner
6893
self.radius = radius
6994
self.random_state = random_state
7095

71-
def _check_symmetric(self, a: np.ndarray, tol: float =1e-8) -> bool:
96+
def _check_symmetric(self, a: np.ndarray, tol: float = 1e-8) -> bool:
7297
if a.ndim != 2:
7398
raise ValueError("Input must be a 2D array.")
7499
if a.shape[0] != a.shape[1]:
@@ -80,21 +105,26 @@ def fit(self, X: np.ndarray, y: None = None) -> "RadiusClustering":
80105
Fit the MDS clustering model to the input data.
81106
82107
This method computes the distance matrix if the input is a feature matrix,
83-
or uses the provided distance matrix directly if the input is already a distance matrix.
108+
or uses the provided distance matrix directly if the input is already
109+
a distance matrix.
84110
85111
.. note::
86112
If the input is a distance matrix, it should be symmetric and square.
87-
If the input is a feature matrix, the distance matrix will be computed using Euclidean distance.
88-
113+
If the input is a feature matrix, the distance matrix
114+
will be computed using Euclidean distance.
115+
89116
.. tip::
90-
Next version will support providing different metrics or even custom callables to compute the distance matrix.
117+
Next version will support providing different metrics or
118+
even custom callables to compute the distance matrix.
91119
92120
Parameters:
93121
-----------
94122
X : array-like, shape (n_samples, n_features)
95-
The input data to cluster. X should be a 2D array-like structure. It can either be :
123+
The input data to cluster. X should be a 2D array-like structure.
124+
It can either be :
96125
- A distance matrix (symmetric, square) with shape (n_samples, n_samples).
97-
- A feature matrix with shape (n_samples, n_features) where the distance matrix will be computed.
126+
- A feature matrix with shape (n_samples, n_features)
127+
where the distance matrix will be computed.
98128
y : Ignored
99129
Not used, present here for API consistency by convention.
100130
@@ -128,7 +158,7 @@ def fit(self, X: np.ndarray, y: None = None) -> "RadiusClustering":
128158
dist_mat = pairwise_distances(self.X_checked_, metric="euclidean")
129159
else:
130160
dist_mat = self.X_checked_
131-
161+
132162
if not isinstance(self.radius, (float, int)):
133163
raise ValueError("Radius must be a positive float.")
134164
if self.radius <= 0:
@@ -141,7 +171,9 @@ def fit(self, X: np.ndarray, y: None = None) -> "RadiusClustering":
141171
self.effective_radius_ = 0
142172
self.mds_exec_time_ = 0
143173
return self
144-
self.edges_ = np.argwhere(adj_mask).astype(np.uint32) # Edges in the adjacency matrix
174+
self.edges_ = np.argwhere(adj_mask).astype(
175+
np.uint32
176+
) # Edges in the adjacency matrix
145177
# uint32 is used to use less memory. Max number of features is 2^32-1
146178
self.dist_mat_ = dist_mat
147179

@@ -160,9 +192,11 @@ def fit_predict(self, X: np.ndarray, y: None = None) -> np.ndarray:
160192
Parameters:
161193
-----------
162194
X : array-like, shape (n_samples, n_features)
163-
The input data to cluster. X should be a 2D array-like structure. It can either be :
195+
The input data to cluster. X should be a 2D array-like structure.
196+
It can either be :
164197
- A distance matrix (symmetric, square) with shape (n_samples, n_samples).
165-
- A feature matrix with shape (n_samples, n_features) where the distance matrix will be computed.
198+
- A feature matrix with shape (n_samples, n_features) where
199+
the distance matrix will be computed.
166200
y : Ignored
167201
Not used, present here for API consistency by convention.
168202
@@ -181,9 +215,7 @@ def _clustering(self):
181215
n = self.X_checked_.shape[0]
182216
if self.manner != "exact" and self.manner != "approx":
183217
print(f"Invalid manner: {self.manner}. Defaulting to 'approx'.")
184-
raise ValueError(
185-
"Invalid manner. Choose either 'exact' or 'approx'."
186-
)
218+
raise ValueError("Invalid manner. Choose either 'exact' or 'approx'.")
187219
if self.manner == "exact":
188220
self._clustering_exact(n)
189221
else:
@@ -210,20 +242,27 @@ def _clustering_exact(self, n: int) -> None:
210242

211243
def _clustering_approx(self, n: int) -> None:
212244
"""
213-
Perform approximate MDS clustering. This method uses a pretty trick to set the seed for the random state of the C++ code of the MDS solver.
245+
Perform approximate MDS clustering.
246+
This method uses a pretty trick to set the seed for
247+
the random state of the C++ code of the MDS solver.
214248
215249
.. tip::
216-
The random state is used to ensure reproducibility of the results when using the approximate method.
250+
The random state is used to ensure reproducibility of the results
251+
when using the approximate method.
217252
If `random_state` is None, a default value of 42 is used.
218-
253+
219254
.. important::
220255
:collapsible: closed
221256
The trick to set the random state is :
222-
1. Use the `check_random_state` function to get a `RandomState`singleton instance, set up with the provided `random_state`.
223-
2. Use the `randint` method of the `RandomState` instance to generate a random integer.
257+
1. Use the `check_random_state` function to get a `RandomState` singleton
258+
instance, set up with the provided `random_state`.
259+
2. Use the `randint` method of the `RandomState` instance to generate a
260+
random integer.
224261
3. Use this random integer as the seed for the C++ code of the MDS solver.
225262
226-
This ensures that the seed passed to the C++ code is always an integer, which is required by the MDS solver, and allows for reproducibility of the results.
263+
This ensures that the seed passed to the C++ code is always an integer,
264+
which is required by the MDS solver, and allows for
265+
reproducibility of the results.
227266
228267
Parameters:
229268
-----------
@@ -239,7 +278,9 @@ def _clustering_approx(self, n: int) -> None:
239278
self.random_state = 42
240279
self.random_state_ = check_random_state(self.random_state)
241280
seed = self.random_state_.randint(np.iinfo(np.int32).max)
242-
result = solve_mds(n, self.edges_.flatten().astype(np.int32), self.nb_edges_, seed)
281+
result = solve_mds(
282+
n, self.edges_.flatten().astype(np.int32), self.nb_edges_, seed
283+
)
243284
self.centers_ = sorted([x for x in result["solution_set"]])
244285
self.mds_exec_time_ = result["Time"]
245286

0 commit comments

Comments
 (0)