Skip to content

Commit 6e33df4

Browse files
committed
tests
1 parent 2cc363d commit 6e33df4

File tree

10 files changed

+351
-52
lines changed

10 files changed

+351
-52
lines changed

.coverage

52 KB
Binary file not shown.

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,8 @@ docs/source/modules/generated/
2525
**/emos.c
2626
**/mds.cpp
2727

28+
# MAC OS files
29+
.DS_Store
30+
2831
# Reportings
2932
reporting/

pyproject.toml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ documentation = "https://lias-laboratory.github.io/radius_clustering/"
5252
[project.optional-dependencies]
5353
dev = [
5454
"pytest>=8.3.3",
55+
"pytest-cov>=5.0.0",
5556
"pandas",
5657
"cython>=3.0",
5758
"setuptools>= 61.0",
@@ -80,8 +81,22 @@ pythonpath = "src"
8081
testpaths = ["tests"]
8182
addopts = [
8283
"--import-mode=importlib",
84+
"--cov=src/radius_clustering",
85+
"--cov-report=term-missing",
86+
"--cov-report=html:coverage_html_report",
8387
]
8488

89+
[tool.coverage.run]
90+
source = ["src/radius_clustering"]
91+
branch = true
92+
93+
[tool.coverage.report]
94+
show_missing = true
95+
96+
[tool.coverage.html]
97+
directory = "coverage_html_report"
98+
title = "Coverage Report"
99+
85100
[tool.ruff]
86101
# Exclude a variety of commonly ignored directories.
87102
exclude = [

src/radius_clustering/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,4 @@
22
from .radius_clustering import RadiusClustering
33

44
__all__ = ["RadiusClustering"]
5-
6-
# Optionally, you can set a version number for your package
7-
__version__ = "1.2.2"
5+
__version__ = "1.2.3"

src/radius_clustering/radius_clustering.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class RadiusClustering(ClusterMixin, BaseEstimator):
3131
-----------
3232
manner : str, optional (default="approx")
3333
The method to use for solving the MDS problem. Can be "exact" or "approx".
34-
threshold : float, optional (default=0.5)
34+
radius : float, optional (default=0.5)
3535
The dissimilarity threshold to act as radius constraint for the clustering.
3636
3737
Attributes:
@@ -56,13 +56,16 @@ class RadiusClustering(ClusterMixin, BaseEstimator):
5656
.. versionchanged:: 1.3.0
5757
All publicly accessible attributes are now suffixed with an underscore (e.g., `centers_`, `labels_`).
5858
This is particularly useful for compatibility with scikit-learn's API.
59+
60+
.. versionchanged:: 1.3.0
61+
The `threshold` parameter was renamed to `radius` to better reflect its purpose.
5962
"""
6063

6164
_estimator_type = "clusterer"
6265

63-
def __init__(self, manner: str ="approx", threshold: float =0.5, random_state: int | None = None) -> None:
66+
def __init__(self, manner: str ="approx", radius: float =0.5, random_state: int | None = None) -> None:
6467
self.manner = manner
65-
self.threshold = threshold
68+
self.radius = radius
6669
self.random_state = random_state
6770

6871
def _check_symmetric(self, a: np.ndarray, tol: float =1e-8) -> bool:
@@ -125,7 +128,12 @@ def fit(self, X: np.ndarray, y: None = None) -> "RadiusClustering":
125128
dist_mat = pairwise_distances(self.X_checked_, metric="euclidean")
126129
else:
127130
dist_mat = self.X_checked_
128-
adj_mask = np.triu((dist_mat <= self.threshold), k=1)
131+
132+
if not isinstance(self.radius, (float, int)):
133+
raise ValueError("Radius must be a positive float.")
134+
if self.radius <= 0:
135+
raise ValueError("Radius must be a positive float.")
136+
adj_mask = np.triu((dist_mat <= self.radius), k=1)
129137
self.nb_edges_ = np.sum(adj_mask)
130138
if self.nb_edges_ == 0:
131139
self.centers_ = list(range(self.X_checked_.shape[0]))
@@ -171,6 +179,11 @@ def _clustering(self):
171179
Perform the clustering using either the exact or approximate MDS method.
172180
"""
173181
n = self.X_checked_.shape[0]
182+
if self.manner != "exact" and self.manner != "approx":
183+
print(f"Invalid manner: {self.manner}. Defaulting to 'approx'.")
184+
raise ValueError(
185+
"Invalid manner. Choose either 'exact' or 'approx'."
186+
)
174187
if self.manner == "exact":
175188
self._clustering_exact(n)
176189
else:
@@ -193,6 +206,7 @@ def _clustering_exact(self, n: int) -> None:
193206
self.centers_, self.mds_exec_time_ = py_emos_main(
194207
self.edges_.flatten(), n, self.nb_edges_
195208
)
209+
self.centers_.sort() # Sort the centers to ensure consistent order
196210

197211
def _clustering_approx(self, n: int) -> None:
198212
"""
@@ -226,7 +240,7 @@ def _clustering_approx(self, n: int) -> None:
226240
self.random_state_ = check_random_state(self.random_state)
227241
seed = self.random_state_.randint(np.iinfo(np.int32).max)
228242
result = solve_mds(n, self.edges_.flatten().astype(np.int32), self.nb_edges_, seed)
229-
self.centers_ = [x for x in result["solution_set"]]
243+
self.centers_ = sorted([x for x in result["solution_set"]])
230244
self.mds_exec_time_ = result["Time"]
231245

232246
def _compute_effective_radius(self):
@@ -246,4 +260,4 @@ def _compute_labels(self):
246260
self.labels_ = np.argmin(distances, axis=1)
247261

248262
min_dist = np.min(distances, axis=1)
249-
self.labels_[min_dist > self.threshold] = -1
263+
self.labels_[min_dist > self.radius] = -1

tests/test_integration.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import pytest
2+
3+
from radius_clustering import RadiusClustering
4+
from sklearn import datasets
5+
6+
# Iris feature table shared by every test in this module.
# NOTE(review): fetch_openml presumably needs network access (or a warm local
# cache) when this module is imported — confirm this is acceptable for CI;
# tests/test_regression.py loads the bundled copy through load_iris instead.
X = datasets.fetch_openml(name="iris", version=1, parser="auto")["data"]
7+
8+
def test_radius_clustering_approx():
    """
    Test the approximate method of the RadiusClustering class.

    The constructor parameters are checked before fitting. After fitting on
    the module-level ``X``, the fitted attributes must be populated and the
    labelling must contain one entry per sample.
    """
    clusterer = RadiusClustering(manner="approx", radius=1.43)

    assert clusterer.manner == "approx", "The manner should be 'approx'."
    assert clusterer.radius == 1.43, "The radius should be 1.43."
    assert clusterer.random_state is None, "The random state should be None by default."
    assert clusterer._estimator_type == "clusterer", "The estimator type should be 'clusterer'."
    assert clusterer._check_symmetric(X) is False, "The input should not be a symmetric distance matrix."

    clusterer.fit(X)

    assert clusterer.X_checked_ is not None, "X_checked_ should not be None after fitting."
    assert clusterer.dist_mat_ is not None, "dist_mat_ should not be None after fitting."
    assert clusterer.nb_edges_ > 0, "There should be edges in the graph."
    assert clusterer.labels_ is not None, "Labels should not be None after fitting."
    assert clusterer.centers_ is not None, "Centers should not be None after fitting."
    assert clusterer.effective_radius_ > 0, "Effective radius should be greater than 0."
    assert clusterer.mds_exec_time_ >= 0, "MDS execution time should be non-negative."
    assert clusterer.edges_ is not None, "Edges should not be None after fitting."
    # The estimator was built without a random_state, and the approximate
    # solver stores the resolved generator on `random_state_`, so the
    # constructor parameter is expected to still be None here. The earlier
    # `== 42` check contradicted the pre-fit assertion above.
    assert clusterer.random_state is None, "Random state should remain None after fitting."

    results = clusterer.labels_
    assert len(results) == X.shape[0], "The number of labels should match the number of samples."
    assert len(set(results)) <= X.shape[0], "The number of unique labels should not exceed the number of samples."
35+
36+
37+
def test_radius_clustering_exact():
    """
    Exercise the exact solver end to end on the module-level dataset.

    Constructor parameters are verified first; once the model is fitted, the
    fitted attributes and the shape of the labelling are verified.
    """
    model = RadiusClustering(manner="exact", radius=1.43)

    # State straight after construction, before any fitting.
    assert model.manner == "exact", "The manner should be 'exact'."
    assert model.radius == 1.43, "The radius should be 1.43."
    assert model.random_state is None, "The random state should be None by default."
    assert model._estimator_type == "clusterer", "The estimator type should be 'clusterer'."
    assert model._check_symmetric(X) is False, "The input should not be a symmetric distance matrix."

    model.fit(X)

    # State after fitting.
    assert model.X_checked_ is not None, "X_checked_ should not be None after fitting."
    assert model.dist_mat_ is not None, "dist_mat_ should not be None after fitting."
    assert model.nb_edges_ > 0, "There should be edges in the graph."
    assert model.labels_ is not None, "Labels should not be None after fitting."
    assert model.centers_ is not None, "Centers should not be None after fitting."
    assert model.effective_radius_ > 0, "Effective radius should be greater than 0."
    assert model.mds_exec_time_ >= 0, "MDS execution time should be non-negative."
    assert model.edges_ is not None, "Edges should not be None after fitting."
    assert model.random_state is None, "Random state should remain None."

    predicted = model.labels_
    n_samples = X.shape[0]
    assert len(predicted) == n_samples, "The number of labels should match the number of samples."
    assert len(set(predicted)) <= n_samples, "The number of unique labels should not exceed the number of samples."
64+
65+
def test_radius_clustering_fit_predict():
    """
    Check fit_predict with the approximate solver.

    The call must return a labelling with one entry per sample of the
    module-level dataset.
    """
    model = RadiusClustering(manner="approx", radius=1.43)

    # State straight after construction.
    assert model.manner == "approx", "The manner should be 'approx'."
    assert model.radius == 1.43, "The radius should be 1.43."
    assert model.random_state is None, "The random state should be None by default."
    assert model._estimator_type == "clusterer", "The estimator type should be 'clusterer'."

    predicted = model.fit_predict(X)

    assert predicted is not None, "Labels should not be None after fit_predict."
    n_samples = X.shape[0]
    assert len(predicted) == n_samples, "The number of labels should match the number of samples."
    assert len(set(predicted)) <= n_samples, "The number of unique labels should not exceed the number of samples."
81+
82+
def test_radius_clustering_fit_predict_exact():
    """
    Check fit_predict with the exact solver.

    The call must return a labelling with one entry per sample of the
    module-level dataset.
    """
    model = RadiusClustering(manner="exact", radius=1.43)

    # State straight after construction.
    assert model.manner == "exact", "The manner should be 'exact'."
    assert model.radius == 1.43, "The radius should be 1.43."
    assert model.random_state is None, "The random state should be None by default."
    assert model._estimator_type == "clusterer", "The estimator type should be 'clusterer'."

    predicted = model.fit_predict(X)

    assert predicted is not None, "Labels should not be None after fit_predict."
    n_samples = X.shape[0]
    assert len(predicted) == n_samples, "The number of labels should match the number of samples."
    assert len(set(predicted)) <= n_samples, "The number of unique labels should not exceed the number of samples."
98+
99+
def test_radius_clustering_random_state():
    """
    Check that an explicit random_state is kept and gives repeatable labels.

    Two estimators built with the same seed are fitted on the module-level
    dataset and their labellings are compared element-wise.
    """
    first = RadiusClustering(manner="approx", radius=1.43, random_state=123)

    assert first.random_state == 123, "The random state should be set to 123."

    first.fit(X)

    # Fitting must leave the constructor parameter untouched.
    assert first.random_state == 123, "The random state should remain 123 after fitting."

    first_labels = first.labels_

    # A fresh estimator with the same seed should reproduce the labelling.
    second = RadiusClustering(manner="approx", radius=1.43, random_state=123)
    second.fit(X)
    second_labels = second.labels_

    assert (first_labels == second_labels).all(), "Labels should be consistent across runs with the same random state."
123+
124+
def test_deterministic_behavior():
    """
    Check that repeated runs give identical labels.

    Covers the approximate solver with a fixed random state, then the exact
    solver, each with two independently constructed estimators.
    """
    # Approximate solver, same seed on both instances.
    approx_a = RadiusClustering(manner="approx", radius=1.43, random_state=42)
    approx_b = RadiusClustering(manner="approx", radius=1.43, random_state=42)

    approx_labels_a = approx_a.fit_predict(X)
    approx_labels_b = approx_b.fit_predict(X)

    assert (approx_labels_a == approx_labels_b).all(), "Labels should be the same for two instances with the same random state."

    # Exact solver, no seed involved.
    exact_a = RadiusClustering(manner="exact", radius=1.43)
    exact_b = RadiusClustering(manner="exact", radius=1.43)
    exact_labels_a = exact_a.fit_predict(X)
    exact_labels_b = exact_b.fit_predict(X)
    assert (exact_labels_a == exact_labels_b).all(), "Labels should be the same for two exact instances."

tests/test_rad.py

Lines changed: 0 additions & 43 deletions
This file was deleted.

tests/test_regression.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import pytest
2+
import numpy as np
3+
from radius_clustering import RadiusClustering
4+
from sklearn.datasets import load_iris
5+
6+
@pytest.fixture
def iris_data():
    """Provide the feature matrix of the Iris dataset returned by load_iris."""
    return load_iris().data
11+
12+
@pytest.fixture
def approx_results():
    """Provide the expected values used to check the approximate clustering run."""
    return dict(
        labels=[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,2,2,2,1,2,2,2,2,
                2,2,1,1,2,2,2,2,1,2,1,2,1,2,2,1,1,2,2,2,2,2,1,2,2,2,2,1,2,2,2,1,2,2,2,1,2,
                2,1],
        centers=[0, 96, 125],
        time=0.0280,
        effective_radius=1.4282856857085722,
    )
26+
27+
@pytest.fixture
def exact_results():
    """Provide the expected values used to check the exact clustering run."""
    return dict(
        labels=[
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
            1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,2,2,2,1,2,2,2,2,
            2,2,1,1,2,2,2,2,1,2,1,2,1,2,2,1,1,2,2,2,2,2,1,2,2,2,2,1,2,2,2,1,2,2,2,1,2,
            2,1
        ],
        centers=[0, 96, 102],
        time=0.0004,
        effective_radius=1.4282856857085722,
    )
43+
44+
def assert_results(results, expected):
    """
    Assert that a fitted clusterer matches the stored reference values.

    Parameters
    ----------
    results : object
        Fitted estimator exposing ``labels_``, ``centers_``,
        ``mds_exec_time_`` and ``effective_radius_``.
    expected : dict
        Reference values under the keys ``'labels'``, ``'centers'``,
        ``'time'`` and ``'effective_radius'``.

    Raises
    ------
    AssertionError
        If any compared value differs from its reference.
    """
    labels = np.asarray(results.labels_)
    expected_labels = np.asarray(expected['labels'])

    assert len(labels) == len(expected_labels), "Labels length mismatch"
    assert set(labels.tolist()) == set(expected_labels.tolist()), "Labels do not match expected"
    # Compare as plain lists so an array-like `centers_` cannot produce an
    # ambiguous element-wise truth value.
    assert list(results.centers_) == list(expected['centers']), "Centers do not match expected"
    # NOTE(review): this compares wall-clock time against a recorded value and
    # may be flaky on slow or heavily loaded machines — confirm it is wanted.
    assert abs(results.mds_exec_time_ - expected['time']) < 0.1, "Execution time mismatch by more than 0.1 seconds"
    assert abs(results.effective_radius_ - expected['effective_radius']) < 0.01, "Effective radius mismatch"
    # Element-wise comparison: summing the differences lets positive and
    # negative mismatches cancel out, so swapped labels would pass silently.
    assert np.array_equal(labels, expected_labels), "Labels do not match expected"
52+
53+
def test_exact(iris_data, exact_results):
    """Fit the exact solver on Iris with radius 1.43 and compare with the reference values."""
    model = RadiusClustering(radius=1.43, manner='exact')
    fitted = model.fit(iris_data)
    assert_results(fitted, exact_results)
57+
58+
def test_approx(iris_data, approx_results):
    """Fit the approximate solver on Iris with radius 1.43 and compare with the reference values."""
    model = RadiusClustering(radius=1.43, manner='approx')
    fitted = model.fit(iris_data)
    assert_results(fitted, approx_results)

tests/test_structural.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from logging import getLogger
2+
3+
# Module-level logger for this test module, set to the INFO level.
logger = getLogger(__name__)
logger.setLevel("INFO")
5+
6+
def test_import():
    """The package can be imported under an alias without raising."""
    import radius_clustering as rad  # noqa: F401 -- the import itself is the test
8+
9+
10+
def test_from_import():
    """The estimator class can be imported directly from the package."""
    from radius_clustering import RadiusClustering  # noqa: F401 -- the import itself is the test
12+
13+
def test_check_estimator_api_consistency():
    """Run scikit-learn's check_estimator against a default-constructed RadiusClustering."""
    from radius_clustering import RadiusClustering
    from sklearn.utils.estimator_checks import check_estimator

    # Any API inconsistency is reported by check_estimator raising.
    estimator = RadiusClustering()
    check_estimator(estimator)

0 commit comments

Comments
 (0)