tests

quentinhaenn · quentinhaenn · commit 6e33df4ba358 · 2025-06-18T07:33:06.000+02:00
diff --git a/.coverage b/.coverage
diff --git a/.gitignore b/.gitignore
@@ -25,5 +25,8 @@ docs/source/modules/generated/
 **/emos.c
 **/mds.cpp
 
+# MAC OS files
+.DS_Store
+
 # Reportings
 reporting/
diff --git a/pyproject.toml b/pyproject.toml
@@ -52,6 +52,7 @@ documentation = "https://lias-laboratory.github.io/radius_clustering/"
 [project.optional-dependencies]
 dev = [
     "pytest>=8.3.3",
+    "pytest-cov>=5.0.0",
     "pandas",
     "cython>=3.0",
     "setuptools>= 61.0",
@@ -80,8 +81,22 @@ pythonpath = "src"
 testpaths = ["tests"]
 addopts = [
     "--import-mode=importlib",
+    "--cov=src/radius_clustering",
+    "--cov-report=term-missing",
+    "--cov-report=html:coverage_html_report",
 ]
 
+[tool.coverage.run]
+source = ["src/radius_clustering"]
+branch = true
+
+[tool.coverage.report]
+show_missing = true
+
+[tool.coverage.html]
+directory = "coverage_html_report"
+title = "Coverage Report"
+
 [tool.ruff]
 # Exclude a variety of commonly ignored directories.
 exclude = [
diff --git a/src/radius_clustering/__init__.py b/src/radius_clustering/__init__.py
@@ -2,6 +2,4 @@
 from .radius_clustering import RadiusClustering
 
 __all__ = ["RadiusClustering"]
-
-# Optionally, you can set a version number for your package
-__version__ = "1.2.2"
+__version__ = "1.2.3"
diff --git a/src/radius_clustering/radius_clustering.py b/src/radius_clustering/radius_clustering.py
@@ -31,7 +31,7 @@ class RadiusClustering(ClusterMixin, BaseEstimator):
     -----------
     manner : str, optional (default="approx")
         The method to use for solving the MDS problem. Can be "exact" or "approx".
-    threshold : float, optional (default=0.5)
+    radius : float, optional (default=0.5)
         The dissimilarity threshold to act as radius constraint for the clustering.
 
     Attributes:
@@ -56,13 +56,16 @@ class RadiusClustering(ClusterMixin, BaseEstimator):
     .. versionchanged:: 1.3.0
         All publicly accessible attributes are now suffixed with an underscore (e.g., `centers_`, `labels_`).
         This is particularly useful for compatibility with scikit-learn's API.
+    
+    .. versionchanged:: 1.3.0
+        The `threshold` parameter was renamed to `radius` to better reflect its purpose.
     """
 
     _estimator_type = "clusterer"
 
-    def __init__(self, manner: str ="approx", threshold: float =0.5, random_state: int | None = None) -> None:
+    def __init__(self, manner: str ="approx", radius: float =0.5, random_state: int | None = None) -> None:
         self.manner = manner
-        self.threshold = threshold
+        self.radius = radius
         self.random_state = random_state
 
     def _check_symmetric(self, a: np.ndarray, tol: float =1e-8) -> bool:
@@ -125,7 +128,12 @@ def fit(self, X: np.ndarray, y: None = None) -> "RadiusClustering":
             dist_mat = pairwise_distances(self.X_checked_, metric="euclidean")
         else:
             dist_mat = self.X_checked_
-        adj_mask = np.triu((dist_mat <= self.threshold), k=1)
+        
+        if not isinstance(self.radius, (float, int)):
+            raise ValueError("Radius must be a positive float.")
+        if self.radius <= 0:
+            raise ValueError("Radius must be a positive float.")
+        adj_mask = np.triu((dist_mat <= self.radius), k=1)
         self.nb_edges_ = np.sum(adj_mask)
         if self.nb_edges_ == 0:
             self.centers_ = list(range(self.X_checked_.shape[0]))
@@ -171,6 +179,11 @@ def _clustering(self):
         Perform the clustering using either the exact or approximate MDS method.
         """
         n = self.X_checked_.shape[0]
+        if self.manner != "exact" and self.manner != "approx":
+            print(f"Invalid manner: {self.manner}. Defaulting to 'approx'.")
+            raise ValueError(
+                "Invalid manner. Choose either 'exact' or 'approx'."
+            )
         if self.manner == "exact":
             self._clustering_exact(n)
         else:
@@ -193,6 +206,7 @@ def _clustering_exact(self, n: int) -> None:
         self.centers_, self.mds_exec_time_ = py_emos_main(
             self.edges_.flatten(), n, self.nb_edges_
         )
+        self.centers_.sort()  # Sort the centers to ensure consistent order
 
     def _clustering_approx(self, n: int) -> None:
         """
@@ -226,7 +240,7 @@ def _clustering_approx(self, n: int) -> None:
         self.random_state_ = check_random_state(self.random_state)
         seed = self.random_state_.randint(np.iinfo(np.int32).max)
         result = solve_mds(n, self.edges_.flatten().astype(np.int32), self.nb_edges_, seed)
-        self.centers_ = [x for x in result["solution_set"]]
+        self.centers_ = sorted([x for x in result["solution_set"]])
         self.mds_exec_time_ = result["Time"]
 
     def _compute_effective_radius(self):
@@ -246,4 +260,4 @@ def _compute_labels(self):
         self.labels_ = np.argmin(distances, axis=1)
 
         min_dist = np.min(distances, axis=1)
-        self.labels_[min_dist > self.threshold] = -1
+        self.labels_[min_dist > self.radius] = -1
diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -0,0 +1,140 @@
+import pytest
+
+from radius_clustering import RadiusClustering
+from sklearn import datasets
+
+X = datasets.fetch_openml(name="iris", version=1, parser="auto")["data"]  # NOTE(review): fetched from OpenML when this module is imported; may need network access — confirm on CI
+
+def test_radius_clustering_approx():
+    """
+    Check constructor parameters and fitted attributes of RadiusClustering with the approximate method.
+    """
+    clusterer = RadiusClustering(manner="approx", radius=1.43)
+
+    assert clusterer.manner == "approx", "The manner should be 'approx'."
+    assert clusterer.radius == 1.43, "The radius should be 1.43."
+    assert clusterer.random_state is None, "The random state should be None by default."
+    assert clusterer._estimator_type == "clusterer", "The estimator type should be 'clusterer'."
+    assert clusterer._check_symmetric(X) is False, "The input should not be a symmetric distance matrix."
+
+    clusterer.fit(X)
+
+    assert clusterer.X_checked_ is not None, "X_checked_ should not be None after fitting."
+    assert clusterer.dist_mat_ is not None, "dist_mat_ should not be None after fitting."
+    assert clusterer.nb_edges_ > 0, "There should be edges in the graph."
+    assert clusterer.labels_ is not None, "Labels should not be None after fitting."
+    assert clusterer.centers_ is not None, "Centers should not be None after fitting."
+    assert clusterer.effective_radius_ > 0, "Effective radius should be greater than 0."
+    assert clusterer.mds_exec_time_ >= 0, "MDS execution time should be non-negative."
+    assert clusterer.edges_ is not None, "Edges should not be None after fitting."
+    # No random_state was passed to the constructor, so the parameter must still be None after fit.
+    assert clusterer.random_state is None, "Random state should remain None after fitting."
+    results = clusterer.labels_
+    assert len(results) == X.shape[0], "The number of labels should match the number of samples."
+    assert len(set(results)) <= X.shape[0], "The number of unique labels should not exceed the number of samples."
+
+
+def test_radius_clustering_exact():
+    """
+    Exercise the exact method: constructor parameters first, fitted attributes second.
+    """
+    model = RadiusClustering(manner="exact", radius=1.43)
+
+    assert model.manner == "exact", "The manner should be 'exact'."
+    assert model.radius == 1.43, "The radius should be 1.43."
+    assert model.random_state is None, "The random state should be None by default."
+    assert model._estimator_type == "clusterer", "The estimator type should be 'clusterer'."
+    assert model._check_symmetric(X) is False, "The input should not be a symmetric distance matrix."
+
+    model.fit(X)
+
+    assert model.X_checked_ is not None, "X_checked_ should not be None after fitting."
+    assert model.dist_mat_ is not None, "dist_mat_ should not be None after fitting."
+    assert model.nb_edges_ > 0, "There should be edges in the graph."
+    assert model.labels_ is not None, "Labels should not be None after fitting."
+    assert model.centers_ is not None, "Centers should not be None after fitting."
+    assert model.effective_radius_ > 0, "Effective radius should be greater than 0."
+    assert model.mds_exec_time_ >= 0, "MDS execution time should be non-negative."
+    assert model.edges_ is not None, "Edges should not be None after fitting."
+    assert model.random_state is None, "Random state should remain None."
+
+    labels = model.labels_
+    assert len(labels) == X.shape[0], "The number of labels should match the number of samples."
+    assert len(set(labels)) <= X.shape[0], "The number of unique labels should not exceed the number of samples."
+
+def test_radius_clustering_fit_predict():
+    """
+    Check that fit_predict with the approximate method yields one label per sample.
+    """
+    model = RadiusClustering(manner="approx", radius=1.43)
+
+    assert model.manner == "approx", "The manner should be 'approx'."
+    assert model.radius == 1.43, "The radius should be 1.43."
+    assert model.random_state is None, "The random state should be None by default."
+    assert model._estimator_type == "clusterer", "The estimator type should be 'clusterer'."
+
+    predicted = model.fit_predict(X)
+
+    assert predicted is not None, "Labels should not be None after fit_predict."
+    assert len(predicted) == X.shape[0], "The number of labels should match the number of samples."
+    assert len(set(predicted)) <= X.shape[0], "The number of unique labels should not exceed the number of samples."
+
+def test_radius_clustering_fit_predict_exact():
+    """
+    Check that fit_predict with the exact method yields one label per sample.
+    """
+    model = RadiusClustering(manner="exact", radius=1.43)
+
+    assert model.manner == "exact", "The manner should be 'exact'."
+    assert model.radius == 1.43, "The radius should be 1.43."
+    assert model.random_state is None, "The random state should be None by default."
+    assert model._estimator_type == "clusterer", "The estimator type should be 'clusterer'."
+
+    predicted = model.fit_predict(X)
+
+    assert predicted is not None, "Labels should not be None after fit_predict."
+    assert len(predicted) == X.shape[0], "The number of labels should match the number of samples."
+    assert len(set(predicted)) <= X.shape[0], "The number of unique labels should not exceed the number of samples."
+
+def test_radius_clustering_random_state():
+    """
+    Two approx fits sharing a seed must agree, and fitting must leave the seed parameter unchanged.
+    """
+    first = RadiusClustering(manner="approx", radius=1.43, random_state=123)
+
+    assert first.random_state == 123, "The random state should be set to 123."
+
+    # First run
+    first.fit(X)
+
+    # The constructor parameter is expected to be unchanged by fitting
+    assert first.random_state == 123, "The random state should remain 123 after fitting."
+
+    # Reference labels from the first run
+    labels1 = first.labels_
+
+    # Second, independent estimator built with the identical seed
+    second = RadiusClustering(manner="approx", radius=1.43, random_state=123)
+    second.fit(X)
+
+    labels2 = second.labels_
+
+    assert (labels1 == labels2).all(), "Labels should be consistent across runs with the same random state."
+
+def test_deterministic_behavior():
+    """
+    Repeated runs must agree: approx with a shared random state, then exact with none given.
+    """
+    approx_a = RadiusClustering(manner="approx", radius=1.43, random_state=42)
+    approx_b = RadiusClustering(manner="approx", radius=1.43, random_state=42)
+
+    labels_a = approx_a.fit_predict(X)
+    labels_b = approx_b.fit_predict(X)
+
+    assert (labels_a == labels_b).all(), "Labels should be the same for two instances with the same random state."
+
+    exact_a = RadiusClustering(manner="exact", radius=1.43)
+    exact_b = RadiusClustering(manner="exact", radius=1.43)
+    labels_a = exact_a.fit_predict(X)
+    labels_b = exact_b.fit_predict(X)
+    assert (labels_a == labels_b).all(), "Labels should be the same for two exact instances."
diff --git a/tests/test_rad.py b/tests/test_rad.py
diff --git a/tests/test_regression.py b/tests/test_regression.py
@@ -0,0 +1,61 @@
+import pytest
+import numpy as np
+from radius_clustering import RadiusClustering
+from sklearn.datasets import load_iris
+
+@pytest.fixture
+def iris_data():
+    """Provide the ``data`` entry of the object returned by ``load_iris``."""
+    iris = load_iris()
+    return iris["data"]
+
+@pytest.fixture
+def approx_results():
+    """Reference labels, centers, run time and effective radius that test_approx compares against."""
+    results = {
+        'labels': [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+                   0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+                   1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,2,2,2,1,2,2,2,2,
+                   2,2,1,1,2,2,2,2,1,2,1,2,1,2,2,1,1,2,2,2,2,2,1,2,2,2,2,1,2,2,2,1,2,2,2,1,2,
+                   2,1],
+        "centers": [0,96,125],
+        "time" : 0.0280,
+        "effective_radius": 1.4282856857085722
+    }
+    return results
+
+@pytest.fixture
+def exact_results():
+    """Reference labels, centers, run time and effective radius that test_exact compares against."""
+    results = {
+        'labels':[
+            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+            0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+            1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,2,2,2,1,2,2,2,2,
+            2,2,1,1,2,2,2,2,1,2,1,2,1,2,2,1,1,2,2,2,2,2,1,2,2,2,2,1,2,2,2,1,2,2,2,1,2,
+            2,1
+                ],
+        "centers": [0, 96, 102],
+        "time": 0.0004,
+        "effective_radius": 1.4282856857085722
+    }
+    return results
+
+def assert_results(results, expected):
+    """Check a fitted clusterer against the expected labels, centers, run time and effective radius."""
+    # Compare labels element-wise: summing the differences lets opposite errors cancel out to zero.
+    assert np.array_equal(results.labels_, expected['labels']), "Labels do not match expected"
+    assert results.centers_ == expected['centers'], "Centers do not match expected"
+    # NOTE(review): the wall-clock tolerance is machine dependent and may be flaky on slow runners.
+    assert abs(results.mds_exec_time_ - expected['time']) < 0.1, "Execution time mismatch by more than 0.1 seconds"
+    assert abs(results.effective_radius_ - expected['effective_radius']) < 0.01, "Effective radius mismatch"
+
+def test_exact(iris_data, exact_results):
+    """Regression check: the exact method on the iris fixture must match the stored reference."""
+    fitted = RadiusClustering(manner='exact', radius=1.43).fit(iris_data)
+    assert_results(fitted, exact_results)
+
+def test_approx(iris_data, approx_results):
+    """Regression check: the approx method on the iris fixture must match the stored reference."""
+    fitted = RadiusClustering(manner='approx', radius=1.43).fit(iris_data)
+    assert_results(fitted, approx_results)
diff --git a/tests/test_structural.py b/tests/test_structural.py
@@ -0,0 +1,18 @@
+from logging import getLogger
+
+logger = getLogger(__name__)  # module-level logger; none of the tests in this hunk reference it
+logger.setLevel("INFO")
+
+def test_import():
+    import radius_clustering  # noqa: F401 - smoke test: the package must be importable
+
+
+def test_from_import():
+    from radius_clustering import RadiusClustering as _Estimator  # noqa: F401 - the public name must resolve
+
+def test_check_estimator_api_consistency():
+    """Run scikit-learn's ``check_estimator`` on a default-constructed RadiusClustering."""
+    from sklearn.utils.estimator_checks import check_estimator
+    from radius_clustering import RadiusClustering
+
+    check_estimator(RadiusClustering())
diff --git a/tests/test_unit.py b/tests/test_unit.py