KrishnaswamyLab
diff --git a/Python/test/test_cluster.py b/Python/test/test_cluster.py
Lines changed: 37 additions & 26 deletions
diff --git a/Python/test/test_mds.py b/Python/test/test_mds.py
Lines changed: 37 additions & 12 deletions
@@ -54,19 +54,22 @@ def test_kmeans_basic():
     clusters = cluster.kmeans(phate_op, n_clusters=3)
 
     # Should return integer array
-    assert np.issubdtype(clusters.dtype, np.signedinteger), \
-        f"Expected integer dtype, got {clusters.dtype}"
+    assert np.issubdtype(
+        clusters.dtype, np.signedinteger
+    ), f"Expected integer dtype, got {clusters.dtype}"
     print(f"✓ Returns integer dtype: {clusters.dtype}")
 
     # Should have correct shape
     assert len(clusters.shape) == 1, f"Expected 1D array, got shape {clusters.shape}"
-    assert len(clusters) == tree_data.shape[0], \
-        f"Expected {tree_data.shape[0]} labels, got {len(clusters)}"
+    assert (
+        len(clusters) == tree_data.shape[0]
+    ), f"Expected {tree_data.shape[0]} labels, got {len(clusters)}"
     print(f"✓ Correct shape: {clusters.shape}")
 
     # Should have exactly 3 clusters
-    assert len(np.unique(clusters)) == 3, \
-        f"Expected 3 clusters, got {len(np.unique(clusters))}"
+    assert (
+        len(np.unique(clusters)) == 3
+    ), f"Expected 3 clusters, got {len(np.unique(clusters))}"
     print(f"✓ Correct number of clusters: {len(np.unique(clusters))}")
 
     # Cluster labels should start from 0
@@ -136,8 +139,9 @@ def test_kmeans_max_clusters():
         n_clusters = len(np.unique(clusters))
 
         # Should respect max_clusters (search from 2 to max_k)
-        assert 2 <= n_clusters < max_k, \
-            f"Expected clusters in [2, {max_k}), got {n_clusters}"
+        assert (
+            2 <= n_clusters < max_k
+        ), f"Expected clusters in [2, {max_k}), got {n_clusters}"
         print(f"✓ max_clusters={max_k}: selected {n_clusters} clusters")
 
     print("✓ Test 4 PASSED\n")
@@ -156,8 +160,9 @@ def test_kmeans_random_state():
     clusters2 = cluster.kmeans(phate_op, n_clusters=4, random_state=42)
 
     # Should be identical
-    assert np.array_equal(clusters1, clusters2), \
-        "Same random_state should give identical results"
+    assert np.array_equal(
+        clusters1, clusters2
+    ), "Same random_state should give identical results"
     print("✓ Same random_state gives identical results")
 
     # Run with different random_state
@@ -187,10 +192,12 @@ def test_kmeans_deprecated_k_parameter():
 
         # Check warning was raised
         assert len(w) == 1, f"Expected 1 warning, got {len(w)}"
-        assert issubclass(w[0].category, FutureWarning), \
-            f"Expected FutureWarning, got {w[0].category}"
-        assert "k is deprecated" in str(w[0].message).lower(), \
-            f"Unexpected warning message: {w[0].message}"
+        assert issubclass(
+            w[0].category, FutureWarning
+        ), f"Expected FutureWarning, got {w[0].category}"
+        assert (
+            "k is deprecated" in str(w[0].message).lower()
+        ), f"Unexpected warning message: {w[0].message}"
         print(f"✓ FutureWarning raised: {w[0].message}")
 
     # Should still work correctly
@@ -217,8 +224,9 @@ def test_silhouette_score_basic():
     score = cluster.silhouette_score(phate_op, n_clusters=3)
 
     # Should return a float
-    assert isinstance(score, (float, np.floating)), \
-        f"Expected float score, got {type(score)}"
+    assert isinstance(
+        score, (float, np.floating)
+    ), f"Expected float score, got {type(score)}"
     print(f"✓ Returns float: {score}")
 
     # Silhouette score should be in [-1, 1]
@@ -262,8 +270,9 @@ def test_silhouette_score_random_state():
     score1 = cluster.silhouette_score(phate_op, n_clusters=3, random_state=42)
     score2 = cluster.silhouette_score(phate_op, n_clusters=3, random_state=42)
 
-    assert np.isclose(score1, score2), \
-        f"Expected same scores, got {score1} and {score2}"
+    assert np.isclose(
+        score1, score2
+    ), f"Expected same scores, got {score1} and {score2}"
     print(f"✓ Reproducible with random_state: {score1:.4f}")
 
     print("✓ Test 9 PASSED\n")
@@ -305,8 +314,9 @@ def test_kmeans_unfitted_phate():
     phate_op = create_unfitted_phate_op()
 
     # Should raise NotFittedError
-    with pytest.raises(exceptions.NotFittedError,
-                      match="This PHATE instance is not fitted yet"):
+    with pytest.raises(
+        exceptions.NotFittedError, match="This PHATE instance is not fitted yet"
+    ):
         cluster.kmeans(phate_op, n_clusters=3)
     print("✓ Correctly raises NotFittedError for unfitted operator")
 
@@ -343,8 +353,9 @@ def test_kmeans_auto_uses_silhouette():
     phate_op, _, _ = create_simple_phate_op()
 
     # Get auto-selected clustering
-    clusters_auto = cluster.kmeans(phate_op, n_clusters="auto",
-                                   max_clusters=8, random_state=42)
+    clusters_auto = cluster.kmeans(
+        phate_op, n_clusters="auto", max_clusters=8, random_state=42
+    )
     n_auto = len(np.unique(clusters_auto))
 
     print(f"Auto-selected: {n_auto} clusters")
@@ -359,8 +370,7 @@ def test_kmeans_auto_uses_silhouette():
     # The auto-selected k should correspond to max silhouette
     best_k = np.argmax(silhouette_scores) + 2  # +2 because range starts at 2
 
-    assert n_auto == best_k, \
-        f"Auto-selected {n_auto} but best silhouette at k={best_k}"
+    assert n_auto == best_k, f"Auto-selected {n_auto} but best silhouette at k={best_k}"
     print(f"✓ Auto mode correctly selected k={best_k} (max silhouette)")
 
     print("✓ Test 13 PASSED\n")
@@ -388,8 +398,9 @@ def test_clustering_stability():
     clusters2 = cluster.kmeans(phate_op2, n_clusters=3, random_state=42)
 
     # Should get identical results
-    assert np.array_equal(clusters1, clusters2), \
-        "Same data and parameters should give identical clustering"
+    assert np.array_equal(
+        clusters1, clusters2
+    ), "Same data and parameters should give identical clustering"
     print("✓ Clustering is reproducible with same data and parameters")
 
     print("✓ Test 14 PASSED\n")
 
@@ -91,7 +91,9 @@ def test_sgd_mds_vs_classic():
     # Allow small increase due to stochastic optimization
     stress_ratio = stress_sgd / stress_classic
     print(f"Stress ratio (SGD/Classic): {stress_ratio:.6f}")
-    assert stress_ratio < 1.1, f"SGD-MDS significantly increased stress: {stress_ratio:.3f}x"
+    assert (
+        stress_ratio < 1.1
+    ), f"SGD-MDS significantly increased stress: {stress_ratio:.3f}x"
     print("✓ SGD-MDS maintains or improves stress")
 
     print("✓ Test 2 PASSED\n")
@@ -113,7 +115,10 @@ def test_sgd_mds_metric_wrapper():
 
         Y = sgd_mds_metric(D, n_components=2, random_state=42, verbose=0)
 
-        assert Y.shape == (n_samples, 2), f"Expected shape ({n_samples}, 2), got {Y.shape}"
+        assert Y.shape == (
+            n_samples,
+            2,
+        ), f"Expected shape ({n_samples}, 2), got {Y.shape}"
         assert not np.any(np.isnan(Y)), "Output contains NaN values"
         print(f"  ✓ n_samples={n_samples}: shape={Y.shape}, no NaN")
 
@@ -137,7 +142,12 @@ def test_sgd_mds_vs_smacof():
 
     # Run SMACOF
     Y_smacof = smacof(
-        D, n_components=2, init=Y_classic.copy(), random_state=42, metric=True, max_iter=300
+        D,
+        n_components=2,
+        init=Y_classic.copy(),
+        random_state=42,
+        metric=True,
+        max_iter=300,
     )
     stress_smacof = compute_stress(D, Y_smacof)
     print(f"SMACOF stress: {stress_smacof:.6f}")
@@ -277,11 +287,12 @@ def test_sgd_mds_different_n_components():
 
     for n_comp in [1, 2, 3, 5]:
         print(f"\nTesting with n_components={n_comp}")
-        Y = sgd_mds(
-            D, n_components=n_comp, n_iter=200, random_state=42
-        )
+        Y = sgd_mds(D, n_components=n_comp, n_iter=200, random_state=42)
 
-        assert Y.shape == (n_samples, n_comp), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
+        assert Y.shape == (
+            n_samples,
+            n_comp,
+        ), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
         assert not np.any(np.isnan(Y))
         print(f"  ✓ n_components={n_comp}: shape={Y.shape}")
 
@@ -373,7 +384,10 @@ def test_classic_mds_different_dimensions():
         print(f"\nTesting with n_components={n_comp}")
         Y = classic(D, n_components=n_comp, random_state=42)
 
-        assert Y.shape == (n_samples, n_comp), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
+        assert Y.shape == (
+            n_samples,
+            n_comp,
+        ), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
         assert not np.any(np.isnan(Y))
         stress = compute_stress(D, Y)
         print(f"  ✓ n_components={n_comp}: shape={Y.shape}, stress={stress:.2e}")
@@ -461,7 +475,12 @@ def test_smacof_vs_classic():
     # Run both methods
     Y_classic = classic(D, n_components=2, random_state=42)
     Y_smacof = smacof(
-        D, n_components=2, init=Y_classic.copy(), random_state=42, metric=True, max_iter=300
+        D,
+        n_components=2,
+        init=Y_classic.copy(),
+        random_state=42,
+        metric=True,
+        max_iter=300,
     )
 
     stress_classic = compute_stress(D, Y_classic)
@@ -528,7 +547,9 @@ def test_mds_zero_distances():
     Y_classic = classic(D, n_components=2, random_state=42)
     Y_smacof = smacof(D, n_components=2, random_state=42, metric=True, max_iter=300)
     # SGD-MDS needs more iterations and lower learning rate to converge on duplicates
-    Y_sgd = sgd_mds(D, n_components=2, n_iter=1000, learning_rate=0.01, random_state=42, verbose=0)
+    Y_sgd = sgd_mds(
+        D, n_components=2, n_iter=1000, learning_rate=0.01, random_state=42, verbose=0
+    )
 
     for name, Y in [("Classic", Y_classic), ("SMACOF", Y_smacof), ("SGD", Y_sgd)]:
         assert not np.any(np.isnan(Y)), f"{name}: contains NaN with duplicates"
@@ -547,8 +568,12 @@ def test_mds_zero_distances():
         print(f"{name}: Point 5 is rank {rank_5_from_10} neighbor of point 10")
 
         # Duplicates should be in top 5 nearest neighbors of each other
-        assert rank_10_from_5 <= 5, f"{name}: duplicate not in top 5 neighbors (rank={rank_10_from_5})"
-        assert rank_5_from_10 <= 5, f"{name}: duplicate not in top 5 neighbors (rank={rank_5_from_10})"
+        assert (
+            rank_10_from_5 <= 5
+        ), f"{name}: duplicate not in top 5 neighbors (rank={rank_10_from_5})"
+        assert (
+            rank_5_from_10 <= 5
+        ), f"{name}: duplicate not in top 5 neighbors (rank={rank_5_from_10})"
 
     print("✓ All methods handle duplicates correctly")
     print("✓ Test 15 PASSED\n")