Skip to content

Commit 1c62fe3

Browse files
author
Joao Felipe Rocha
committed
Linted test files
1 parent 80073f4 commit 1c62fe3

File tree

7 files changed

+236
-145
lines changed

7 files changed

+236
-145
lines changed

Python/test/test_cluster.py

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,22 @@ def test_kmeans_basic():
5454
clusters = cluster.kmeans(phate_op, n_clusters=3)
5555

5656
# Should return integer array
57-
assert np.issubdtype(clusters.dtype, np.signedinteger), \
58-
f"Expected integer dtype, got {clusters.dtype}"
57+
assert np.issubdtype(
58+
clusters.dtype, np.signedinteger
59+
), f"Expected integer dtype, got {clusters.dtype}"
5960
print(f"✓ Returns integer dtype: {clusters.dtype}")
6061

6162
# Should have correct shape
6263
assert len(clusters.shape) == 1, f"Expected 1D array, got shape {clusters.shape}"
63-
assert len(clusters) == tree_data.shape[0], \
64-
f"Expected {tree_data.shape[0]} labels, got {len(clusters)}"
64+
assert (
65+
len(clusters) == tree_data.shape[0]
66+
), f"Expected {tree_data.shape[0]} labels, got {len(clusters)}"
6567
print(f"✓ Correct shape: {clusters.shape}")
6668

6769
# Should have exactly 3 clusters
68-
assert len(np.unique(clusters)) == 3, \
69-
f"Expected 3 clusters, got {len(np.unique(clusters))}"
70+
assert (
71+
len(np.unique(clusters)) == 3
72+
), f"Expected 3 clusters, got {len(np.unique(clusters))}"
7073
print(f"✓ Correct number of clusters: {len(np.unique(clusters))}")
7174

7275
# Cluster labels should start from 0
@@ -136,8 +139,9 @@ def test_kmeans_max_clusters():
136139
n_clusters = len(np.unique(clusters))
137140

138141
# Should respect max_clusters (search from 2 to max_k)
139-
assert 2 <= n_clusters < max_k, \
140-
f"Expected clusters in [2, {max_k}), got {n_clusters}"
142+
assert (
143+
2 <= n_clusters < max_k
144+
), f"Expected clusters in [2, {max_k}), got {n_clusters}"
141145
print(f"✓ max_clusters={max_k}: selected {n_clusters} clusters")
142146

143147
print("✓ Test 4 PASSED\n")
@@ -156,8 +160,9 @@ def test_kmeans_random_state():
156160
clusters2 = cluster.kmeans(phate_op, n_clusters=4, random_state=42)
157161

158162
# Should be identical
159-
assert np.array_equal(clusters1, clusters2), \
160-
"Same random_state should give identical results"
163+
assert np.array_equal(
164+
clusters1, clusters2
165+
), "Same random_state should give identical results"
161166
print("✓ Same random_state gives identical results")
162167

163168
# Run with different random_state
@@ -187,10 +192,12 @@ def test_kmeans_deprecated_k_parameter():
187192

188193
# Check warning was raised
189194
assert len(w) == 1, f"Expected 1 warning, got {len(w)}"
190-
assert issubclass(w[0].category, FutureWarning), \
191-
f"Expected FutureWarning, got {w[0].category}"
192-
assert "k is deprecated" in str(w[0].message).lower(), \
193-
f"Unexpected warning message: {w[0].message}"
195+
assert issubclass(
196+
w[0].category, FutureWarning
197+
), f"Expected FutureWarning, got {w[0].category}"
198+
assert (
199+
"k is deprecated" in str(w[0].message).lower()
200+
), f"Unexpected warning message: {w[0].message}"
194201
print(f"✓ FutureWarning raised: {w[0].message}")
195202

196203
# Should still work correctly
@@ -217,8 +224,9 @@ def test_silhouette_score_basic():
217224
score = cluster.silhouette_score(phate_op, n_clusters=3)
218225

219226
# Should return a float
220-
assert isinstance(score, (float, np.floating)), \
221-
f"Expected float score, got {type(score)}"
227+
assert isinstance(
228+
score, (float, np.floating)
229+
), f"Expected float score, got {type(score)}"
222230
print(f"✓ Returns float: {score}")
223231

224232
# Silhouette score should be in [-1, 1]
@@ -262,8 +270,9 @@ def test_silhouette_score_random_state():
262270
score1 = cluster.silhouette_score(phate_op, n_clusters=3, random_state=42)
263271
score2 = cluster.silhouette_score(phate_op, n_clusters=3, random_state=42)
264272

265-
assert np.isclose(score1, score2), \
266-
f"Expected same scores, got {score1} and {score2}"
273+
assert np.isclose(
274+
score1, score2
275+
), f"Expected same scores, got {score1} and {score2}"
267276
print(f"✓ Reproducible with random_state: {score1:.4f}")
268277

269278
print("✓ Test 9 PASSED\n")
@@ -305,8 +314,9 @@ def test_kmeans_unfitted_phate():
305314
phate_op = create_unfitted_phate_op()
306315

307316
# Should raise NotFittedError
308-
with pytest.raises(exceptions.NotFittedError,
309-
match="This PHATE instance is not fitted yet"):
317+
with pytest.raises(
318+
exceptions.NotFittedError, match="This PHATE instance is not fitted yet"
319+
):
310320
cluster.kmeans(phate_op, n_clusters=3)
311321
print("✓ Correctly raises NotFittedError for unfitted operator")
312322

@@ -343,8 +353,9 @@ def test_kmeans_auto_uses_silhouette():
343353
phate_op, _, _ = create_simple_phate_op()
344354

345355
# Get auto-selected clustering
346-
clusters_auto = cluster.kmeans(phate_op, n_clusters="auto",
347-
max_clusters=8, random_state=42)
356+
clusters_auto = cluster.kmeans(
357+
phate_op, n_clusters="auto", max_clusters=8, random_state=42
358+
)
348359
n_auto = len(np.unique(clusters_auto))
349360

350361
print(f"Auto-selected: {n_auto} clusters")
@@ -359,8 +370,7 @@ def test_kmeans_auto_uses_silhouette():
359370
# The auto-selected k should correspond to max silhouette
360371
best_k = np.argmax(silhouette_scores) + 2 # +2 because range starts at 2
361372

362-
assert n_auto == best_k, \
363-
f"Auto-selected {n_auto} but best silhouette at k={best_k}"
373+
assert n_auto == best_k, f"Auto-selected {n_auto} but best silhouette at k={best_k}"
364374
print(f"✓ Auto mode correctly selected k={best_k} (max silhouette)")
365375

366376
print("✓ Test 13 PASSED\n")
@@ -388,8 +398,9 @@ def test_clustering_stability():
388398
clusters2 = cluster.kmeans(phate_op2, n_clusters=3, random_state=42)
389399

390400
# Should get identical results
391-
assert np.array_equal(clusters1, clusters2), \
392-
"Same data and parameters should give identical clustering"
401+
assert np.array_equal(
402+
clusters1, clusters2
403+
), "Same data and parameters should give identical clustering"
393404
print("✓ Clustering is reproducible with same data and parameters")
394405

395406
print("✓ Test 14 PASSED\n")

Python/test/test_mds.py

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@ def test_sgd_mds_vs_classic():
9191
# Allow small increase due to stochastic optimization
9292
stress_ratio = stress_sgd / stress_classic
9393
print(f"Stress ratio (SGD/Classic): {stress_ratio:.6f}")
94-
assert stress_ratio < 1.1, f"SGD-MDS significantly increased stress: {stress_ratio:.3f}x"
94+
assert (
95+
stress_ratio < 1.1
96+
), f"SGD-MDS significantly increased stress: {stress_ratio:.3f}x"
9597
print("✓ SGD-MDS maintains or improves stress")
9698

9799
print("✓ Test 2 PASSED\n")
@@ -113,7 +115,10 @@ def test_sgd_mds_metric_wrapper():
113115

114116
Y = sgd_mds_metric(D, n_components=2, random_state=42, verbose=0)
115117

116-
assert Y.shape == (n_samples, 2), f"Expected shape ({n_samples}, 2), got {Y.shape}"
118+
assert Y.shape == (
119+
n_samples,
120+
2,
121+
), f"Expected shape ({n_samples}, 2), got {Y.shape}"
117122
assert not np.any(np.isnan(Y)), "Output contains NaN values"
118123
print(f" ✓ n_samples={n_samples}: shape={Y.shape}, no NaN")
119124

@@ -137,7 +142,12 @@ def test_sgd_mds_vs_smacof():
137142

138143
# Run SMACOF
139144
Y_smacof = smacof(
140-
D, n_components=2, init=Y_classic.copy(), random_state=42, metric=True, max_iter=300
145+
D,
146+
n_components=2,
147+
init=Y_classic.copy(),
148+
random_state=42,
149+
metric=True,
150+
max_iter=300,
141151
)
142152
stress_smacof = compute_stress(D, Y_smacof)
143153
print(f"SMACOF stress: {stress_smacof:.6f}")
@@ -277,11 +287,12 @@ def test_sgd_mds_different_n_components():
277287

278288
for n_comp in [1, 2, 3, 5]:
279289
print(f"\nTesting with n_components={n_comp}")
280-
Y = sgd_mds(
281-
D, n_components=n_comp, n_iter=200, random_state=42
282-
)
290+
Y = sgd_mds(D, n_components=n_comp, n_iter=200, random_state=42)
283291

284-
assert Y.shape == (n_samples, n_comp), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
292+
assert Y.shape == (
293+
n_samples,
294+
n_comp,
295+
), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
285296
assert not np.any(np.isnan(Y))
286297
print(f" ✓ n_components={n_comp}: shape={Y.shape}")
287298

@@ -373,7 +384,10 @@ def test_classic_mds_different_dimensions():
373384
print(f"\nTesting with n_components={n_comp}")
374385
Y = classic(D, n_components=n_comp, random_state=42)
375386

376-
assert Y.shape == (n_samples, n_comp), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
387+
assert Y.shape == (
388+
n_samples,
389+
n_comp,
390+
), f"Expected shape ({n_samples}, {n_comp}), got {Y.shape}"
377391
assert not np.any(np.isnan(Y))
378392
stress = compute_stress(D, Y)
379393
print(f" ✓ n_components={n_comp}: shape={Y.shape}, stress={stress:.2e}")
@@ -461,7 +475,12 @@ def test_smacof_vs_classic():
461475
# Run both methods
462476
Y_classic = classic(D, n_components=2, random_state=42)
463477
Y_smacof = smacof(
464-
D, n_components=2, init=Y_classic.copy(), random_state=42, metric=True, max_iter=300
478+
D,
479+
n_components=2,
480+
init=Y_classic.copy(),
481+
random_state=42,
482+
metric=True,
483+
max_iter=300,
465484
)
466485

467486
stress_classic = compute_stress(D, Y_classic)
@@ -528,7 +547,9 @@ def test_mds_zero_distances():
528547
Y_classic = classic(D, n_components=2, random_state=42)
529548
Y_smacof = smacof(D, n_components=2, random_state=42, metric=True, max_iter=300)
530549
# SGD-MDS needs more iterations and lower learning rate to converge on duplicates
531-
Y_sgd = sgd_mds(D, n_components=2, n_iter=1000, learning_rate=0.01, random_state=42, verbose=0)
550+
Y_sgd = sgd_mds(
551+
D, n_components=2, n_iter=1000, learning_rate=0.01, random_state=42, verbose=0
552+
)
532553

533554
for name, Y in [("Classic", Y_classic), ("SMACOF", Y_smacof), ("SGD", Y_sgd)]:
534555
assert not np.any(np.isnan(Y)), f"{name}: contains NaN with duplicates"
@@ -547,8 +568,12 @@ def test_mds_zero_distances():
547568
print(f"{name}: Point 5 is rank {rank_5_from_10} neighbor of point 10")
548569

549570
# Duplicates should be in top 5 nearest neighbors of each other
550-
assert rank_10_from_5 <= 5, f"{name}: duplicate not in top 5 neighbors (rank={rank_10_from_5})"
551-
assert rank_5_from_10 <= 5, f"{name}: duplicate not in top 5 neighbors (rank={rank_5_from_10})"
571+
assert (
572+
rank_10_from_5 <= 5
573+
), f"{name}: duplicate not in top 5 neighbors (rank={rank_10_from_5})"
574+
assert (
575+
rank_5_from_10 <= 5
576+
), f"{name}: duplicate not in top 5 neighbors (rank={rank_5_from_10})"
552577

553578
print("✓ All methods handle duplicates correctly")
554579
print("✓ Test 15 PASSED\n")

0 commit comments

Comments
 (0)