Skip to content

Commit 6052f76

Browse files
committed
Standardize RFK syntax; add curvature estimation + Kappa MLP + clustering to tutorial
1 parent fd166f6 commit 6052f76

File tree

5 files changed: +934 additions, -115 deletions

manify/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
install_import_hook("manify", "beartype.beartype")
99
print("Beartype import hook installed for Manify. Will use beartype for type checking.")
1010

11+
from manify.clustering import RiemannianFuzzyKMeans
1112
from manify.curvature_estimation import greedy_signature_selection, sampled_delta_hyperbolicity, sectional_curvature
1213
from manify.embedders import CoordinateLearning, ProductSpaceVAE, SiameseNetwork
1314
from manify.manifolds import Manifold, ProductManifold
@@ -41,5 +42,7 @@
4142
"sampled_delta_hyperbolicity",
4243
"sectional_curvature",
4344
"greedy_signature_selection",
45+
# manify.clustering
46+
"RiemannianFuzzyKMeans",
4447
# no utils
4548
]

manify/clustering/fuzzy_kmeans.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class RiemannianFuzzyKMeans(BaseEstimator, ClusterMixin):
4343
4444
Attributes:
4545
n_clusters: The number of clusters to form.
46-
manifold: An initialized manifold object (from manifolds.py) on which clustering will be performed.
46+
pm: An initialized manifold object (from manifolds.py) on which clustering will be performed.
4747
m: Fuzzifier parameter. Controls the softness of the partition.
4848
lr: Learning rate for the optimizer.
4949
max_iter: Maximum number of iterations for the optimization.
@@ -71,7 +71,7 @@ class RiemannianFuzzyKMeans(BaseEstimator, ClusterMixin):
7171
def __init__(
7272
self,
7373
n_clusters: int,
74-
manifold: Manifold | ProductManifold,
74+
pm: Manifold | ProductManifold,
7575
m: float = 2.0,
7676
lr: float = 0.1,
7777
max_iter: int = 100,
@@ -81,7 +81,7 @@ def __init__(
8181
verbose: bool = False,
8282
):
8383
self.n_clusters = n_clusters
84-
self.manifold = manifold
84+
self.pm = pm
8585
self.m = m
8686
self.lr = lr
8787
self.max_iter = max_iter
@@ -97,11 +97,11 @@ def _init_centers(self, X: Float[torch.Tensor, "n_points n_features"]) -> None:
9797
torch.manual_seed(self.random_state)
9898
np.random.seed(self.random_state)
9999

100-
# Input data X's second dimension should match the manifold's ambient dimension
101-
if X.shape[1] != self.manifold.ambient_dim:
100+
# Input data X's second dimension should match the pm's ambient dimension
101+
if X.shape[1] != self.pm.ambient_dim:
102102
raise ValueError(
103103
f"Input data X's dimension ({X.shape[1]}) does not match "
104-
f"the manifold's ambient dimension ({self.manifold.ambient_dim})."
104+
f"the manifold's ambient dimension ({self.pm.ambient_dim})."
105105
)
106106

107107
# Generate initial centers using the manifold's sample method
@@ -112,16 +112,15 @@ def _init_centers(self, X: Float[torch.Tensor, "n_points n_features"]) -> None:
112112

113113
# For sampling initial centers, we want n_clusters distinct points.
114114
# The .sample() method typically takes a z_mean of shape (num_points_to_sample, ambient_dim).
115-
# If we provide self.manifold.mu0 repeated n_clusters times,
115+
# If we provide self.pm.mu0 repeated n_clusters times,
116116
# it samples n_clusters points, each around mu0.
117-
means_for_sampling_centers = self.manifold.mu0.repeat(self.n_clusters, 1)
118-
centers = self.manifold.sample(z_mean=means_for_sampling_centers)
117+
centers = self.pm.sample(self.n_clusters)
119118

120119
# IMPORTANT: Use self.manifold.manifold for ManifoldParameter,
121120
# as self.manifold is our wrapper and self.manifold.manifold is the geoopt object.
122121
self.mu_ = ManifoldParameter(
123122
centers.clone().detach(), # type: ignore
124-
manifold=self.manifold.manifold,
123+
manifold=self.pm.manifold,
125124
) # Ensure centers are detached
126125
self.mu_.requires_grad_(True)
127126

@@ -150,22 +149,22 @@ def fit(self, X: Float[torch.Tensor, "n_points n_features"], y: None = None) ->
150149
X = torch.tensor(X, dtype=torch.get_default_dtype())
151150

152151
# Ensure X is on the same device as the manifold
153-
X = X.to(self.manifold.device)
152+
X = X.to(self.pm.device)
154153

155-
if X.shape[1] != self.manifold.ambient_dim:
154+
if X.shape[1] != self.pm.ambient_dim:
156155
raise ValueError(
157156
f"Input data X's dimension ({X.shape[1]}) in fit() does not match "
158-
f"the manifold's ambient dimension ({self.manifold.ambient_dim})."
157+
f"the manifold's ambient dimension ({self.pm.ambient_dim})."
159158
)
160159

161160
self._init_centers(X)
162161
m, tol = self.m, self.tol
163162
losses = []
164163
for i in range(self.max_iter):
165164
self.opt_.zero_grad()
166-
# self.manifold.dist is implemented in manifolds.py and handles broadcasting
167-
d = self.manifold.dist(X, self.mu_) # X is (N,D), mu_ is (K,D) -> d is (N,K)
168-
# Original RFK: d = self.manifold.dist(X.unsqueeze(1), self.mu_.unsqueeze(0))
165+
# self.pm.dist is implemented in manifolds.py and handles broadcasting
166+
d = self.pm.dist(X, self.mu_) # X is (N,D), mu_ is (K,D) -> d is (N,K)
167+
# Original RFK: d = self.pm.dist(X.unsqueeze(1), self.mu_.unsqueeze(0))
169168
# The .dist in manifolds.py uses X[:, None] and Y[None, :], so direct call should work if mu_ is (K,D)
170169

171170
S = torch.sum(d.pow(-2 / (m - 1)) + 1e-8, dim=1) # Add epsilon for stability
@@ -181,7 +180,7 @@ def fit(self, X: Float[torch.Tensor, "n_points n_features"], y: None = None) ->
181180
# save the result
182181
self.losses_ = np.array(losses)
183182
with torch.no_grad(): # Ensure no gradients are computed for final calculations
184-
dfin = self.manifold.dist(X, self.mu_) # Re-calculate dist to final centers
183+
dfin = self.pm.dist(X, self.mu_) # Re-calculate dist to final centers
185184
inv = dfin.pow(-2 / (m - 1)) + 1e-8 # Add epsilon
186185
u_final = inv / (inv.sum(dim=1, keepdim=True) + 1e-8) # Add epsilon
187186
self.u_ = u_final.detach().cpu().numpy()
@@ -208,19 +207,19 @@ def predict(self, X: Float[torch.Tensor, "n_points n_features"]) -> Int[torch.Te
208207
X = torch.tensor(X, dtype=torch.get_default_dtype())
209208

210209
# Ensure X is on the same device as the manifold
211-
X = X.to(self.manifold.device)
210+
X = X.to(self.pm.device)
212211

213-
if X.shape[1] != self.manifold.ambient_dim:
212+
if X.shape[1] != self.pm.ambient_dim:
214213
raise ValueError(
215214
f"Input data X's dimension ({X.shape[1]}) in predict() does not match "
216-
f"the manifold's ambient dimension ({self.manifold.ambient_dim})."
215+
f"the manifold's ambient dimension ({self.pm.ambient_dim})."
217216
)
218217

219218
if not hasattr(self, "mu_") or self.mu_ is None:
220219
raise RuntimeError("The RFK model has not been fitted yet. Call 'fit' before 'predict'.")
221220

222221
with torch.no_grad():
223-
dmat = self.manifold.dist(X, self.mu_) # X is (N,D), mu_ is (K,D) -> dmat is (N,K)
222+
dmat = self.pm.dist(X, self.mu_) # X is (N,D), mu_ is (K,D) -> dmat is (N,K)
224223
inv = dmat.pow(-2 / (self.m - 1)) + 1e-8 # Add epsilon
225224
u = inv / (inv.sum(dim=1, keepdim=True) + 1e-8) # Add epsilon
226225
labels = torch.argmax(u, dim=1).cpu().numpy()

manify/curvature_estimation/greedy_method.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def greedy_signature_selection(
2020
candidate_components: Iterable[tuple[float, int]] = ((-1.0, 2), (0.0, 2), (1.0, 2)),
2121
max_components: int = 3,
2222
pipeline: Callable[..., float] = distortion_pipeline,
23+
verbose: bool = False,
2324
**kwargs: dict[str, Any],
2425
) -> tuple[ProductManifold, list[float]]:
2526
r"""Greedily estimates an optimal product manifold signature.
@@ -32,6 +33,7 @@ def greedy_signature_selection(
3233
candidate_components: Candidate (curvature, dimension) pairs to consider.
3334
max_components: Maximum number of components to include.
3435
pipeline: Function that takes a ProductManifold, plus additional arguments, and returns a loss value.
36+
verbose: If True, prints progress information.
3537
**kwargs: Additional keyword arguments to pass to the pipeline function.
3638
3739
Returns:
@@ -44,25 +46,35 @@ def greedy_signature_selection(
4446
candidate_components_list = list(candidate_components) # For type safe iteration
4547

4648
# Greedy loop
47-
for _ in range(max_components):
49+
for i in range(max_components):
50+
if verbose:
51+
print(f"Iteration {i + 1}/{max_components}")
4852
best_loss, best_idx = current_loss, -1
4953

5054
# Try each candidate
5155
for idx, comp in enumerate(candidate_components_list):
52-
pm = ProductManifold(signature=signature + [comp])
56+
if verbose:
57+
print(f" Trying component {comp} (index {idx})")
58+
pm = ProductManifold(signature=signature.copy() + [comp])
5359
loss = pipeline(pm, **kwargs)
5460
if loss < best_loss:
5561
best_loss, best_idx = loss, idx
5662

5763
# If no improvement, stop
5864
if best_idx < 0:
65+
if verbose:
66+
print("No improvement found, stopping.")
5967
break
6068

6169
# Otherwise accept that component
6270
signature.append(candidate_components_list[best_idx])
6371
current_loss = best_loss
6472
loss_history.append(current_loss)
73+
if verbose:
74+
print(f" Accepted component {candidate_components_list[best_idx]} with loss {current_loss:.4f}")
75+
print(f" Current signature: {signature}")
76+
print()
6577

6678
# Return final manifold
67-
optimal_pm = ProductManifold(signature=signature)
79+
optimal_pm = ProductManifold(signature=signature.copy())
6880
return optimal_pm, loss_history

notebooks/Manify Tutorial.ipynb

Lines changed: 895 additions & 90 deletions
Large diffs are not rendered by default.

tests/test_curvature_estimation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def test_delta_hyperbolicity():
7575
def test_greedy_method():
7676
# Get a very small subset of the polblogs dataset
7777
_, D, _, y = load_hf("polblogs")
78-
D = D[:128, :128] / D.max()
78+
D = D[:128, :128]
7979
y = y[:128]
8080
D = D / D.max()
8181

Commit comments: 0