Skip to content

Commit 8f585d2

Browse files
committed
Many typing fixes
1 parent 5147a81 commit 8f585d2

File tree

11 files changed

+940
-124
lines changed

11 files changed

+940
-124
lines changed

manify/curvature_estimation/delta_hyperbolicity.py

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44

55
import torch
66
from jaxtyping import Float
7+
from typing import Tuple
78

89

9-
def sampled_delta_hyperbolicity(dismat, n_samples=1000, reference_idx=0):
10-
n = dismat.shape[0]
10+
def sampled_delta_hyperbolicity(
11+
D: Float[torch.Tensor, "n_points n_points"], n_samples: int = 1000, reference_idx: int = 0
12+
) -> Tuple[Float[torch.Tensor, "n_samples,"], Float[torch.Tensor, "n_samples 3"]]:
13+
n = D.shape[0]
1114
# Sample n_samples triplets of points randomly
1215
indices = torch.randint(0, n, (n_samples, 3))
1316

@@ -17,31 +20,31 @@ def sampled_delta_hyperbolicity(dismat, n_samples=1000, reference_idx=0):
1720
x, y, z = indices.T
1821
w = reference_idx # set reference point
1922

20-
xy_w = 0.5 * (dismat[w, x] + dismat[w, y] - dismat[x, y])
21-
xz_w = 0.5 * (dismat[w, x] + dismat[w, z] - dismat[x, z])
22-
yz_w = 0.5 * (dismat[w, y] + dismat[w, z] - dismat[y, z])
23+
xy_w = 0.5 * (D[w, x] + D[w, y] - D[x, y])
24+
xz_w = 0.5 * (D[w, x] + D[w, z] - D[x, z])
25+
yz_w = 0.5 * (D[w, y] + D[w, z] - D[y, z])
2326

2427
# delta(x,y,z) = min((x,y)_w, (y,z)_w) - (x,z)_w
2528
deltas = torch.minimum(xy_w, yz_w) - xz_w
26-
diam = torch.max(dismat)
29+
diam = torch.max(D)
2730
rel_deltas = 2 * deltas / diam
2831

2932
return rel_deltas, indices
3033

3134

3235
def iterative_delta_hyperbolicity(
33-
dismat: Float[torch.Tensor, "n_points n_points"],
36+
D: Float[torch.Tensor, "n_points n_points"], reference_idx: int = 0
3437
) -> Float[torch.Tensor, "n_points n_points n_points"]:
3538
"""delta(x,y,z) = min((x,y)_w,(y-z)_w) - (x,z)_w"""
36-
n = dismat.shape[0]
37-
w = 0
39+
n = D.shape[0]
40+
w = reference_idx
3841
gromov_products = torch.zeros((n, n))
3942
deltas = torch.zeros((n, n, n))
4043

4144
# Get Gromov Products
4245
for x in range(n):
4346
for y in range(n):
44-
gromov_products[x, y] = gromov_product(w, x, y, dismat)
47+
gromov_products[x, y] = gromov_product(w, x, y, D)
4548

4649
# Get Deltas
4750
for x in range(n):
@@ -52,55 +55,52 @@ def iterative_delta_hyperbolicity(
5255
yz_w = gromov_products[y, z]
5356
deltas[x, y, z] = torch.minimum(xy_w, yz_w) - xz_w
5457

55-
diam = torch.max(dismat)
58+
diam = torch.max(D)
5659
rel_deltas = 2 * deltas / diam
5760

5861
return rel_deltas, gromov_products
5962

6063

61-
def gromov_product(i: int, j: int, k: int, dismat: Float[torch.Tensor, "n_points n_points"]) -> float:
64+
def gromov_product(i: int, j: int, k: int, D: Float[torch.Tensor, "n_points n_points"]) -> float:
6265
"""(j,k)_i = 0.5 (d(i,j) + d(i,k) - d(j,k))"""
63-
d_ij = dismat[i, j]
64-
d_ik = dismat[i, k]
65-
d_jk = dismat[j, k]
66-
return 0.5 * (d_ij + d_ik - d_jk)
66+
return float(0.5 * (D[i, j] + D[i, k] - D[j, k]))
6767

6868

6969
def delta_hyperbolicity(
70-
dismat: Float[torch.Tensor, "n_points n_points"], relative=True, full=False
70+
D: Float[torch.Tensor, "n_points n_points"], reference_idx: int = 0, relative: bool = True, full: bool = False
7171
) -> Float[torch.Tensor, ""]:
7272
"""
7373
Compute the delta-hyperbolicity of a metric space.
7474
7575
Args:
76-
dismat: Distance matrix of the metric space.
76+
D: Distance matrix of the metric space.
7777
relative: Whether to return the relative delta-hyperbolicity.
7878
full: Whether to return the full delta tensor or just the maximum delta.
7979
8080
Returns:
8181
delta: Delta-hyperbolicity of the metric space.
8282
"""
8383

84-
n = dismat.shape[0]
85-
p = 0
84+
n = D.shape[0]
85+
w = reference_idx
8686

87-
row = dismat[p, :].unsqueeze(0) # (1,N)
88-
col = dismat[:, p].unsqueeze(1) # (N,1)
89-
XY_p = 0.5 * (row + col - dismat)
87+
row = D[w, :].unsqueeze(0) # (1,N)
88+
col = D[:, w].unsqueeze(1) # (N,1)
89+
XY_w = 0.5 * (row + col - D)
9090

91-
XY_p_xy = XY_p.unsqueeze(2).expand(-1, -1, n) # (n,n,n)
92-
XY_p_yz = XY_p.unsqueeze(0).expand(n, -1, -1) # (n,n,n)
93-
XY_p_xz = XY_p.unsqueeze(1).expand(-1, n, -1) # (n,n,n)
91+
XY_w_xy = XY_w.unsqueeze(2).expand(-1, -1, n) # (n,n,n)
92+
XY_w_yz = XY_w.unsqueeze(0).expand(n, -1, -1) # (n,n,n)
93+
XY_w_xz = XY_w.unsqueeze(1).expand(-1, n, -1) # (n,n,n)
9494

95-
out = torch.minimum(XY_p_xy, XY_p_yz)
95+
out = torch.minimum(XY_w_xy, XY_w_yz)
9696

9797
if not full:
98-
delta = (out - XY_p_xz).max().item()
98+
delta = (out - XY_w_xz).max().item()
9999
else:
100-
delta = out - XY_p_xz
100+
delta = out - XY_w_xz
101101

102102
if relative:
103-
diam = torch.max(dismat).item()
103+
diam = torch.max(D).item()
104104
delta = 2 * delta / diam
105105

106106
return delta

manify/curvature_estimation/greedy_method.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from __future__ import annotations
44

5-
from typing import Tuple
5+
from typing import Tuple, Any
66

77
import torch
88

@@ -12,12 +12,8 @@
1212
def greedy_curvature_method(
1313
pm: ProductManifold,
1414
dists: torch.Tensor,
15-
candidate_components: Tuple[Tuple[float, int], ...] = (
16-
(-1.0, 2),
17-
(0.0, 2),
18-
(1.0, 2),
19-
),
15+
candidate_components: Tuple[Tuple[float, int], ...] = ((-1.0, 2), (0.0, 2), (1.0, 2)),
2016
max_components: int = 3,
21-
):
17+
) -> Any:
2218
"""The greedy curvature estimation method from Tabaghi et al. at https://arxiv.org/pdf/2102.10204"""
2319
raise NotImplementedError

manify/manifolds.py

Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,7 @@ def sample(
196196
sigma: Optional[Float[torch.Tensor, "n_points n_dim n_dim"]] = None,
197197
) -> Union[
198198
Float[torch.Tensor, "n_points n_ambient_dim"],
199-
Tuple[
200-
Float[torch.Tensor, "n_points n_ambient_dim"],
201-
Float[torch.Tensor, "n_points n_dim"],
202-
],
199+
Tuple[Float[torch.Tensor, "n_points n_ambient_dim"], Float[torch.Tensor, "n_points n_dim"]],
203200
]:
204201
"""
205202
Sample from the variational distribution.
@@ -228,7 +225,7 @@ def sample(
228225
N = torch.distributions.MultivariateNormal(
229226
loc=torch.zeros((n, self.dim), device=self.device), covariance_matrix=sigma
230227
)
231-
v = N.sample() # type: ignore
228+
v = N.sample()
232229

233230
# Don't need to adjust normal vectors for the Scaled manifold class in geoopt - very cool!
234231

@@ -356,14 +353,14 @@ def stereographic(self, *points: Float[torch.Tensor, "n_points n_dim"]) -> Tuple
356353

357354
if self.is_stereographic:
358355
print("Manifold is already in stereographic coordinates.")
359-
return self, *points # type: ignore
356+
return self, *points
360357

361358
# Convert manifold
362359
stereo_manifold = Manifold(self.curvature, self.dim, device=self.device, stereographic=True)
363360

364361
# Euclidean edge case
365362
if self.type == "E":
366-
return stereo_manifold, *points # type: ignore
363+
return stereo_manifold, *points
367364

368365
# Convert points
369366
num = [X[:, 1:] for X in points]
@@ -373,7 +370,7 @@ def stereographic(self, *points: Float[torch.Tensor, "n_points n_dim"]) -> Tuple
373370
stereo_points = [n / d for n, d in zip(num, denom)]
374371
assert all([stereo_manifold.manifold.check_point(X) for X in stereo_points])
375372

376-
return stereo_manifold, *stereo_points # type: ignore
373+
return stereo_manifold, *stereo_points
377374

378375
def inverse_stereographic(self, *points: Float[torch.Tensor, "n_points n_dim_stereo"]) -> Tuple["Manifold", ...]:
379376
"""
@@ -389,14 +386,14 @@ def inverse_stereographic(self, *points: Float[torch.Tensor, "n_points n_dim_ste
389386
"""
390387
if not self.is_stereographic:
391388
print("Manifold is already in original coordinates.")
392-
return self, *points # type: ignore
389+
return self, *points
393390

394391
# Convert manifold
395392
orig_manifold = Manifold(self.curvature, self.dim, device=self.device, stereographic=False)
396393

397394
# Euclidean edge case
398395
if self.type == "E":
399-
return orig_manifold, *points # type: ignore
396+
return orig_manifold, *points
400397

401398
# Inverse projection for points
402399
out = []
@@ -427,7 +424,7 @@ def inverse_stereographic(self, *points: Float[torch.Tensor, "n_points n_dim_ste
427424

428425
out.append(inv_points)
429426

430-
return orig_manifold, *out # type: ignore
427+
return orig_manifold, *out
431428

432429
def apply(self, f: Callable) -> Callable:
433430
"""
@@ -475,11 +472,11 @@ def __init__(self, signature: List[Tuple[float, int]], device: str = "cpu", ster
475472
# Actually initialize the geoopt manifolds; other derived properties
476473
self.P = [Manifold(curvature, dim, device=device, stereographic=stereographic) for curvature, dim in signature]
477474
manifold_class = geoopt.StereographicProductManifold if stereographic else geoopt.ProductManifold
478-
self.manifold = manifold_class(*[(M.manifold, M.ambient_dim) for M in self.P]).to(device) # type: ignore
475+
self.manifold = manifold_class(*[(M.manifold, M.ambient_dim) for M in self.P]).to(device)
479476
self.name = " x ".join([M.name for M in self.P])
480477

481478
# Origin
482-
self.mu0 = torch.cat([M.mu0 for M in self.P], axis=1).to(self.device) # type: ignore
479+
self.mu0 = torch.cat([M.mu0 for M in self.P], axis=1).to(self.device)
483480

484481
# Manifold <-> Dimension mapping
485482
self.ambient_dim, self.n_manifolds, self.dim = 0, 0, 0
@@ -507,11 +504,11 @@ def __init__(self, signature: List[Tuple[float, int]], device: str = "cpu", ster
507504
for j, k in zip(intrinsic_dims, ambient_dims[-len(intrinsic_dims) :]):
508505
self.projection_matrix[j, k] = 1.0
509506

510-
def params(self):
507+
def params(self) -> List[float]:
511508
"""Returns scales for all component manifolds"""
512509
return [x.scale() for x in self.manifold.manifolds]
513510

514-
def to(self, device: str):
511+
def to(self, device: str) -> "ProductManifold":
515512
"""Move all components to a new device"""
516513
self.device = device
517514
self.P = [M.to(device) for M in self.P]
@@ -628,24 +625,23 @@ def log_likelihood(
628625
M.log_likelihood(z_M, mu_M, sigma_M).unsqueeze(dim=1)
629626
for M, z_M, mu_M, sigma_M in zip(self.P, z_factorized, mu_factorized, sigma_factorized)
630627
]
631-
return torch.cat(component_lls, axis=1).sum(axis=1) # type: ignore
628+
return torch.cat(component_lls, axis=1).sum(axis=1)
632629

633630
def stereographic(self, *points: Float[torch.Tensor, "n_points n_dim"]) -> Tuple["ProductManifold", ...]:
634631
if self.is_stereographic:
635632
print("Manifold is already in stereographic coordinates.")
636-
return self, *points # type: ignore
633+
return self, *points
637634

638635
# Convert manifold
639636
stereo_manifold = ProductManifold(self.signature, device=self.device, stereographic=True)
640637

641638
# Convert points
642639
stereo_points = [
643-
torch.hstack([M.stereographic(x)[1] for x, M in zip(self.factorize(X), self.P)]) # type: ignore
644-
for X in points
640+
torch.hstack([M.stereographic(x)[1] for x, M in zip(self.factorize(X), self.P)]) for X in points
645641
]
646642
assert all([stereo_manifold.manifold.check_point(X) for X in stereo_points])
647643

648-
return stereo_manifold, *stereo_points # type: ignore
644+
return stereo_manifold, *stereo_points
649645

650646
def inverse_stereographic(self, *points: Float[torch.Tensor, "n_points n_dim_stereo"]) -> Tuple[Manifold]:
651647
if not self.is_stereographic:
@@ -660,9 +656,9 @@ def inverse_stereographic(self, *points: Float[torch.Tensor, "n_points n_dim_ste
660656
]
661657
assert all([orig_manifold.manifold.check_point(X) for X in orig_points])
662658

663-
return orig_manifold, *orig_points # type: ignore
659+
return orig_manifold, *orig_points
664660

665-
@torch.no_grad()
661+
@torch.no_grad() # type: ignore
666662
def gaussian_mixture(
667663
self,
668664
num_points: int = 1_000,
@@ -713,7 +709,7 @@ def gaussian_mixture(
713709
z_mean=torch.stack([self.mu0] * num_clusters),
714710
sigma_factorized=[torch.stack([torch.eye(M.dim)] * num_clusters) * cov_scale_means for M in self.P],
715711
)
716-
assert cluster_means.shape == (num_clusters, self.ambient_dim) # type: ignore
712+
assert cluster_means.shape == (num_clusters, self.ambient_dim)
717713

718714
# Generate class assignments
719715
cluster_probs = torch.rand(num_clusters)
@@ -726,10 +722,8 @@ def gaussian_mixture(
726722

727723
# Generate covariance matrices for each class - Wishart distribution
728724
cov_matrices = [
729-
torch.distributions.Wishart(
730-
df=M.dim + 1, covariance_matrix=torch.eye(M.dim) * cov_scale_points # type: ignore
731-
).sample(
732-
sample_shape=(num_clusters,) # type: ignore
725+
torch.distributions.Wishart(df=M.dim + 1, covariance_matrix=torch.eye(M.dim) * cov_scale_points).sample(
726+
sample_shape=(num_clusters,)
733727
)
734728
+ torch.eye(M.dim) * 1e-5 # jitter to avoid singularity
735729
for M in self.P
@@ -764,7 +758,7 @@ def gaussian_mixture(
764758

765759
# Noise component
766760
N = torch.distributions.Normal(0, regression_noise_std)
767-
v = N.sample((num_points,)).to(self.device) # type: ignore
761+
v = N.sample((num_points,)).to(self.device)
768762
labels += v
769763

770764
# Normalize regression labels to range [0, 1] so that RMSE can be more easily interpreted

0 commit comments

Comments
 (0)