Skip to content

Commit b31df35

Browse files
committed
Add embedders documentation
1 parent bfb8007 commit b31df35

File tree

6 files changed

+352
-131
lines changed

6 files changed

+352
-131
lines changed

manify/embedders/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
"""Tools for embedding data into Riemannian manifolds and product spaces.
2+
3+
The embedders module provides various ways to embed data into manifolds of constant
4+
or mixed curvature. The module includes:
5+
6+
* `coordinate_learning`: Direct optimization of coordinates in a product manifold.
7+
* `siamese`: Siamese network-based embedding for metric learning.
8+
* `vae`: Variational autoencoders for learning representations in product manifolds.
9+
* `_losses`: Loss functions for measuring embedding quality.
10+
"""
11+
112
import manify.embedders.coordinate_learning
213
import manify.embedders.siamese
314
import manify.embedders.vae

manify/embedders/_losses.py

Lines changed: 61 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
"""Implementation of different measurement metrics"""
1+
"""Implementation of metrics and loss functions for evaluating embedding quality.
2+
3+
This module provides various functions to measure the quality of embeddings
4+
in Riemannian manifolds, including distortion metrics, average distance error,
5+
and other evaluation measures for both graph and general embedding tasks.
6+
"""
27

38
from __future__ import annotations
49

@@ -16,19 +21,26 @@ def distortion_loss(
1621
D_true: Float[torch.Tensor, "n_points n_points"],
1722
pairwise: bool = False,
1823
) -> Float[torch.Tensor, ""]:
19-
"""Compute the distortion loss between estimated SQUARED distances and true SQUARED distances.
24+
r"""Computes the distortion loss between estimated and true squared distances.
25+
26+
The distortion loss measures how well the pairwise distances in the embedding space match the true distances. It is
27+
calculated as
28+
29+
$$\sum_{i,j} \left(\left(\frac{D_{\text{est}}(i,j)}{D_{\text{true}}(i,j)}\right)^2 - 1\right),$$
30+
31+
where the sum is over all pairs of points (or just unique pairs if `pairwise=True`).
2032
2133
Args:
22-
D_est: A tensor of estimated pairwise distances.
23-
D_true: A tensor of true pairwise distances.
24-
pairwise: A boolean indicating whether to return whether D_est and D_true are pairwise
34+
D_est: Tensor of estimated pairwise squared distances.
35+
D_true: Tensor of true pairwise squared distances.
36+
pairwise: Whether to consider only unique pairs (upper triangular part of the matrices). Defaults to False.
2537
2638
Returns:
27-
float: A float indicating the distortion loss, calculated as the sum of the squared relative
28-
errors between the estimated and true squared distances.
39+
loss: Scalar tensor representing the distortion loss.
2940
30-
See also: `square_loss` in HazyResearch hyperbolics repo:
31-
https://github.com/HazyResearch/hyperbolics/blob/master/pytorch/hyperbolic_models.py#L178
41+
Note:
42+
This is similar to the `square_loss` in HazyResearch hyperbolics repository:
43+
https://github.com/HazyResearch/hyperbolics/blob/master/pytorch/hyperbolic_models.py#L178
3244
"""
3345

3446
# Turn into flat vectors of pairwise distances. For pairwise distances, we only consider the upper triangle.
@@ -54,15 +66,22 @@ def d_avg(
5466
D_true: Float[torch.Tensor, "n_points n_points"],
5567
pairwise: bool = False,
5668
) -> Float[torch.Tensor, ""]:
57-
"""Average distance error D_av
69+
r"""Computes the average relative distance error (D_avg).
70+
71+
The average distance error is the mean relative error between the estimated and true distances:
72+
73+
$$D_{\text{avg}} = \frac{1}{N} \sum_{i,j} \frac{|D_{\text{est}}(i,j) - D_{\text{true}}(i,j)|}{D_{\text{true}}(i,j)},$$
74+
75+
where $N$ is the number of distances being considered. This metric provides a normalized measure of how accurately
76+
the embedding preserves the original distances.
77+
5878
Args:
59-
D_est (n_points, n_points): A tensor of estimated pairwise distances.
60-
D_true (n_points, n_points).: A tensor of true pairwise distances.
61-
pairwise (bool): A boolean indicating whether to return whether D_est and D_true are pairwise
79+
D_est: Tensor of estimated pairwise distances.
80+
D_true: Tensor of true pairwise distances.
81+
pairwise: Whether to consider only unique pairs (upper triangular part of the matrices). Defaults to False.
6282
6383
Returns:
64-
float: A float indicating the average distance error D_avg, calculated as the
65-
mean relative error across all pairwise distances.
84+
d_avg: Scalar tensor representing the average relative distance error.
6685
"""
6786

6887
if pairwise:
@@ -84,22 +103,41 @@ def d_avg(
84103

85104

86105
def mean_average_precision(x_embed: Float[torch.Tensor, "n_points n_dim"], graph: nx.Graph) -> Float[torch.Tensor, ""]:
87-
"""Mean averae precision (mAP) from the Gu et al paper."""
106+
r"""Computes the mean average precision (mAP) for graph embedding evaluation.
107+
108+
This metric is used to evaluate how well an embedding preserves the neighborhood structure of a graph, as described
109+
in Gu et al. (2019): "Learning Mixed-Curvature Representations in Product Spaces".
110+
111+
Args:
112+
x_embed: Tensor containing the embeddings of the graph nodes.
113+
graph: NetworkX graph representing the original graph structure.
114+
115+
Returns:
116+
mAP: Mean average precision score.
117+
118+
Note:
119+
This function is currently not implemented.
120+
"""
88121
raise NotImplementedError
89122

90123

91124
def dist_component_by_manifold(pm: ProductManifold, x_embed: Float[torch.Tensor, "n_points n_dim"]) -> List[float]:
92-
"""
93-
Compute the variance in pairwise distances explained by each manifold component.
125+
r"""Computes the proportion of variance in pairwise distances explained by each manifold component.
126+
127+
The contribution is calculated as the ratio of the sum of squared distances in each component to the total squared
128+
distance:
129+
130+
$$\text{contribution}_k = \frac{\sum_{i<j} D^2_k(x_i, x_j)}{\sum_{i<j} D^2_{\text{total}}(x_i, x_j)}$$
131+
132+
where $D^2_k$ is the squared distance in the $k$-th manifold component.
94133
95134
Args:
96-
pm: The product manifold.
97-
x_embed (n_points, n_dim): A tensor of embeddings.
135+
pm: The product manifold containing multiple component manifolds.
136+
x_embed: Tensor of embeddings in the product manifold.
98137
99138
Returns:
100-
List[float]: A list of proportions, where each value represents the fraction
101-
of total distance variance explained by the corresponding
102-
manifold component.
139+
contributions: List of proportions, where each value represents the fraction of total distance variance
140+
explained by the corresponding manifold component.
103141
"""
104142
sq_dists_by_manifold = [M.pdist2(x_embed[:, pm.man2dim[i]]) for i, M in enumerate(pm.P)]
105143
total_sq_dist = pm.pdist2(x_embed)

manify/embedders/coordinate_learning.py

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
"""Implementation for coordinate training and optimization"""
1+
"""Implementation for direct coordinate optimization in Riemannian manifolds.
2+
3+
This module provides functions for learning optimal embeddings in product manifolds by directly optimizing the
4+
coordinates using Riemannian optimization. This approach is particularly useful for embedding graphs using metric learning
5+
to maintain pairwise distances in the target space. The optimization is performed using Riemannian gradient descent
6+
with support for non-transductive training, in which gradients from the test set to the training set are masked out.
7+
"""
28

39
from __future__ import annotations
410

@@ -33,25 +39,44 @@ def train_coords(
3339
loss_window_size: int = 100,
3440
logging_interval: int = 10,
3541
) -> Tuple[Float[torch.Tensor, "n_points n_dim"], Dict[str, List[float]]]:
36-
"""
37-
Coordinate training and optimization
42+
r"""Trains point coordinates in a product manifold to match target distances.
43+
44+
This function optimizes the coordinates of points in a product manifold to match a given distance matrix. The
45+
optimization is performed in two phases:
46+
47+
1. Burn-in phase: Initial optimization with a smaller learning rate to find a good starting configuration.
48+
2. Training phase: Fine-tuning of the coordinates with a larger learning rate, and optionally optimizing the scale
49+
factors (curvatures) of the manifold components.
50+
51+
The optimization uses Riemannian Adam optimizer to respect the manifold structure during gradient updates. The loss
52+
is computed based on the distortion between the pairwise distances in the embedding and the target distances.
53+
54+
For non-transductive settings, the function supports split between training and testing points, optimizing different
55+
combinations of distances (train-train, test-test, train-test).
3856
3957
Args:
40-
pm: ProductManifold object that encapsulates the manifold and its signature.
41-
dists: (n_points, n_points) Tensor representing the pairwise distance matrix between points.
42-
test_indices: (n_test) Tensor representing the indices of the test points.
43-
device: Device for training (default: "cpu").
44-
burn_in_learning_rate: Learning rate during the burn-in phase (default: 1e-3).
45-
burn_in_iterations: Number of iterations during the burn-in phase (default: 2,000).
46-
learning_rate: Learning rate during the training phase (default: 1e-2).
47-
scale_factor_learning_rate: Learning rate for scale factor optimization (default: 0.0).
48-
training_iterations: Number of iterations for the training phase (default: 18,000).
49-
loss_window_size: Window size for computing the moving average of the loss (default: 100).
50-
logging_interval: Interval for logging the training progress (default: 10).
58+
pm: ProductManifold object specifying the target manifold structure.
59+
dists: Tensor representing the target pairwise distance matrix between points.
60+
test_indices: Tensor containing indices of test points for transductive learning.
61+
Defaults to an empty tensor (all points are used for training).
62+
device: Device for tensor computations. Defaults to "cpu".
63+
burn_in_learning_rate: Learning rate for the burn-in phase. Defaults to 1e-3.
64+
burn_in_iterations: Number of iterations for the burn-in phase. Defaults to 2,000.
65+
learning_rate: Learning rate for the main training phase. Defaults to 1e-2.
66+
scale_factor_learning_rate: Learning rate for optimizing manifold scale factors.
67+
Defaults to 0.0 (no optimization of curvatures).
68+
training_iterations: Number of iterations for the main training phase. Defaults to 18,000.
69+
loss_window_size: Window size for computing moving average loss. Defaults to 100.
70+
logging_interval: Interval for logging training progress. Defaults to 10.
5171
5272
Returns:
53-
pm.x_embed: Tensor of the final learned coordinates in the manifold.
54-
losses: List of loss values at each iteration during training.
73+
embeddings: Tensor of shape (n_points, n_dim) with optimized coordinates in the manifold.
74+
losses: Dictionary containing loss histories for different components:
75+
76+
* 'train_train': Loss between training points
77+
* 'test_test': Loss between test points (if test_indices is provided)
78+
* 'train_test': Loss between training and test points (if test_indices is provided)
79+
* 'total': Sum of all loss components
5580
"""
5681
# Move everything to the device
5782
n = dists.shape[0]

manify/embedders/siamese.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
"""Siamese network embedder"""
1+
"""Siamese network implementation for manifold embedding.
2+
3+
This module provides a Siamese network architecture that can be used for embedding data into product manifolds. Siamese
4+
networks are particularly useful for metric learning tasks, where the goal is to learn a distance-preserving embedding,
5+
while also encoding a set of features.
6+
7+
The SiameseNetwork class supports both encoding (embedding) data into a manifold space and optionally decoding
8+
(reconstructing) from the embedding space back to the original data space.
9+
"""
210

311
from __future__ import annotations
412

@@ -11,6 +19,29 @@
1119

1220

1321
class SiameseNetwork(torch.nn.Module):
22+
"""Siamese network for embedding data into a product manifold space.
23+
24+
A Siamese network consists of an encoder network that maps input data to a latent representation in a product
25+
manifold, and optionally a decoder network that maps the latent representation back to the original feature space.
26+
27+
Attributes:
28+
pm: The product manifold object defining the embedding space.
29+
encoder: Neural network module that maps input data to the embedding space.
30+
decoder: Optional neural network module for reconstructing input data from embeddings.
31+
reconstruction_loss: Loss function for measuring reconstruction quality.
32+
33+
Args:
34+
pm: Product manifold object defining the target embedding space.
35+
encoder: Neural network module that maps inputs to the embedding space.
36+
decoder: Optional neural network module that maps embeddings back to input space.
37+
If None, a no-op identity module is used. Defaults to None.
38+
reconstruction_loss: Type of reconstruction loss to use.
39+
Currently only "mse" (mean squared error) is supported. Defaults to "mse".
40+
41+
Raises:
42+
ValueError: If an unsupported reconstruction_loss is specified.
43+
"""
44+
1445
def __init__(
1546
self,
1647
pm: ProductManifold,
@@ -35,23 +66,28 @@ def __init__(
3566
raise ValueError(f"Unknown reconstruction loss: {reconstruction_loss}")
3667

3768
def encode(self, x: Float[torch.Tensor, "batch_size n_features"]) -> Float[torch.Tensor, "batch_size n_latent"]:
38-
"""Encodes the input tensor into a latent representation.
69+
"""Encodes input data into the manifold embedding space.
70+
71+
Takes a batch of input data and passes it through the encoder network to obtain embeddings in the manifold.
3972
4073
Args:
41-
x (TensorType["batch_size", "n_features"]): The input tensor.
74+
x: Input data tensor.
4275
4376
Returns:
44-
TensorType["batch_size", "n_latent"]: The encoded latent representation.
77+
embeddings: Tensor containing the embeddings in the manifold space.
4578
"""
4679
return self.encoder(x)
4780

4881
def decode(self, z: Float[torch.Tensor, "batch_size n_latent"]) -> Float[torch.Tensor, "batch_size n_features"]:
49-
"""Decodes the latent representation back to the input space.
82+
"""Decodes manifold embeddings back to the original input space.
83+
84+
Takes a batch of embeddings from the manifold space and passes them through
85+
the decoder network to reconstruct the original input data.
5086
5187
Args:
52-
z (TensorType["batch_size", "n_latent"]): The latent representation.
88+
z: Embedding tensor from the manifold space.
5389
5490
Returns:
55-
TensorType["batch_size", "n_features"]: The reconstructed input tensor.
91+
reconstructed: Tensor containing the reconstructed input data.
5692
"""
5793
return self.decoder(z)

0 commit comments

Comments
 (0)