Update documentation

pchlenski · pchlenski · commit bfb80071be5b · 2025-04-23T15:13:46.000-04:00
diff --git a/docs/api.md b/docs/api.md
diff --git a/docs/gen_ref_pages.py b/docs/gen_ref_pages.py
@@ -66,10 +66,13 @@
 with mkdocs_gen_files.open(f"{doc_dir}/index.md", "w") as fd:
     fd.write("# API Reference\n\n")
     fd.write("## Overview\n\n")
+
+    # Only show modules, not individual classes
     fd.write(f"::: {src_dir}\n")
     fd.write("    options:\n")
     fd.write("        show_category_heading: false\n")
     fd.write("        members_order: source\n")
     fd.write("        filters: ['!^_', '!^Parameters$']\n")
     fd.write("        show_root_heading: false\n")
     fd.write("        heading_level: 3\n")
+    fd.write("        members: false\n")  # This line prevents showing class members
diff --git a/docs/index.md b/docs/index.md
@@ -1,5 +1,3 @@
 # Welcome to Manify
 
-A library for geometric ML with manifold-based methods.
-
-- 📚 API Reference: [API](api.md)
+A Python Library for Learning Non-Euclidean Representations
diff --git a/manify/embedders/_losses.py b/manify/embedders/_losses.py
@@ -16,18 +16,18 @@ def distortion_loss(
     D_true: Float[torch.Tensor, "n_points n_points"],
     pairwise: bool = False,
 ) -> Float[torch.Tensor, ""]:
-    """
-    Compute the distortion loss between estimated SQUARED distances and true SQUARED distances.
+    """Compute the distortion loss between estimated SQUARED distances and true SQUARED distances.
+
     Args:
-        D_est (n_points, n_points): A tensor of estimated pairwise distances.
-        D_true (n_points, n_points).: A tensor of true pairwise distances.
-        pairwise (bool): A boolean indicating whether to return whether D_est and D_true are pairwise
+        D_est: A tensor of estimated pairwise distances.
+        D_true: A tensor of true pairwise distances.
+        pairwise: A boolean indicating whether D_est and D_true are pairwise.
 
     Returns:
         float: A float indicating the distortion loss, calculated as the sum of the squared relative
-         errors between the estimated and true squared distances.
+            errors between the estimated and true squared distances.
 
-    See also: square_loss in HazyResearch hyperbolics repo:
+    See also: `square_loss` in HazyResearch hyperbolics repo:
     https://github.com/HazyResearch/hyperbolics/blob/master/pytorch/hyperbolic_models.py#L178
     """
 
diff --git a/manify/manifolds.py b/manify/manifolds.py
diff --git a/manify/predictors/_kernel.py b/manify/predictors/_kernel.py
@@ -20,13 +20,13 @@ def compute_kernel_and_norm_manifold(
 
     Args:
         manifold: The manifold in which the computation occurs.
-        X_source((n_points_source, n_dim)): A tensor of the source points
-        X_target("n_points_target", "n_dim"): A tensor of target points
+        X_source: A tensor of the source points
+        X_target: A tensor of target points
 
     Return:
-        Tuple("n_points_source", "n_points_target"): A tuple of two tensors. The first tensor
-        is the kernel matrix of shape computed based on the manifold type. And the second tensor
-        A scalar normalization constant for the kernel, determined by the manifold's curvature or scale.
+        A tuple of two tensors. The first tensor is the kernel matrix of shape (n_points_source, n_points_target),
+        computed based on the manifold type. The second tensor is a scalar normalization constant for the kernel,
+        determined by the manifold's curvature or scale.
     """
     if X_target is None:
         X_target = X_source
@@ -72,13 +72,13 @@ def product_kernel(
 
     Args:
         pm: The product manifold in which the computation occurs.
-        X_source((n_points_source, n_dim)): A tensor of the source points
-        X_target("n_points_target", "n_dim"): A tensor of target points
+        X_source: A tensor of the source points
+        X_target: A tensor of target points
 
     Returns:
-        Tuple("n_points_source", "n_points_target"): A tuple of two tensors. The first tensor is the
-        kernel matrix of shape computed based on the product manifold type. And the second tensor is a
-        scalar normalization constant for the kernel, determined by the product manifold's curvature or scale.
+        A tuple of two tensors. The first tensor is the kernel matrix of shape (n_points_source, n_points_target),
+        computed based on the product manifold type. The second tensor is a scalar normalization constant for the
+        kernel, determined by the product manifold's curvature or scale.
     """
     # If X_target is None, set it to X_source
     if X_target is None:
diff --git a/manify/predictors/_midpoint.py b/manify/predictors/_midpoint.py
@@ -16,7 +16,7 @@ def hyperbolic_midpoint(u: float, v: float, assert_hyperbolic: bool = False) ->
         u: The first angular coordinate.
         v: The second angular coordinate.
         assert_hyperbolic: A boolean value. If True, verifies that the midpoint satisfies the hyperbolic
-        distance property. Defaults to False.
+            distance property. Defaults to False.
 
     Returns:
         torch.Tensor: The computed hyperbolic midpoint between u and v.
@@ -81,22 +81,20 @@ def midpoint(
     manifold: Manifold,
     special_first: bool = False,
 ) -> Float[torch.Tensor, ""]:
-    """
-    Driver code to compute the midpoint between two angular coordinates give the manifold type.
+    """Compute the midpoint between two angular coordinates given the manifold type.
 
     This function automatically selects the appropriate midpoint calculation depending
     on the manifold type. It supports hyperbolic, Euclidean, and spherical geometries.
 
     Args:
         u: The first angular coordinate.
         v: The second angular coordinate.
-        manifold (Manifold): An object representing the manifold type.
-        special_first (bool, optional): If True, uses the manifold-specific midpoint
-        calculations given the manifold type of hyperbolic or euclidean. Defaults to False.
+        manifold: An object representing the manifold type.
+        special_first: If True, uses the manifold-specific midpoint calculations given the manifold type of hyperbolic
+            or Euclidean. Defaults to False.
 
     Returns:
         torch.Tensor: The computed midpoint between u and v, based on the selected geometry.
-
     """
     if torch.isclose(u, v):
         return u
diff --git a/manify/predictors/kappa_gcn.py b/manify/predictors/kappa_gcn.py
@@ -377,8 +377,7 @@ def fit(
         Args:
             X (torch.Tensor): Feature matrix.
             y (torch.Tensor): Labels for training nodes.
-            adj (torch.Tensor): Adjacency or distance matrix.
-            train_idx (torch.Tensor): Indices of nodes for training.
+            A (torch.Tensor): Adjacency or distance matrix.
             epochs: Number of training epochs (default=200).
             lr: Learning rate (default=1e-2).
             use_tqdm: Whether to use tqdm for progress bar.
diff --git a/manify/utils/benchmarks.py b/manify/utils/benchmarks.py
@@ -103,39 +103,65 @@ def benchmark(
     lp_train_idx: Optional[Float[torch.Tensor, "n_samples,"]] = None,
     lp_test_idx: Optional[Float[torch.Tensor, "n_samples,"]] = None,
 ) -> Dict[str, float]:
-    """
-    Benchmarks various machine learning models on a dataset using a product manifold structure.
+    """Benchmarks various machine learning models on Riemannian manifold datasets.
+
+    Evaluates and compares different machine learning models on datasets with a
+    product manifold structure, providing metrics for their performance.
 
     Args:
-        X (batch, dim): Input tensor of features
-        y (batch,): Input tensor of labels.
-        pm: The defined product manifold for benchmarks.
-        split: Data splitting strategy ('train_test' or 'cross_val').
-        device: Device for computation ('cpu', 'cuda', 'mps').
-        score: Scoring metric for model evaluation ('accuracy', 'f1-micro', etc.).
+        X: Tensor of input features with shape (batch, dim).
+        y: Tensor of target labels with shape (batch,).
+        pm: ProductManifold object defining the geometric structure for benchmarks.
+        device: Device for computation. Options: 'cpu', 'cuda', 'mps'. Defaults to 'cpu'.
+        score: List of scoring metrics for model evaluation (e.g., 'accuracy', 'f1-micro').
+            Defaults to None, in which case ["accuracy", "f1-micro", "f1-macro"] is used.
         models: List of model names to evaluate. Options include:
-            * "sklearn_dt": Decision tree from scikit-learn.
-            * "sklearn_rf": Random forest from scikit-learn.
-            * "product_dt": Product space decision tree.
-            * "product_rf": Product space random forest.
-            * "tangent_dt": Decision tree on tangent space.
-            * "tangent_rf": Random forest on tangent space.
-            * "knn": k-nearest neighbors.
-            * "ps_perceptron": Product space perceptron.
-        max_depth: Maximum depth of tree-based models in integer.
-        n_estimators: Integer number of estimators for random forest models.
-        min_samples_split: Minimum number of samples required to split an internal node.
-        min_samples_leaf: Minimum number of samples in a leaf node.
-        task: Task type ('classification' or 'regression').
-        seed: Random seed for reproducibility.
-        use_special_dims: Boolean for whether to use special manifold dimensions.
-        n_features: Feature dimensionality type ('d' or 'd_choose_2').
-        X_train, X_test, y_train, y_test: Training and testing datasets, X: feature, y: label.
-        batch_size: Batch size for certain models.
+            * "sklearn_dt": Decision tree from scikit-learn
+            * "sklearn_rf": Random forest from scikit-learn
+            * "product_dt": Product space decision tree
+            * "product_rf": Product space random forest
+            * "tangent_dt": Decision tree on tangent space
+            * "tangent_rf": Random forest on tangent space
+            * "knn": k-nearest neighbors
+            * "ps_perceptron": Product space perceptron
+            Defaults to None.
+        max_depth: Maximum depth of tree-based models. Defaults to 5.
+        n_estimators: Number of estimators for ensemble models. Defaults to 12.
+        min_samples_split: Minimum samples required to split an internal node. Defaults to 2.
+        min_samples_leaf: Minimum samples required in a leaf node. Defaults to 1.
+        task: Type of machine learning task. Options: 'classification' or 'regression'.
+            Defaults to 'classification'.
+        seed: Random seed for reproducibility. Defaults to None.
+        use_special_dims: Whether to use special manifold dimensions. Defaults to False.
+        n_features: Feature dimensionality type. Options: 'd' or 'd_choose_2'.
+            Defaults to 'd_choose_2'.
+        X_train: Training feature tensor with shape (n_samples, n_manifolds).
+            If provided, overrides split from X. Defaults to None.
+        X_test: Testing feature tensor with shape (n_samples, n_manifolds).
+            If provided, used with X_train. Defaults to None.
+        y_train: Training labels tensor with shape (n_samples,).
+            Must be provided if X_train is given. Defaults to None.
+        y_test: Testing labels tensor with shape (n_samples,).
+            Must be provided if X_test is given. Defaults to None.
+        batch_size: Batch size for neural network models. Defaults to None.
+        adj: Adjacency matrix for graph-based models with shape (n_nodes, n_nodes).
+            Defaults to None.
+        A_train: Training adjacency matrix with shape (n_samples, n_samples).
+            Defaults to None.
+        A_test: Testing adjacency matrix with shape (n_samples, n_samples).
+            Defaults to None.
+        hidden_dims: List of hidden layer dimensions for neural networks.
+            Defaults to None.
+        epochs: Number of training epochs for iterative models. Defaults to 4000.
+        lr: Learning rate for gradient-based optimization. Defaults to 1e-4.
+        kappa_gcn_layers: Number of layers in GCN models. Defaults to 1.
+        lp_train_idx: Training indices for link prediction with shape (n_samples,).
+            Defaults to None.
+        lp_test_idx: Testing indices for link prediction with shape (n_samples,).
+            Defaults to None.
 
     Returns:
-        Dict[str, float]: Dictionary of model names and their corresponding evaluation scores.
-
+        Dictionary mapping model names to their corresponding evaluation scores.
     """
     if score is None:
         score = ["accuracy", "f1-micro", "f1-macro"]
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -24,6 +24,7 @@ plugins:
 # Explicitly define navigation structure
 nav:
   - Home: index.md
+  - Installation: installation.md
   - API Reference:
     - Overview: reference/index.md
     - Manifolds: reference/manifolds.md
@@ -37,12 +38,15 @@ nav:
       - Coordinate Learning: reference/embedders/coordinate_learning.md
       - Siamese: reference/embedders/siamese.md
       - VAE: reference/embedders/vae.md
+      - Losses: reference/embedders/_losses.md
     - Predictors:
       - Overview: reference/predictors/index.md
       - Decision Tree: reference/predictors/decision_tree.md
       - Kappa GCN: reference/predictors/kappa_gcn.md
       - Perceptron: reference/predictors/perceptron.md
       - SVM: reference/predictors/svm.md
+      - Kernel: reference/predictors/_kernel.md
+      - Midpoint: reference/predictors/_midpoint.md
     - Utils:
       - Overview: reference/utils/index.md
       - Benchmarks: reference/utils/benchmarks.md
@@ -56,4 +60,10 @@ markdown_extensions:
   - pymdownx.highlight
   - pymdownx.superfences
   - toc:
-      permalink: true
+      permalink: true
+  - pymdownx.arithmatex:
+      generic: true
+
+extra_javascript:
+  - https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6
+  - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js