Add n_samples to sampling; add tutorial

pchlenski · pchlenski · commit 948ffa4dab59 · 2025-07-13T05:06:13.000-07:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -29,15 +29,6 @@ jobs:
           python-version: ${{ matrix.python-version }}
           cache: "pip"
 
-      # Cache HuggingFace - this saves time running tests/test_utils.py on subsequent runs
-      - name: Cache HuggingFace datasets
-        uses: actions/cache@v4
-        with:
-          path: ~/.cache/huggingface
-          key: ${{ runner.os }}-huggingface-${{ hashFiles('tests/test_*.py') }}
-          restore-keys: |
-            ${{ runner.os }}-huggingface-
-
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
@@ -67,8 +58,8 @@ jobs:
         run: mypy manify/
 
       # Unit testing
-      - name: Run unit tests & collect coverage
-        run: pytest tests --cov --cov-report=xml
+      - name: Run unit tests & collect coverage (except dataloaders)
+        run: pytest tests --cov --cov-report=xml -k "not test_dataloaders"
 
 
       # Code coverage
@@ -79,3 +70,46 @@ jobs:
           fail_ci_if_error: false
           verbose: true
           flags: unittests
+          name: python-${{ matrix.python-version }}
+
+  # Dataloader tests run as a separate job, in parallel with the main test job, for speed
+  test-dataloaders:
+    runs-on: ubuntu-latest
+    
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: "pip"
+
+      - name: Cache HuggingFace datasets
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/huggingface
+          key: ${{ runner.os }}-huggingface-dataloaders-v1
+          restore-keys: |
+            ${{ runner.os }}-huggingface-dataloaders-
+            ${{ runner.os }}-huggingface-
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev]"
+
+      - name: Run dataloader tests
+        run: pytest tests/test_utils.py::test_dataloaders -v --cov=manify/dataloaders --cov-report=xml
+
+      # Upload dataloader coverage separately
+      - name: Upload dataloader coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          fail_ci_if_error: false
+          verbose: true
+          flags: dataloaders
+          name: dataloaders
+
diff --git a/manify/clustering/fuzzy_kmeans.py b/manify/clustering/fuzzy_kmeans.py
@@ -115,7 +115,7 @@ def _init_centers(self, X: Float[torch.Tensor, "n_points n_features"]) -> None:
         # If we provide self.manifold.mu0 repeated n_clusters times,
         # it samples n_clusters points, each around mu0.
         means_for_sampling_centers = self.manifold.mu0.repeat(self.n_clusters, 1)
-        centers, _ = self.manifold.sample(z_mean=means_for_sampling_centers)
+        centers = self.manifold.sample(z_mean=means_for_sampling_centers)
 
         # IMPORTANT: Use self.manifold.manifold for ManifoldParameter,
         # as self.manifold is our wrapper and self.manifold.manifold is the geoopt object.
diff --git a/manify/curvature_estimation/_pipelines.py b/manify/curvature_estimation/_pipelines.py
@@ -53,7 +53,7 @@ def distortion_pipeline(
     return float(distortion_loss(new_dists, dists_rescaled).item())
 
 
-def classifier_pipeline(
+def predictor_pipeline(
     pm: ProductManifold,
     dists: Float[torch.Tensor, "n_nodes n_nodes"],
     labels: Float[torch.Tensor, "n_nodes"],
diff --git a/manify/embedders/coordinate_learning.py b/manify/embedders/coordinate_learning.py
@@ -121,7 +121,7 @@ def fit(  # type: ignore[override]
         n = D.shape[0]
         covs = [torch.stack([torch.eye(M.dim) / self.pm.dim] * n).to(self.device) for M in self.pm.P]
         means = torch.vstack([self.pm.mu0] * n).to(self.device)
-        X_embed, _ = self.pm.sample(z_mean=means, sigma_factorized=covs)
+        X_embed = self.pm.sample(z_mean=means, sigma_factorized=covs)
         D = D.to(self.device)
 
         # Get train and test indices set up
diff --git a/manify/embedders/vae.py b/manify/embedders/vae.py
@@ -180,7 +180,7 @@ def forward(
         sigmas = [torch.diag_embed(torch.exp(z_logvar) + 1e-8) for z_logvar in sigma_factorized]
 
         # Sample and decode
-        z, _ = self.pm.sample(z_means, sigmas)
+        z = self.pm.sample(z_mean=z_means, sigma_factorized=sigmas)
         x_reconstructed = self.decode(z)
         return x_reconstructed, z_means, sigmas
 
@@ -213,7 +213,8 @@ def kl_divergence(
         sigmas_factorized_interleaved = [
             torch.repeat_interleave(sigma, self.n_samples, dim=0) for sigma in sigma_factorized
         ]
-        z_samples, _ = self.pm.sample(means, sigmas_factorized_interleaved)
+        # Leave sample()'s n_samples at 1: the means/sigmas are already interleaved and are reused by log_likelihood
+        z_samples = self.pm.sample(z_mean=means, sigma_factorized=sigmas_factorized_interleaved)
         log_qz = self.pm.log_likelihood(z_samples, means, sigmas_factorized_interleaved)
         log_pz = self.pm.log_likelihood(z_samples)
         return (log_qz - log_pz).view(-1, self.n_samples).mean(dim=1)
diff --git a/manify/manifolds.py b/manify/manifolds.py
@@ -211,18 +211,25 @@ def _to_tangent_plane_mu0(
 
     def sample(
         self,
-        z_mean: Float[torch.Tensor, "n_points n_ambient_dim"] | None = None,
+        n_samples: int = 1,
+        z_mean: Float[torch.Tensor, "n_points n_ambient_dim"] | Float[torch.Tensor, "n_ambient_dim"] | None = None,
         sigma: Float[torch.Tensor, "n_points n_dim n_dim"] | None = None,
-    ) -> tuple[Float[torch.Tensor, "n_points n_ambient_dim"], Float[torch.Tensor, "n_points n_dim"]]:
+        return_tangent: bool = False,
+    ) -> (
+        tuple[Float[torch.Tensor, "n_points n_ambient_dim"], Float[torch.Tensor, "n_points n_dim"]]
+        | Float[torch.Tensor, "n_points n_ambient_dim"]
+    ):
         """Sample points from the variational distribution on the manifold.
 
         Args:
+            n_samples: Number of points to sample.
             z_mean: Tensor representing the mean of the sample distribution.
             sigma: Optional tensor representing the covariance matrix. If None, defaults to an identity matrix.
+            return_tangent: Whether to return the tangent vectors along with the sampled points.
 
         Returns:
             x: Tensor of sampled points on the manifold
-            v: Tensor of tangent vectors
+            v: Tensor of tangent vectors (if `return_tangent` is True).
         """
         z_mean = self.mu0 if z_mean is None else z_mean
         z_mean = torch.Tensor(z_mean).reshape(-1, self.ambient_dim).to(self.device)
@@ -237,6 +244,10 @@ def sample(
         assert torch.allclose(sigma, sigma.transpose(-1, -2)), "Covariance matrix must be symmetric"
         assert z_mean.shape[-1] == self.ambient_dim, f"Expected z_mean shape {self.ambient_dim}, got {z_mean.shape[-1]}"
 
+        # Adjust for n_samples (NOTE(review): loc below is still sized by n; confirm for n > 1 and n_samples > 1):
+        z_mean = torch.repeat_interleave(z_mean, n_samples, dim=0)
+        sigma = torch.repeat_interleave(sigma, n_samples, dim=0)
+
         # Sample initial vector from N(0, sigma)
         N = torch.distributions.MultivariateNormal(
             loc=torch.zeros((n, self.dim), device=self.device), covariance_matrix=sigma
@@ -260,8 +271,7 @@ def sample(
         # Exp map onto the manifold
         x = self.manifold.expmap(x=z_mean, u=z)
 
-        # Different samples and tangent vectors
-        return x, v
+        return (x, v) if return_tangent else x
 
     def log_likelihood(
         self,
@@ -611,19 +621,26 @@ def factorize(
 
     def sample(
         self,
+        n_samples: int = 1,
         z_mean: Float[torch.Tensor, "n_points n_ambient_dim"] | None = None,
-        sigma_factorized: list[Float[torch.Tensor, "n_points ..."]] | None = None,  # TODO: fix ... annotations
-    ) -> tuple[Float[torch.Tensor, "n_points n_ambient_dim"], Float[torch.Tensor, "n_points total_intrinsic_dim"]]:
+        sigma_factorized: list[Float[torch.Tensor, "n_points ..."]] | None = None,
+        return_tangent: bool = False,
+    ) -> (
+        tuple[Float[torch.Tensor, "n_points n_ambient_dim"], Float[torch.Tensor, "n_points total_intrinsic_dim"]]
+        | Float[torch.Tensor, "n_points n_ambient_dim"]
+    ):
         """Sample from the variational distribution.
 
         Args:
+            n_samples: Number of points to sample.
             z_mean: Tensor representing the mean of the sample distribution. If None, defaults to the origin `self.mu0`.
             sigma_factorized: List of tensors representing factorized covariance matrices for each manifold. If None,
                 defaults to a list of identity matrices for each manifold.
+            return_tangent: Whether to return the tangent vectors along with the sampled points.
 
         Returns:
             x: Tensor of sampled points on the manifold
-            v: Tensor of tangent vectors
+            v: Tensor of tangent vectors (if `return_tangent` is True).
         """
         z_mean = self.mu0 if z_mean is None else z_mean
         z_mean = torch.Tensor(z_mean).reshape(-1, self.ambient_dim).to(self.device)
@@ -637,24 +654,28 @@ def sample(
             for M, sigma in zip(self.P, sigma_factorized, strict=False)
         ]
 
-        assert all(sigma.shape == (n, M.dim, M.dim) for M, sigma in zip(self.P, sigma_factorized, strict=False)), (
-            "Sigma matrices must match the dimensions of the manifolds."
-        )
-        assert z_mean.shape[-1] == self.ambient_dim, (
+        # Adjust for n_samples: repeat each mean and covariance n_samples times
+        z_mean = torch.repeat_interleave(z_mean, n_samples, dim=0)
+        sigma_factorized = [torch.repeat_interleave(sigma, n_samples, dim=0) for sigma in sigma_factorized]
+
+        assert all(
+            sigma.shape == (n * n_samples, M.dim, M.dim) for M, sigma in zip(self.P, sigma_factorized, strict=False)
+        ), "Sigma matrices must match the dimensions of the manifolds."
+        assert z_mean.shape == (n * n_samples, self.ambient_dim), (
             "z_mean must have the same ambient dimension as the product manifold."
         )
 
         # Sample initial vector from N(0, sigma)
         samples = [
-            M.sample(z_M, sigma_M)
+            M.sample(1, z_M, sigma_M, return_tangent=True)
             for M, z_M, sigma_M in zip(self.P, self.factorize(z_mean), sigma_factorized, strict=False)
         ]
 
         x = torch.cat([s[0] for s in samples], dim=1)
         v = torch.cat([s[1] for s in samples], dim=1)
 
         # Different samples and tangent vectors
-        return x, v
+        return (x, v) if return_tangent else x
 
     def log_likelihood(
         self,
@@ -807,15 +828,13 @@ def gaussian_mixture(
             cov_scale_means /= self.dim
 
         # Generate cluster means
-        cluster_means, _ = self.sample(
-            z_mean=torch.vstack([self.mu0] * num_clusters),
-            sigma_factorized=[torch.stack([torch.eye(M.dim)] * num_clusters) * cov_scale_means for M in self.P],
-        )
+        cluster_means = self.sample(num_clusters, sigma_factorized=[torch.eye(M.dim) * cov_scale_means for M in self.P])
         assert cluster_means.shape == (num_clusters, self.ambient_dim), "Cluster means shape mismatch."
 
         # Generate class assignments
         cluster_probs = torch.rand(num_clusters)
         cluster_probs /= cluster_probs.sum()
+
         # Draw cluster assignments: ensure at least 2 points per cluster. This is to ensure splits can always happen.
         cluster_assignments = torch.multinomial(input=cluster_probs, num_samples=num_points, replacement=True)
         while (cluster_assignments.bincount() < 2).any():
@@ -835,7 +854,7 @@ def gaussian_mixture(
         sample_means = torch.stack([cluster_means[c] for c in cluster_assignments])
         assert sample_means.shape == (num_points, self.ambient_dim), "Sample means shape mismatch."
         sample_covs = [torch.stack([cov_matrix[c] for c in cluster_assignments]) for cov_matrix in cov_matrices]
-        samples, tangent_vals = self.sample(z_mean=sample_means, sigma_factorized=sample_covs)
+        samples, tangent_vals = self.sample(z_mean=sample_means, sigma_factorized=sample_covs, return_tangent=True)
         assert samples.shape == (num_points, self.ambient_dim), "Sample shape mismatch."
 
         # Map clusters to classes
diff --git a/manify/predictors/kappa_gcn.py b/manify/predictors/kappa_gcn.py
@@ -214,6 +214,7 @@ def fit(
         if use_tqdm:
             my_tqdm = tqdm(total=epochs, desc=tqdm_prefix)
 
+        losses = []
         for i in range(epochs):
             opt.zero_grad()
             if riemannian_params:
@@ -234,12 +235,13 @@ def fit(
             if torch.isnan(loss):
                 print("Loss is NaN, stopping training.")
                 break
+            losses.append(loss.item())
 
         if use_tqdm:
             my_tqdm.close()
 
         self.is_fitted_ = True
-        self.loss_history_["train"] = [loss.item()]
+        self.loss_history_["train"] = losses
         return self
 
     def predict_proba(
diff --git a/notebooks/Manify Tutorial.ipynb b/notebooks/Manify Tutorial.ipynb
diff --git a/tests/test_curvature_estimation.py b/tests/test_curvature_estimation.py
diff --git a/tests/test_manifolds.py b/tests/test_manifolds.py