pchlenski
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 5 additions & 4 deletions b/‎.github/workflows/test.yml‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎manify/__init__.py‎
Lines changed: 3 additions & 3 deletions b/‎manify/__init__.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎manify/curvature_estimation/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎manify/curvature_estimation/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎manify/embedders/siamese.py‎
Lines changed: 191 additions & 2 deletions b/‎manify/embedders/siamese.py‎
Lines changed: 191 additions & 2 deletions
diff --git a/‎manify/embedders/vae.py‎
Lines changed: 6 additions & 2 deletions b/‎manify/embedders/vae.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎manify/utils/dataloaders.py‎
Lines changed: 1 addition & 0 deletions b/‎manify/utils/dataloaders.py‎
Lines changed: 1 addition & 0 deletions
@@ -41,25 +41,26 @@ jobs:
       # Code quality checks
       - name: Check code formatting with Black
         run: black --check manify/ --line-length 120
-        continue-on-error: true
 
       - name: Check import ordering with isort
         run: isort --check-only --profile black manify/ --line-width 120
-        continue-on-error: true
 
       - name: Run pylint
         run: pylint manify/
-        continue-on-error: true
 
       # Type checking
       - name: Check type annotations with MyPy
         run: mypy manify/
-        continue-on-error: true
 
       # Unit testing
       - name: Run unit tests & collect coverage
         run: pytest tests --cov=manify --cov-report=xml:coverage.xml
 
+      # Docstring style (non-blocking: this step sets continue-on-error)
+      - name: Check docstrings are in Google style
+        run: pydocstyle manify/ --convention=google
+        continue-on-error: true
+
       # Code coverage
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v5
 
@@ -1,14 +1,14 @@
 """Manify: A Python Library for Learning Non-Euclidean Representations."""
 
 from manify.curvature_estimation import (
-    sampled_delta_hyperbolicity,
     delta_hyperbolicity,
-    sectional_curvature,
     greedy_signature_selection,
+    sampled_delta_hyperbolicity,
+    sectional_curvature,
 )
 from manify.embedders import CoordinateLearning, ProductSpaceVAE, SiameseNetwork
 from manify.manifolds import Manifold, ProductManifold
-from manify.predictors import ProductSpaceDT, ProductSpaceRF, KappaGCN, ProductSpacePerceptron, ProductSpaceSVM
+from manify.predictors import KappaGCN, ProductSpaceDT, ProductSpacePerceptron, ProductSpaceRF, ProductSpaceSVM
 
 # import manify.utils
 
 
@@ -7,7 +7,7 @@
 * `sectional_curvature`: Estimates the sectional curvature of a graph from its distance matrix.
 """
 
-from manify.curvature_estimation.delta_hyperbolicity import sampled_delta_hyperbolicity, delta_hyperbolicity
+from manify.curvature_estimation.delta_hyperbolicity import delta_hyperbolicity, sampled_delta_hyperbolicity
 from manify.curvature_estimation.greedy_method import greedy_signature_selection
 from manify.curvature_estimation.sectional_curvature import sectional_curvature
 
 
@@ -42,7 +42,7 @@ class SiameseNetwork(BaseEmbedder, torch.nn.Module):
         beta: Weight for the distortion term in the loss function.
         device: Device for tensor computations.
         reconstruction_loss: Type of reconstruction loss to use.
-        
+
 
     Args:
         pm: Product manifold defining the structure of the latent space.
@@ -61,10 +61,18 @@ def __init__(
         encoder: torch.nn.Module,
         decoder: Optional[torch.nn.Module] = None,
         reconstruction_loss: str = "mse",
+        beta: float = 1.0,
+        random_state: Optional[int] = None,
+        device: str = "cpu",
     ):
-        super().__init__()
+        # Init both base classes
+        torch.nn.Module.__init__(self)
+        BaseEmbedder.__init__(self, pm=pm, random_state=random_state, device=device)
+
+        # Now we assign
         self.pm = pm
         self.encoder = encoder
+        self.beta = beta
 
         if decoder is not None:
             self.decoder = decoder
@@ -104,3 +112,184 @@ def decode(self, z: Float[torch.Tensor, "batch_size n_latent"]) -> Float[torch.T
             reconstructed: Tensor containing the reconstructed input data.
         """
         return self.decoder(z)
+
+    def forward(
+        self, x1: Float[torch.Tensor, "batch_size n_features"], x2: Float[torch.Tensor, "batch_size n_features"]
+    ) -> Tuple[
+        Float[torch.Tensor, "batch_size n_latent"],
+        Float[torch.Tensor, "batch_size n_latent"],
+        Float[torch.Tensor, "batch_size"],
+        Float[torch.Tensor, "batch_size n_features"],
+        Float[torch.Tensor, "batch_size n_features"],
+    ]:
+        """Embed a pair of inputs and return the embeddings, their distance, and the reconstructions.
+
+        Each input is encoded, multiplied by the product manifold's projection matrix, and passed through the
+        product manifold's exponential map. Both embeddings are then decoded.
+
+        Args:
+            x1: First input tensor.
+            x2: Second input tensor.
+
+        Returns:
+            z1: Embedding of the first input.
+            z2: Embedding of the second input.
+            D_hat: Distance between ``z1`` and ``z2``, computed row by row.
+            reconstructed1: Decoder output for ``z1``.
+            reconstructed2: Decoder output for ``z2``.
+        """
+        z1 = self.pm.expmap(self.encode(x1) @ self.pm.projection_matrix)
+        z2 = self.pm.expmap(self.encode(x2) @ self.pm.projection_matrix)
+        # Use the underlying manifold's dist to get one distance per pair, i.e. a (batch_size,) vector
+        D_hat = self.pm.manifold.dist(z1, z2)
+        reconstructed1 = self.decode(z1)
+        reconstructed2 = self.decode(z2)
+        return z1, z2, D_hat, reconstructed1, reconstructed2
+
+    def fit(  # type: ignore[override]
+        self,
+        X: Float[torch.Tensor, "n_points n_features"],
+        D: Float[torch.Tensor, "n_points n_points"],
+        lr: float = 1e-3,
+        burn_in_lr: float = 1e-4,
+        curvature_lr: float = 0.0,  # Off by default
+        burn_in_iterations: int = 1,
+        training_iterations: int = 9,
+        loss_window_size: int = 100,
+        logging_interval: int = 10,
+        batch_size: int = 32,
+        clip_grad: bool = True,
+    ) -> "SiameseNetwork":
+        """Fit the SiameseNetwork embedder.
+
+        Every epoch visits all index pairs ``(i, j)`` with ``i < j`` in shuffled mini-batches. The per-batch loss is
+        the sum of the two reconstruction MSEs plus ``self.beta`` times the distortion between the embedding
+        distances and the matching entries of ``D``. During burn-in the manifold parameters have learning rate 0
+        and all other parameters use ``burn_in_lr``; afterwards they switch to ``curvature_lr`` and ``lr``.
+
+        Args:
+            X: Input data features to encode.
+            D: Pairwise distances to emulate.
+            lr: Learning rate for non-manifold parameters after burn-in.
+            burn_in_lr: Learning rate for non-manifold parameters during burn-in.
+            curvature_lr: Learning rate for the manifold parameters after burn-in.
+            burn_in_iterations: Number of burn-in epochs (full passes over all pairs).
+            training_iterations: Number of training epochs following burn-in.
+            loss_window_size: Number of most recent batches averaged in the logged loss statistics.
+            logging_interval: Number of batches between updates of the progress-bar postfix.
+            batch_size: Number of pairs per batch.
+            clip_grad: Whether to clip gradient norms to 1.0 before each optimizer step.
+
+        Returns:
+            self: Fitted SiameseNetwork instance, with ``loss_history_`` and ``is_fitted_`` set.
+        """
+        if self.random_state is not None:
+            torch.manual_seed(self.random_state)
+
+        n_samples = len(X)
+
+        # All upper-triangular index pairs (i < j), one pair per row: (n_pairs, 2)
+        pairs = torch.triu_indices(n_samples, n_samples, offset=1).T
+
+        # Number of pairs, epochs and batches
+        n_pairs = len(pairs)
+        n_epochs = burn_in_iterations + training_iterations
+        n_batches_per_epoch = (n_pairs + batch_size - 1) // batch_size  # Ceiling division
+
+        my_tqdm = tqdm(total=n_epochs * n_batches_per_epoch)
+
+        # Separate manifold parameters from the rest. Membership is decided by object identity and the lookup set
+        # is built once, instead of rebuilding a set of tensors for every parameter.
+        pm_params = list(self.pm.parameters())
+        pm_param_ids = {id(p) for p in pm_params}
+        other_params = [p for p in self.parameters() if id(p) not in pm_param_ids]
+        opt = torch.optim.Adam(
+            [
+                {"params": other_params, "lr": burn_in_lr},
+                {"params": pm_params, "lr": 0},
+            ]
+        )
+        losses: Dict[str, List[float]] = {"total": [], "reconstruction": [], "distortion": []}
+
+        for epoch in range(n_epochs):
+            if epoch == burn_in_iterations:
+                # Burn-in is over: switch both parameter groups to their training learning rates
+                opt.param_groups[0]["lr"] = lr
+                opt.param_groups[1]["lr"] = curvature_lr
+
+            # Shuffle all pairs
+            shuffled_pairs = pairs[torch.randperm(n_pairs)]
+
+            for batch_start in range(0, n_pairs, batch_size):
+                # Slicing clamps at n_pairs, so the last batch may be shorter
+                batch_pairs = shuffled_pairs[batch_start : batch_start + batch_size]
+
+                # Extract indices for this batch
+                batch_indices1 = batch_pairs[:, 0]
+                batch_indices2 = batch_pairs[:, 1]
+
+                # Get data for these indices
+                X1 = X[batch_indices1]
+                X2 = X[batch_indices2]
+
+                # Target distance for each pair, as a 1D tensor
+                D_batch = D[batch_indices1, batch_indices2]
+
+                # Forward pass
+                opt.zero_grad()
+                _, _, D_hat, Y1, Y2 = self(X1, X2)
+                mse1 = torch.nn.functional.mse_loss(Y1, X1)
+                mse2 = torch.nn.functional.mse_loss(Y2, X2)
+
+                # D_hat and D_batch are both 1D tensors of pairwise distances
+                distortion = distortion_loss(D_hat, D_batch, pairwise=False)
+                L = mse1 + mse2 + self.beta * distortion
+                L.backward()
+
+                # Read each scalar once and reuse it for the history and the progress bar
+                loss_value = L.item()
+                recon_value = mse1.item() + mse2.item()
+                distortion_value = distortion.item()
+                losses["total"].append(loss_value)
+                losses["reconstruction"].append(recon_value)
+                losses["distortion"].append(distortion_value)
+
+                if clip_grad:
+                    torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
+                    torch.nn.utils.clip_grad_norm_(self.pm.parameters(), max_norm=1.0)
+
+                opt.step()
+
+                # TQDM management
+                my_tqdm.update(1)
+                my_tqdm.set_description(
+                    f"L: {loss_value:.3e}, recon: {recon_value:.3e}, dist: {distortion_value:.3e}"
+                )
+
+                # Logging
+                if my_tqdm.n % logging_interval == 0:
+                    d = {f"r{i}": f"{logscale.item():.3f}" for i, logscale in enumerate(self.pm.parameters())}
+                    d["L_avg"] = f"{np.mean(losses['total'][-loss_window_size:]):.3e}"
+                    d["recon_avg"] = f"{np.mean(losses['reconstruction'][-loss_window_size:]):.3e}"
+                    d["dist_avg"] = f"{np.mean(losses['distortion'][-loss_window_size:]):.3e}"
+                    my_tqdm.set_postfix(d)
+
+        # Release the progress bar now that training is done
+        my_tqdm.close()
+
+        # Final maintenance: update attributes
+        self.loss_history_ = losses
+        self.is_fitted_ = True
+
+        return self
+
+    def transform(
+        self, X: Float[torch.Tensor, "n_points n_features"], D: None = None, batch_size: int = 32, expmap: bool = True
+    ) -> Float[torch.Tensor, "n_points n_latent"]:
+        """Embed input features with the encoder, one batch at a time.
+
+        Args:
+            X: Features to embed with SiameseNetwork.
+            D: Ignored.
+            batch_size: Number of samples per batch.
+            expmap: If True, multiply the encoder output by the product manifold's projection matrix and apply
+                its exponential map; if False, return the encoder output as is.
+
+        Returns:
+            embeddings: Per-batch embeddings concatenated along the first dimension.
+        """
+        # Seed torch when a random state was configured
+        if self.random_state is not None:
+            torch.manual_seed(self.random_state)
+
+        # Embed each batch, then stitch the pieces back together in order
+        chunks = []
+        for start in range(0, len(X), batch_size):
+            encoded = self.encode(X[start : start + batch_size])
+            chunks.append(self.pm.expmap(encoded @ self.pm.projection_matrix) if expmap else encoded)
+
+        return torch.cat(chunks, dim=0)
@@ -306,7 +306,10 @@ def fit(  # type: ignore[override]
 
         my_tqdm = tqdm(total=(burn_in_iterations + training_iterations) * len(X))
         opt = torch.optim.Adam(
-            [{"params": self.parameters(), "lr": burn_in_lr}, {"params": self.pm.parameters(), "lr": 0}]
+            [
+                {"params": [p for p in self.parameters() if p not in set(self.pm.parameters())], "lr": burn_in_lr},
+                {"params": self.pm.parameters(), "lr": 0},
+            ]
         )
         losses: Dict[str, List[float]] = {"elbo": [], "ll": [], "kl": []}
         for epoch in range(burn_in_iterations + training_iterations):
@@ -319,12 +322,12 @@ def fit(  # type: ignore[override]
                 X_batch = X[i : i + batch_size]
                 elbo, ll, kl = self.elbo(X_batch)
                 L = -elbo
+                L.backward()
 
                 # Add to losses
                 losses["elbo"].append(elbo.item())
                 losses["ll"].append(ll.item())
                 losses["kl"].append(kl.item())
-                L.backward()
 
                 if clip_grad:
                     torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1.0)
@@ -362,6 +365,7 @@ def transform(
         Args:
             X: Features to embed with VAE.
             D: Ignored.
+            batch_size: Number of samples per batch.
             expmap: Whether to use exponential map for embedding.
 
         Returns:
 
@@ -28,6 +28,7 @@
 | neuron_33 | classification | ❌ | ✅ | ✅ | ❌ | [Allen Brain Atlas](https://celltypes.brain-map.org/experiment/electrophysiology/623474400) |
 | neuron_46 | classification | ❌ | ✅ | ✅ | ❌ | [Allen Brain Atlas](https://celltypes.brain-map.org/experiment/electrophysiology/623474400) |
 | traffic | regression | ❌ | ✅ | ✅ | ❌ | [Kaggle: Traffic Prediction Dataset](https://www.kaggle.com/datasets/fedesoriano/traffic-prediction-dataset) |
+| qiita | none | ✅ | ✅ | ❌ | ❌ | [NeuroSEED Git Repo](https://github.com/gcorso/NeuroSEED) |
 """
 
 from __future__ import annotations