Skip to content

Commit 021dc7b

Browse files
authored
Update conservative model for V3 (#68)
* add pair repulsion * update conf head * update calculator * add spherical harmonics * update conservative model to not require direct heads * lint * tweak * fix
1 parent 2ccc0e2 commit 021dc7b

File tree

12 files changed

+2520
-158
lines changed

12 files changed

+2520
-158
lines changed

orb_models/forcefield/angular.py

Lines changed: 1561 additions & 0 deletions
Large diffs are not rendered by default.

orb_models/forcefield/calculator.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ def __init__(
7474

7575
self.implemented_properties = model.properties # type: ignore
7676

77+
# TODO: Untangle the spaghetti of how we handle the naming for the heads.
78+
# This is required because ASE will check the implemented_properties for
79+
# the existence of the property before calling the calculator, so it's not
80+
# sufficient to just return the property names from the model and handle
81+
# the conservative case in `calculate`.
82+
if self.conservative:
83+
self.implemented_properties.extend(["forces", "stress"])
84+
7785
def calculate(self, atoms=None, properties=None, system_changes=all_changes):
7886
"""Calculate properties.
7987
@@ -103,7 +111,15 @@ def calculate(self, atoms=None, properties=None, system_changes=all_changes):
103111
batch = batch.to(self.device) # type: ignore
104112
out = self.model.predict(batch) # type: ignore
105113
self.results = {}
114+
model_has_direct_heads = (
115+
"forces" in self.model.heads and "stress" in self.model.heads # type: ignore
116+
)
106117
for property in self.implemented_properties:
118+
# The model has no direct heads for forces/stress, so we skip these properties.
119+
if not model_has_direct_heads and property == "forces":
120+
continue
121+
if not model_has_direct_heads and property == "stress":
122+
continue
107123
_property = "energy" if property == "free_energy" else property
108124
self.results[property] = to_numpy(out[_property].squeeze())
109125

orb_models/forcefield/conservative_regressor.py

Lines changed: 143 additions & 110 deletions
Large diffs are not rendered by default.

orb_models/forcefield/forcefield_heads.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ def __init__(
356356
dropout: Optional[float] = None,
357357
activation: str = "ssp",
358358
detach_node_features: bool = True,
359+
hard_clamp: bool = True,
359360
):
360361
"""Initializes the ConfidenceHead MLP.
361362
@@ -371,13 +372,16 @@ def __init__(
371372
detach_node_features: If True, detaches node features from computational graph.
372373
This means that the confidence loss has no impact on training the underlying
373374
forcefield model.
375+
hard_clamp: If True, ignore any errors above max_error such that they do not contribute
376+
to the loss, rather than just clamping them to the max_bin.
374377
"""
375378
super().__init__()
376379
self.target = _confidence
377380
self.num_bins = num_bins
378381
self.max_error = max_error
379382
self.detach_node_features = detach_node_features
380-
# Define bin edges (from 0 to max_error)
383+
self.hard_clamp = hard_clamp
384+
self.ignore_index = -100
381385
if binning_scale == "linear":
382386
bins = torch.linspace(0.0, max_error, int(num_bins + 1))
383387
elif binning_scale == "exponential":
@@ -409,9 +413,13 @@ def get_error_bins(self, force_error: torch.Tensor) -> torch.Tensor:
409413
Returns:
410414
Bin indices of shape (n_atoms,)
411415
"""
412-
force_error = torch.clamp(force_error, 0, self.max_error)
413-
bins = torch.bucketize(force_error, self.bin_edges) - 1 # type: ignore
414-
return torch.clamp(bins, 0, self.num_bins - 1)
416+
clamped_error = torch.clamp(force_error, 0, self.max_error)
417+
bins = torch.bucketize(clamped_error, self.bin_edges) - 1 # type: ignore
418+
clamped = torch.clamp(bins, 0, self.num_bins - 1)
419+
420+
if self.hard_clamp:
421+
clamped[force_error > self.max_error] = self.ignore_index
422+
return clamped
415423

416424
def forward(
417425
self, node_features: torch.Tensor, batch: base.AtomGraphs
@@ -451,7 +459,9 @@ def loss(
451459
true_bins = self.get_error_bins(force_error)
452460

453461
# Cross entropy loss
454-
loss = torch.nn.functional.cross_entropy(confidence_logits, true_bins)
462+
loss = torch.nn.functional.cross_entropy(
463+
confidence_logits, true_bins, ignore_index=self.ignore_index
464+
)
455465

456466
# Calculate accuracy
457467
pred_bins = torch.argmax(confidence_logits, dim=-1)

orb_models/forcefield/gns.py

Lines changed: 83 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Pyg implementation of Graph Net Simulator."""
22

33
from collections import OrderedDict
4-
from typing import Callable, List, Optional, Literal, Dict, Any, Tuple
4+
from typing import Callable, List, Optional, Literal, Dict, Any, Tuple, Union
55

66
import torch
77
from torch import nn
@@ -10,6 +10,7 @@
1010
from orb_models.forcefield import base, segment_ops
1111
from orb_models.forcefield.nn_util import build_mlp, get_cutoff, mlp_and_layer_norm
1212
from orb_models.forcefield.embedding import AtomEmbedding, AtomEmbeddingBag
13+
from orb_models.forcefield.angular import UnitVector
1314

1415
_KEY = "feat"
1516

@@ -274,23 +275,31 @@ def forward(self, nodes: torch.Tensor) -> torch.Tensor:
274275
class MoleculeGNS(nn.Module):
275276
"""GNS that works on molecular data."""
276277

277-
_deprecated_args = ["noise_scale", "add_virtual_node", "self_cond", "interactions"]
278+
_deprecated_args = [
279+
"noise_scale",
280+
"add_virtual_node",
281+
"self_cond",
282+
"interactions",
283+
"num_node_in_features",
284+
"num_edge_in_features",
285+
]
278286

279287
def __init__(
280288
self,
281-
num_node_in_features: int,
282-
num_node_out_features: int,
283-
num_edge_in_features: int,
284289
latent_dim: int,
285290
num_message_passing_steps: int,
286291
num_mlp_layers: int,
287292
mlp_hidden_dim: int,
288293
rbf_transform: Callable,
289-
node_feature_names: Optional[List[str]] = None,
290-
edge_feature_names: Optional[List[str]] = None,
294+
angular_transform: Optional[Callable] = None,
295+
outer_product_with_cutoff: bool = False,
296+
use_embedding: bool = False, # atom type embedding
291297
expects_atom_type_embedding: bool = False,
292-
use_embedding: bool = False,
293298
interaction_params: Optional[Dict[str, Any]] = None,
299+
num_node_out_features: int = 3,
300+
extra_embed_dims: Union[int, Tuple[int, int]] = 0,
301+
node_feature_names: Optional[List[str]] = None,
302+
edge_feature_names: Optional[List[str]] = None,
294303
checkpoint: Optional[str] = None,
295304
activation="ssp",
296305
mlp_norm: str = "layer_norm",
@@ -299,28 +308,33 @@ def __init__(
299308
"""Initializes the molecular GNS.
300309
301310
Args:
302-
num_node_in_features (int): Number input nodes features.
303-
num_node_out_features (int): Number output nodes features.
304-
num_edge_in_features (int): Number input edge features.
305311
latent_dim (int): Latent dimension of processor.
306312
num_message_passing_steps (int): Number of message passing steps.
307313
num_mlp_layers (int): Number of MLP layers.
308314
mlp_hidden_dim (int): MLP hidden dimension.
315+
rbf_transform (Callable): A function that takes in edge lengths and returns
316+
a tensor of RBF features.
317+
angular_transform (Callable): A function that takes in edge vectors and
318+
returns a tensor of angular features.
319+
outer_product_with_cutoff (bool): Create initial edge embeddings via
320+
an outer product of RBF and angular embeddings and an envelope cutoff.
321+
use_embedding: Whether to embed atom types using an embedding table or embedding bag.
322+
expects_atom_type_embedding (bool): Whether or not the model expects the input
323+
to be pre-embedded. This is used for atom type models, because the one-hot
324+
embedding is noised, rather than being explicitly one-hot.
325+
interaction_params (Optional[Dict[str, Any]]): Additional parameters
326+
to pass to the interaction network.
327+
num_node_out_features (int): Number output nodes features.
328+
extra_embed_dims (int | Tuple[int, int]): Number of extra embedding dimensions to use.
329+
If an int, both the node and edge embeddings will have this number of extra dims.
330+
If a tuple, then it is interpreted as [extra_node_embed_dim, extra_edge_embed_dim].
309331
node_feature_names (List[str]): Which tensors from batch.node_features to
310332
concatenate to form the initial node latents. Note: These are "extra"
311333
features - we assume the base atomic number representation is already
312334
included.
313335
edge_feature_names (List[str]): Which tensors from batch.edge_features to
314336
concatenate to form the initial edge latents. Note: These are "extra"
315337
features - we assume the base edge vector features are already included.
316-
rbf_transform: An RBF transform to use for the edge features.
317-
expects_atom_type_embedding (bool): Whether or not the model expects
318-
the input to be pre-embedded. This is used for atom type models,
319-
because the one-hot embedding is noised, rather than being
320-
explicitly one-hot.
321-
use_embedding: Whether to embed atom types using an embedding table or embedding bag.
322-
interaction_params (Optional[Dict[str, Any]]): Additional parameters
323-
to pass to the interaction network.
324338
checkpoint (bool): Whether or not to use checkpointing.
325339
activation (str): Activation function to use.
326340
mlp_norm (str): Normalization layer to use in the MLP.
@@ -333,9 +347,42 @@ def __init__(
333347
f"The following kwargs are not arguments to GraphRegressor: {kwargs.keys()}"
334348
)
335349

350+
self.node_feature_names = node_feature_names or []
351+
self.edge_feature_names = edge_feature_names or []
352+
353+
# Edge embedding
354+
self.outer_product_with_cutoff = outer_product_with_cutoff
355+
self.rbf_transform = rbf_transform
356+
if angular_transform is None:
357+
angular_transform = UnitVector()
358+
self.angular_transform = angular_transform
359+
if self.outer_product_with_cutoff:
360+
self.edge_embed_size = rbf_transform.num_bases * angular_transform.dim # type: ignore
361+
else:
362+
if hasattr(rbf_transform, "num_bases"):
363+
num_bases = rbf_transform.num_bases
364+
else:
365+
num_bases = rbf_transform.keywords["num_bases"] # type: ignore
366+
self.edge_embed_size = num_bases + angular_transform.dim # type: ignore
367+
368+
# Node embedding
369+
self.expects_atom_type_embedding = expects_atom_type_embedding
370+
self.use_embedding = use_embedding
371+
if self.use_embedding:
372+
self.node_embed_size = latent_dim
373+
if self.expects_atom_type_embedding:
374+
# Use embedding bag for atom type diffusion
375+
self.atom_emb = AtomEmbeddingBag(self.node_embed_size, 118)
376+
else:
377+
self.atom_emb = AtomEmbedding(self.node_embed_size, 118) # type: ignore
378+
else:
379+
self.node_embed_size = 118
380+
if isinstance(extra_embed_dims, int):
381+
extra_embed_dims = (extra_embed_dims, extra_embed_dims) # type: ignore
382+
336383
self._encoder = Encoder(
337-
num_node_in_features=num_node_in_features,
338-
num_edge_in_features=num_edge_in_features,
384+
num_node_in_features=self.node_embed_size + extra_embed_dims[0],
385+
num_edge_in_features=self.edge_embed_size + extra_embed_dims[1],
339386
latent_dim=latent_dim,
340387
num_mlp_layers=num_mlp_layers,
341388
mlp_hidden_dim=mlp_hidden_dim,
@@ -370,19 +417,6 @@ def __init__(
370417
checkpoint=checkpoint,
371418
activation=activation,
372419
)
373-
self.rbf = rbf_transform
374-
self.expects_atom_type_embedding = expects_atom_type_embedding
375-
self.use_embedding = use_embedding
376-
377-
if self.use_embedding:
378-
if self.expects_atom_type_embedding:
379-
# Use embedding bag for atom type diffusion
380-
self.atom_emb = AtomEmbeddingBag(latent_dim, 118)
381-
else:
382-
self.atom_emb = AtomEmbedding(latent_dim, 118) # type: ignore
383-
384-
self.node_feature_names = node_feature_names or []
385-
self.edge_feature_names = edge_feature_names or []
386420

387421
def forward(self, batch: base.AtomGraphs) -> Dict[str, torch.Tensor]:
388422
"""Encode a graph using molecular GNS.
@@ -455,14 +489,22 @@ def featurize_edges(self, batch: base.AtomGraphs) -> torch.Tensor:
455489
vectors = batch.edge_features["vectors"]
456490
# replace 0s with 1s to avoid division by zero
457491
lengths = vectors.norm(dim=1)
458-
non_zero_divisor = torch.where(lengths == 0, torch.ones_like(lengths), lengths)
459-
unit_vectors = vectors / non_zero_divisor.unsqueeze(1)
460-
rbfs = self.rbf(lengths)
461-
edge_features = torch.cat([rbfs, unit_vectors], dim=1)
462492

463-
# This is for backward compatibility with old code
464-
# Configs now assume that the base model features are already included
465-
# and only specify "extra" features
493+
angular_embedding = self.angular_transform(vectors) # (nedges, x)
494+
rbfs = self.rbf_transform(lengths) # (nedges, y)
495+
496+
if self.outer_product_with_cutoff:
497+
cutoff = get_cutoff(lengths)
498+
# (nedges, x, y)
499+
outer_product = rbfs[:, :, None] * angular_embedding[:, None, :]
500+
# (nedges, x * y)
501+
edge_features = cutoff * outer_product.view(
502+
vectors.shape[0], self.edge_embed_size
503+
)
504+
else:
505+
edge_features = torch.cat([rbfs, angular_embedding], dim=1)
506+
507+
# For backwards compatibility, exclude 'feat'
466508
feature_names = [k for k in self.edge_feature_names if k != "feat"]
467509
return torch.cat(
468510
[edge_features, *[batch.edge_features[k] for k in feature_names]], dim=-1

orb_models/forcefield/graph_regressor.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from orb_models.forcefield import segment_ops
1818
from orb_models.forcefield.gns import MoleculeGNS
1919
from orb_models.forcefield.load import _load_forcefield_state_dict
20+
from orb_models.forcefield.pair_repulsion import ZBLBasis
2021

2122

2223
class GraphRegressor(nn.Module):
@@ -35,6 +36,7 @@ def __init__(
3536
model_requires_grad: bool = True,
3637
cutoff_layers: Optional[int] = None,
3738
loss_weights: Optional[Dict[str, float]] = None,
39+
pair_repulsion: bool = False,
3840
) -> None:
3941
"""Initializes the GraphRegressor.
4042
@@ -65,6 +67,14 @@ def __init__(
6567
self.loss_weights = loss_weights
6668
self.model_requires_grad = model_requires_grad
6769

70+
self.pair_repulsion = pair_repulsion
71+
if self.pair_repulsion:
72+
self.pair_repulsion_fn = ZBLBasis(
73+
p=6,
74+
node_aggregation="sum",
75+
compute_gradients=True,
76+
)
77+
6878
self.model = model
6979
if self.cutoff_layers is not None:
7080
gns = (
@@ -85,6 +95,19 @@ def forward(
8595
for name, head in self.heads.items():
8696
res = head(node_features, batch)
8797
out[name] = res
98+
99+
if self.pair_repulsion:
100+
out_pair_raw = self.pair_repulsion_fn(batch)
101+
for name, head in self.heads.items():
102+
raw = out_pair_raw.get(name, None)
103+
if raw is None:
104+
continue
105+
if name == "energy" and head.atom_avg:
106+
raw = (raw / batch.n_node).unsqueeze(1)
107+
out[name] = out[name] + head.normalizer(
108+
raw,
109+
online=False,
110+
)
88111
return out
89112

90113
def predict(
@@ -100,6 +123,15 @@ def predict(
100123
output[name] = _split_prediction(pred, batch.n_node)
101124
else:
102125
output[name] = pred
126+
127+
if self.pair_repulsion:
128+
out_pair_raw = self.pair_repulsion_fn(batch)
129+
for name, head in self.heads.items():
130+
raw = out_pair_raw.get(name, None)
131+
if raw is None:
132+
continue
133+
output[name] = output[name] + raw
134+
103135
return output
104136

105137
def loss(self, batch: base.AtomGraphs) -> base.ModelOutput:

0 commit comments

Comments
 (0)