Skip to content

Commit ea303c4

Browse files
authored
Fix calculator naming for conservative models with no direct heads (#76)
* fix calculator error * make sure all readme examples work * add note on compilation * fix loading for direct models
1 parent 3753705 commit ea303c4

File tree

12 files changed

+59
-50
lines changed

12 files changed

+59
-50
lines changed

README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,12 @@ For example, `orb-v3-conservative-inf-omat` is a model that:
7070
- Computes forces/stress as gradients of energy
7171
- Has effectively infinite neighbors (120 in practice)
7272
- Was trained on the OMat24 dataset
73-
```
7473

75-
*We suggest using models trained on OMAT24, as these models are more performant and the data they are trained on uses newer pseudopotentials in VASP (PBE54 vs PBE52)*. `-mpa` models should be used if compatibility with benchmarks (for example, Matbench Discovery) is required.
74+
75+
Orb-v3 models are **compiled** by default and use Pytorch's dynamic batching, which means that they do not need to recompile as graph sizes change. However, the first call to the model will be slower, as the graph is compiled by torch.
76+
77+
78+
**We suggest using models trained on OMAT24**, as these models are more performant and the data they are trained on uses newer pseudopotentials in VASP (PBE54 vs PBE52). `-mpa` models should be used if compatibility with benchmarks (for example, Matbench Discovery) is required.
7679

7780
#### V2 Models
7881

@@ -99,11 +102,13 @@ from orb_models.forcefield.base import batch_graphs
99102

100103
device = "cpu" # or device="cuda"
101104
orbff = pretrained.orb_v3_conservative_inf_omat(
102-
device=device
105+
device=device,
103106
precision="float32-high", # or "float32-highest" / "float64"
104107
)
105108
atoms = bulk('Cu', 'fcc', a=3.58, cubic=True)
106109
graph = atomic_system.ase_atoms_to_atom_graphs(atoms, orbff.system_config, device=device)
110+
atoms = bulk('Cu', 'fcc', a=3.58, cubic=True)
111+
graph = atomic_system.ase_atoms_to_atom_graphs(atoms, orbff.system_config, device=device)
107112

108113
# Optionally, batch graphs for faster inference
109114
# graph = batch_graphs([graph, graph, ...])
@@ -131,7 +136,7 @@ from orb_models.forcefield.calculator import ORBCalculator
131136
device="cpu" # or device="cuda"
132137
# or choose another model using ORB_PRETRAINED_MODELS[model_name]()
133138
orbff = pretrained.orb_v3_conservative_inf_omat(
134-
device=device
139+
device=device,
135140
precision="float32-high", # or "float32-highest" / "float64"
136141
)
137142
calc = ORBCalculator(orbff, device=device)

orb_models/dataset/base_datasets.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from orb_models.forcefield.base import AtomGraphs
1111

1212

13-
1413
class AtomsDataset(ABC, Dataset):
1514
"""AtomsDataset.
1615

orb_models/forcefield/atomic_system.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class SystemConfig:
2525
radius: float
2626
max_num_neighbors: int
2727

28+
2829
def atom_graphs_to_ase_atoms(
2930
graphs: AtomGraphs,
3031
energy: Optional[torch.Tensor] = None,
@@ -83,6 +84,7 @@ def atom_graphs_to_ase_atoms(
8384

8485
return atoms_list
8586

87+
8688
def ase_atoms_to_atom_graphs(
8789
atoms: ase.Atoms,
8890
system_config: SystemConfig,

orb_models/forcefield/calculator.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,9 @@ def calculate(self, atoms=None, properties=None, system_changes=all_changes):
118118
self.results[property] = to_numpy(out[_property].squeeze())
119119

120120
if self.conservative:
121-
self.results["direct_forces"] = self.results["forces"]
122-
self.results["direct_stress"] = self.results["stress"]
121+
if self.model.forces_name in self.results:
122+
self.results["direct_forces"] = self.results[self.model.forces_name]
123+
if self.model.stress_name in self.results:
124+
self.results["direct_stress"] = self.results[self.model.stress_name]
123125
self.results["forces"] = self.results[self.model.grad_forces_name]
124126
self.results["stress"] = self.results[self.model.grad_stress_name]

orb_models/forcefield/direct_regressor.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@
33

44
from orb_models.forcefield.pair_repulsion import ZBLBasis
55
from orb_models.forcefield import base
6-
from orb_models.forcefield.forcefield_utils import split_prediction, validate_regressor_inputs
6+
from orb_models.forcefield.forcefield_utils import (
7+
split_prediction,
8+
validate_regressor_inputs,
9+
)
710
from orb_models.forcefield.gns import MoleculeGNS
811
from orb_models.forcefield.load import load_forcefield_state_dict
912
from orb_models.forcefield.atomic_system import SystemConfig
1013

14+
1115
class DirectForcefieldRegressor(torch.nn.Module):
1216
"""Direct Forcefield regressor."""
1317

@@ -70,19 +74,17 @@ def __init__(
7074
param.requires_grad = False
7175

7276
if heads_require_grad is not None:
73-
for head_name, requires_grad in heads_require_grad.items():
74-
assert head_name in self.heads
75-
for param in self.heads[head_name].parameters():
76-
param.requires_grad = requires_grad
77-
77+
for head_name, requires_grad in heads_require_grad.items():
78+
assert head_name in self.heads
79+
for param in self.heads[head_name].parameters():
80+
param.requires_grad = requires_grad
7881

7982
self._system_config = system_config
8083

8184
@property
8285
def system_config(self) -> SystemConfig:
8386
return self._system_config
8487

85-
8688
def forward(
8789
self, batch: base.AtomGraphs
8890
) -> Dict[str, Union[torch.Tensor, Dict[str, torch.Tensor]]]:
@@ -206,4 +208,4 @@ def _get_raw_repulsion(
206208
for prop_type in property_types:
207209
if prop_type in name and "d3" not in name and "d4" not in name:
208210
return out_pair_repulsion[prop_type]
209-
return None
211+
return None

orb_models/forcefield/forcefield_heads.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -706,4 +706,4 @@ def loss(
706706
f"{name}_mse_raw": ((raw_pred - target) ** 2).mean(),
707707
}
708708

709-
return base.ModelOutput(loss=loss, log=metrics)
709+
return base.ModelOutput(loss=loss, log=metrics)

orb_models/forcefield/forcefield_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from orb_models.forcefield.gns import MoleculeGNS
77

88

9-
109
def validate_regressor_inputs(
1110
heads: Union[Sequence[torch.nn.Module], Mapping[str, torch.nn.Module]],
1211
loss_weights: Dict[str, float],

orb_models/forcefield/load.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def load_forcefield_state_dict(
2525
if skip_artifact_reference_energy is True.
2626
2727
NOTE: We assume that the presence of the prefix "heads." in any key of the
28-
state_dict implies that the state_dict comes from a DirectForcefieldRegressor
28+
state_dict implies that the state_dict comes from a DirectForcefieldRegressor
2929
or ConservativeForcefieldRegressor.
3030
"""
3131
state_dict = dict(state_dict) # Shallow copy

orb_models/forcefield/pretrained.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@
1414
gaussian_basis_function,
1515
)
1616
from orb_models.forcefield.gns import MoleculeGNS
17-
from orb_models.forcefield.forcefield_heads import ConfidenceHead, EnergyHead, ForceHead, GraphHead, StressHead
17+
from orb_models.forcefield.forcefield_heads import (
18+
ConfidenceHead,
19+
EnergyHead,
20+
ForceHead,
21+
GraphHead,
22+
StressHead,
23+
)
1824
from orb_models.forcefield.rbf import BesselBasis
1925
from orb_models.utils import set_torch_precision
2026

@@ -88,7 +94,7 @@ def orb_v2_architecture(
8894
activation="ssp",
8995
),
9096
"forces": ForceHead(
91-
latent_dim=256,
97+
latent_dim=256,
9298
num_mlp_layers=1,
9399
mlp_hidden_dim=256,
94100
remove_mean=True,
@@ -140,7 +146,7 @@ def orb_v3_conservative_architecture(
140146
head_mlp_depth: int = 1,
141147
num_message_passing_steps: int = 5,
142148
activation: str = "silu",
143-
device: Optional[torch.device] = None,
149+
device: Optional[Union[torch.device, str]] = None,
144150
system_config: Optional[SystemConfig] = None,
145151
) -> ConservativeForcefieldRegressor:
146152
"""The orb-v3 conservative architecture."""
@@ -189,6 +195,7 @@ def orb_v3_conservative_architecture(
189195
pair_repulsion=True,
190196
system_config=system_config,
191197
)
198+
device = get_device(device)
192199
if device is not None and device != torch.device("cpu"):
193200
model.cuda(device)
194201
else:
@@ -268,13 +275,15 @@ def orb_v3_direct_architecture(
268275
pair_repulsion=True,
269276
system_config=system_config,
270277
)
278+
device = get_device(device)
271279
if device is not None and device != torch.device("cpu"):
272280
model.cuda(device)
273281
else:
274282
model = model.cpu()
275283

276284
return model
277285

286+
278287
def orb_v3_conservative_20_omat(
279288
weights_path: str = "https://orbitalmaterials-public-models.s3.us-west-1.amazonaws.com/forcefields/orb-v3/orb-v3-conservative-20-omat-20250404.ckpt", # noqa: E501
280289
device: Union[torch.device, str, None] = None,
@@ -291,6 +300,7 @@ def orb_v3_conservative_20_omat(
291300

292301
return model
293302

303+
294304
def orb_v3_conservative_inf_omat(
295305
weights_path: str = "https://orbitalmaterials-public-models.s3.us-west-1.amazonaws.com/forcefields/orb-v3/orb-v3-conservative-inf-omat-20250404.ckpt", # noqa: E501
296306
device: Union[torch.device, str, None] = None,
@@ -310,6 +320,7 @@ def orb_v3_conservative_inf_omat(
310320

311321
return model
312322

323+
313324
def orb_v3_direct_20_omat(
314325
weights_path: str = "https://orbitalmaterials-public-models.s3.us-west-1.amazonaws.com/forcefields/orb-v3/orb-v3-direct-20-omat-20250404.ckpt", # noqa: E501
315326
device: Union[torch.device, str, None] = None,
@@ -325,6 +336,7 @@ def orb_v3_direct_20_omat(
325336

326337
return model
327338

339+
328340
def orb_v3_direct_inf_omat(
329341
weights_path: str = "https://orbitalmaterials-public-models.s3.us-west-1.amazonaws.com/forcefields/orb-v3/orb-v3-direct-inf-omat-20250404.ckpt", # noqa: E501
330342
device: Union[torch.device, str, None] = None,
@@ -344,6 +356,7 @@ def orb_v3_direct_inf_omat(
344356

345357
return model
346358

359+
347360
def orb_v3_conservative_20_mpa(
348361
weights_path: str = "https://orbitalmaterials-public-models.s3.us-west-1.amazonaws.com/forcefields/orb-v3/orb-v3-conservative-20-mpa-20250404.ckpt", # noqa: E501
349362
device: Union[torch.device, str, None] = None,
@@ -359,6 +372,7 @@ def orb_v3_conservative_20_mpa(
359372

360373
return model
361374

375+
362376
def orb_v3_conservative_inf_mpa(
363377
weights_path: str = "https://orbitalmaterials-public-models.s3.us-west-1.amazonaws.com/forcefields/orb-v3/orb-v3-conservative-inf-mpa-20250404.ckpt", # noqa: E501
364378
device: Union[torch.device, str, None] = None,
@@ -378,6 +392,7 @@ def orb_v3_conservative_inf_mpa(
378392

379393
return model
380394

395+
381396
def orb_v3_direct_20_mpa(
382397
weights_path: str = "https://orbitalmaterials-public-models.s3.us-west-1.amazonaws.com/forcefields/orb-v3/orb-v3-direct-20-mpa-20250404.ckpt", # noqa: E501
383398
device: Union[torch.device, str, None] = None,
@@ -393,6 +408,7 @@ def orb_v3_direct_20_mpa(
393408

394409
return model
395410

411+
396412
def orb_v3_direct_inf_mpa(
397413
weights_path: str = "", # noqa: E501
398414
device: Union[torch.device, str, None] = None,
@@ -469,7 +485,9 @@ def orb_d3_sm_v2(
469485
) -> DirectForcefieldRegressor:
470486
"""Load ORB D3 small v2 with 20 max neighbors, trained on MPTraj + Alexandria."""
471487
system_config = SystemConfig(radius=6.0, max_num_neighbors=20)
472-
model = orb_v2_architecture(num_message_passing_steps=10, device=device, system_config=system_config)
488+
model = orb_v2_architecture(
489+
num_message_passing_steps=10, device=device, system_config=system_config
490+
)
473491
model = load_model_for_inference(
474492
model, weights_path, device, precision=precision, compile=compile
475493
)
@@ -485,7 +503,9 @@ def orb_d3_xs_v2(
485503
) -> DirectForcefieldRegressor:
486504
"""Load ORB D3 xs v2 with 20 max neighbors, trained on MPTraj + Alexandria."""
487505
system_config = SystemConfig(radius=6.0, max_num_neighbors=20)
488-
model = orb_v2_architecture(num_message_passing_steps=5, device=device, system_config=system_config)
506+
model = orb_v2_architecture(
507+
num_message_passing_steps=5, device=device, system_config=system_config
508+
)
489509
model = load_model_for_inference(
490510
model, weights_path, device, precision=precision, compile=compile
491511
)
@@ -547,7 +567,7 @@ def orb_v1_mptraj_only(
547567

548568
ORB_PRETRAINED_MODELS = {
549569
# most performant orb-v3 omat models
550-
"orb-v3-conservative-20-omat": orb_v3_conservative_20_omat,
570+
"orb-v3-conservative-20-omat": orb_v3_conservative_20_omat,
551571
"orb-v3-conservative-inf-omat": orb_v3_conservative_inf_omat,
552572
"orb-v3-direct-20-omat": orb_v3_direct_20_omat,
553573
"orb-v3-direct-inf-omat": orb_v3_direct_inf_omat,

tests/forcefield/conftest.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,14 +167,12 @@ def gns_model():
167167

168168

169169
@pytest.fixture
170-
def conservative_regressor(gns_model, energy_head, force_head, stress_head):
170+
def conservative_regressor(gns_model, energy_head):
171171
return ConservativeForcefieldRegressor(
172-
heads={"energy": energy_head, "forces": force_head, "stress": stress_head},
172+
heads={"energy": energy_head},
173173
model=gns_model,
174174
loss_weights={
175175
"energy": 1.0,
176-
"forces": 1.0,
177-
"stress": 1.0,
178176
"grad_forces": 1.0,
179177
"grad_stress": 1.0,
180178
"rotational_grad": 1.0,

0 commit comments

Comments
 (0)