instadeepai
diff --git a/‎docs/source/api_reference/index.rst‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/api_reference/index.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/tutorials/new_benchmark/index.rst‎
Lines changed: 9 additions & 1 deletion b/‎docs/source/tutorials/new_benchmark/index.rst‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎src/mlipaudit/benchmark.py‎
Lines changed: 7 additions & 0 deletions b/‎src/mlipaudit/benchmark.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/mlipaudit/benchmarks/folding_stability/folding_stability.py‎
Lines changed: 48 additions & 17 deletions b/‎src/mlipaudit/benchmarks/folding_stability/folding_stability.py‎
Lines changed: 48 additions & 17 deletions
diff --git a/‎src/mlipaudit/benchmarks/sampling/helpers.py‎
Lines changed: 7 additions & 1 deletion b/‎src/mlipaudit/benchmarks/sampling/helpers.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎src/mlipaudit/benchmarks/sampling/sampling.py‎
Lines changed: 62 additions & 35 deletions b/‎src/mlipaudit/benchmarks/sampling/sampling.py‎
Lines changed: 62 additions & 35 deletions
@@ -35,6 +35,7 @@ Benchmark implementations
     small_molecules/radial_distribution
     small_molecules/solvent_radial_distribution
     small_molecules/reactivity
+    small_molecules/nudged_elastic_band
     biomolecules/folding_stability
     biomolecules/sampling
     general/stability
 
@@ -127,9 +127,17 @@ Here is an example of a very minimal new benchmark implementation:
 The data loading as a cached property is only recommended if the loaded data
 is needed in both the `run_model()` and the `analyze()` functions.
 
-Note that the functions `_compute_energies_blackbox` and `_analyze_blackbox`` are
+Note that the functions `_compute_energies_blackbox` and `_analyze_blackbox` are
 placeholders for the actual implementations.
 
+Another class attribute that can optionally be specified is `reusable_output_id`,
+which is `None` by default. It can be used to signal that two benchmarks use the exact
+same `run_model()` method and the exact same signature for the model output class.
+This ID should be a tuple containing the names of the benchmarks that share it; see the
+classes `SamplingBenchmark` and `FoldingStabilityBenchmark` for an example. See the
+source code of the main benchmarking script for how it reuses the model output of one
+benchmark for the other without rerunning any simulation or inference.
+
 **Furthermore, you need to add an import for your benchmark to the**
 `src/mlipaudit/benchmarks/__init__.py` **file such that the benchmark can be**
 **automatically picked up by the CLI tool.**
 
@@ -76,6 +76,11 @@ class Benchmark(ABC):
             if there are some element types that the model cannot handle. If False,
             the benchmark must have its own custom logic to handle missing element
             types. Defaults to True.
+        reusable_output_id: An optional ID, given as a tuple of strings, that is
+            shared by benchmarks with identical input systems and `ModelOutput`
+            signatures. If present, a user or the CLI can use this information to
+            reuse cached model outputs from another benchmark carrying the same ID
+            instead of rerunning simulations or inference.
     """
 
     name: str = ""
@@ -86,6 +91,8 @@ class Benchmark(ABC):
     required_elements: set[str] | None = None
     skip_if_elements_missing: bool = True
 
+    reusable_output_id: tuple[str, ...] | None = None
+
     def __init__(
         self,
         force_field: ForceField | ASECalculator,
 
@@ -33,6 +33,7 @@
     create_mdtraj_trajectory_from_simulation_state,
     get_simulation_engine,
 )
+from mlipaudit.utils.simulation import REUSABLE_BIOMOLECULES_OUTPUTS_ID
 from mlipaudit.utils.stability import is_simulation_stable
 
 logger = logging.getLogger("mlipaudit")
@@ -50,16 +51,16 @@
 }
 
 SIMULATION_CONFIG = {
-    "num_steps": 100_000,
-    "snapshot_interval": 100,
-    "num_episodes": 100,
+    "num_steps": 250_000,
+    "snapshot_interval": 10_000,
+    "num_episodes": 25,
     "temperature_kelvin": 300.0,
 }
 
 SIMULATION_CONFIG_DEV = {
-    "num_steps": 10,
+    "num_steps": 5,
     "snapshot_interval": 1,
-    "num_episodes": 10,
+    "num_episodes": 1,
     "temperature_kelvin": 300.0,
 }
 
@@ -168,6 +169,11 @@ class FoldingStabilityBenchmark(Benchmark):
             if there are some atomic element types that the model cannot handle. If
             False, the benchmark must have its own custom logic to handle missing atomic
             element types. For this benchmark, the attribute is set to True.
+        reusable_output_id: An optional ID, given as a tuple of strings, that is
+            shared by benchmarks with identical input systems and `ModelOutput`
+            signatures. If present, a user or the CLI can use this information to
+            reuse cached model outputs from another benchmark carrying the same ID
+            instead of rerunning simulations or inference.
     """
 
     name = "folding_stability"
@@ -177,16 +183,13 @@ class FoldingStabilityBenchmark(Benchmark):
 
     required_elements = {"H", "N", "O", "S", "C"}
 
+    reusable_output_id = REUSABLE_BIOMOLECULES_OUTPUTS_ID
+
     def run_model(self) -> None:
         """Run an MD simulation for each biosystem.
 
         The simulation results are stored in the `model_output` attribute.
         """
-        self.model_output = FoldingStabilityModelOutput(
-            structure_names=[],
-            simulation_states=[],
-        )
-
         if self.run_mode == RunMode.DEV:
             structure_names = STRUCTURE_NAMES[:1]
         elif self.run_mode == RunMode.FAST:
@@ -199,10 +202,17 @@ def run_model(self) -> None:
         else:
             md_kwargs = SIMULATION_CONFIG
 
+        self.model_output = FoldingStabilityModelOutput(
+            structure_names=[],
+            simulation_states=[],
+        )
+
         for structure_name in structure_names:
             logger.info("Running MD for %s", structure_name)
             xyz_filename = structure_name + ".xyz"
-            atoms = ase_read(self.data_input_dir / self.name / xyz_filename)
+            atoms = ase_read(
+                self.data_input_dir / self.name / "starting_structures" / xyz_filename
+            )
 
             md_engine = get_simulation_engine(
                 atoms, self.force_field, box=BOX_SIZES[structure_name], **md_kwargs
@@ -230,6 +240,8 @@ def analyze(self) -> FoldingStabilityResult:
         if self.model_output is None:
             raise RuntimeError("Must call run_model() first.")
 
+        self._assert_structure_names_in_model_output()
+
         molecule_results = []
         num_stable = 0
 
@@ -246,13 +258,15 @@ def analyze(self) -> FoldingStabilityResult:
                 continue
 
             num_stable += 1
-
-            topology_filename = structure_name + ".pdb"
-            ref_filename = structure_name + "_ref.pdb"
+            box_size = BOX_SIZES[structure_name]
 
             mdtraj_traj_solv = create_mdtraj_trajectory_from_simulation_state(
                 simulation_state,
-                topology_path=self.data_input_dir / self.name / topology_filename,
+                topology_path=self.data_input_dir
+                / self.name
+                / "pdb_reference_structures"
+                / f"{structure_name}.pdb",
+                cell_lengths=box_size,  # type: ignore
             )
             ase_traj_solv = create_ase_trajectory_from_simulation_state(
                 simulation_state
@@ -271,14 +285,20 @@ def analyze(self) -> FoldingStabilityResult:
             # 2. Match in secondary structure (from DSSP)
             match_secondary_structure = get_match_secondary_structure(
                 mdtraj_traj,
-                ref_path=self.data_input_dir / self.name / ref_filename,
+                ref_path=self.data_input_dir
+                / self.name
+                / "pdb_reference_structures"
+                / f"{structure_name}_ref.pdb",
                 simplified=False,
             )
 
             # 3. TM-score and RMSD
             tm_scores, rmsd_values = compute_tm_scores_and_rmsd_values(
                 mdtraj_traj,
-                self.data_input_dir / self.name / ref_filename,
+                self.data_input_dir
+                / self.name
+                / "pdb_reference_structures"
+                / f"{structure_name}_ref.pdb",
             )
 
             initial_rg = rg_values[0]
@@ -333,3 +353,14 @@ def analyze(self) -> FoldingStabilityResult:
             ),
             score=score,
         )
+
+    def _assert_structure_names_in_model_output(self) -> None:
+        """Asserts whether model output structure names are fine as potentially they
+        have been transferred from a different benchmark.
+        """
+        assert set(self.model_output.structure_names).issubset(STRUCTURE_NAMES)  # type: ignore
+        assert len(self.model_output.structure_names) == (  # type: ignore
+            1
+            if self.run_mode == RunMode.DEV
+            else (2 if self.run_mode == RunMode.FAST else len(STRUCTURE_NAMES))
+        )
@@ -188,7 +188,13 @@ def get_all_dihedrals_from_trajectory(
                 dihedrals[residue] = {}
             dihedrals[residue][dihedral_name] = angles_deg[:, i]
 
-    return dihedrals
+    # Drop residues that have exactly one of the backbone dihedrals phi and psi
+    filtered_dihedrals = {}
+    for residue, dihedrals in dihedrals.items():
+        if not ("phi" in dihedrals) ^ ("psi" in dihedrals):
+            filtered_dihedrals[residue] = dihedrals
+
+    return filtered_dihedrals
 
 
 def identify_outlier_data_points(
 
@@ -35,34 +35,35 @@
     create_mdtraj_trajectory_from_simulation_state,
     get_simulation_engine,
 )
+from mlipaudit.utils.simulation import REUSABLE_BIOMOLECULES_OUTPUTS_ID
 from mlipaudit.utils.stability import is_simulation_stable
 
 logger = logging.getLogger("mlipaudit")
 
 STRUCTURE_NAMES = [
-    "thr_ile_solv",
-    "asn_asp_solv",
-    "tyr_trp_solv",
+    "chignolin_1uao_xray",
+    "trp_cage_2jof_xray",
+    "orexin_beta_1cq0_nmr",
 ]
 
-CUBIC_BOX_SIZES = {
-    "thr_ile_solv": 21.802,
-    "asn_asp_solv": 21.806,
-    "tyr_trp_solv": 24.012,
+BOX_SIZES = {
+    "chignolin_1uao_xray": [23.98, 22.45, 20.68],
+    "trp_cage_2jof_xray": [29.33, 29.74, 23.59],
+    "orexin_beta_1cq0_nmr": [40.30, 29.56, 33.97],
 }
 
 SIMULATION_CONFIG = {
-    "num_steps": 150_000,
-    "snapshot_interval": 1000,
-    "num_episodes": 150,
-    "temperature_kelvin": 350.0,
+    "num_steps": 250_000,
+    "snapshot_interval": 10_000,
+    "num_episodes": 25,
+    "temperature_kelvin": 300.0,
 }
 
-SIMULATION_CONFIG_FAST = {
-    "num_steps": 1,
+SIMULATION_CONFIG_DEV = {
+    "num_steps": 5,
     "snapshot_interval": 1,
     "num_episodes": 1,
-    "temperature_kelvin": 350.0,
+    "temperature_kelvin": 300.0,
 }
 
 RESNAME_TO_BACKBONE_RESIDUE_TYPE = {
@@ -260,6 +261,11 @@ class SamplingBenchmark(Benchmark):
             if there are some atomic element types that the model cannot handle. If
             False, the benchmark must have its own custom logic to handle missing atomic
             element types. For this benchmark, the attribute is set to True.
+        reusable_output_id: An optional ID, given as a tuple of strings, that is
+            shared by benchmarks with identical input systems and `ModelOutput`
+            signatures. If present, a user or the CLI can use this information to
+            reuse cached model outputs from another benchmark carrying the same ID
+            instead of rerunning simulations or inference.
     """
 
     name = "sampling"
@@ -269,36 +275,37 @@ class SamplingBenchmark(Benchmark):
 
     required_elements = {"N", "H", "O", "S", "C"}
 
+    reusable_output_id = REUSABLE_BIOMOLECULES_OUTPUTS_ID
+
     def run_model(self) -> None:
         """Run an MD simulation for each system."""
-        self.model_output = SamplingModelOutput(
-            structure_names=[],
-            simulation_states=[],
-        )
-
         if self.run_mode == RunMode.DEV:
-            md_config_dict = SIMULATION_CONFIG_FAST
-            structure_names = ["thr_ile_solv"]
+            structure_names = STRUCTURE_NAMES[:1]
         elif self.run_mode == RunMode.FAST:
-            md_config_dict = SIMULATION_CONFIG
-            structure_names = ["thr_ile_solv", "asn_asp_solv"]
+            structure_names = STRUCTURE_NAMES[:2]
         else:
-            md_config_dict = SIMULATION_CONFIG
             structure_names = STRUCTURE_NAMES
 
+        if self.run_mode == RunMode.DEV:
+            md_kwargs = SIMULATION_CONFIG_DEV
+        else:
+            md_kwargs = SIMULATION_CONFIG
+
+        self.model_output = SamplingModelOutput(
+            structure_names=[],
+            simulation_states=[],
+        )
+
         for structure_name in structure_names:
             logger.info("Running MD for %s", structure_name)
             xyz_filename = structure_name + ".xyz"
-            box_size = CUBIC_BOX_SIZES[structure_name]
-            md_kwargs = dict(
-                box=box_size,
-                **md_config_dict,
-            )
             atoms = ase_read(
                 self.data_input_dir / self.name / "starting_structures" / xyz_filename
             )
 
-            md_engine = get_simulation_engine(atoms, self.force_field, **md_kwargs)
+            md_engine = get_simulation_engine(
+                atoms, self.force_field, box=BOX_SIZES[structure_name], **md_kwargs
+            )
             md_engine.run()
 
             final_state = md_engine.state
@@ -317,6 +324,8 @@ def analyze(self) -> SamplingResult:
         if self.model_output is None:
             raise RuntimeError("Must call run_model() first.")
 
+        self._assert_structure_names_in_model_output()
+
         systems = []
         skipped_systems = []
 
@@ -359,7 +368,7 @@ def analyze(self) -> SamplingResult:
                 continue
 
             num_stable += 1
-            box_size = CUBIC_BOX_SIZES[structure_name]
+            box_size = BOX_SIZES[structure_name]
 
             trajectory = create_mdtraj_trajectory_from_simulation_state(
                 simulation_state,
@@ -369,7 +378,7 @@ def analyze(self) -> SamplingResult:
                     / "pdb_reference_structures"
                     / f"{structure_name}.pdb"
                 ),
-                cell_lengths=(box_size, box_size, box_size),
+                cell_lengths=box_size,  # type: ignore
             )
 
             dihedrals_data = get_all_dihedrals_from_trajectory(trajectory)
@@ -704,17 +713,24 @@ def _get_sampled_distributions(
 
         unique_residue_names = set([residue.name for residue in dihedrals_data.keys()])
 
+        dihedrals_per_unique_name: dict[str, dict[str, np.ndarray]] = {}
+        for residue, dihedrals in dihedrals_data.items():
+            if residue.name not in dihedrals_per_unique_name:
+                dihedrals_per_unique_name[residue.name] = defaultdict(list)
+            for dihedral_type, angle_list in dihedrals.items():
+                dihedrals_per_unique_name[residue.name][dihedral_type].extend(
+                    angle_list
+                )
+
         for residue_name in unique_residue_names:
             if not backbone:
                 dihedral_keys = self._get_allowed_sidechain_dihedral_keys(residue_name)
                 if len(dihedral_keys) == 0:
                     continue
 
             sampled_distributions[residue_name] = np.column_stack([
-                dihedrals_data[residue][dihedral_key]
-                for residue in dihedrals_data.keys()
+                dihedrals_per_unique_name[residue_name][dihedral_key]
                 for dihedral_key in dihedral_keys
-                if residue.name == residue_name
             ])
 
         return sampled_distributions
@@ -779,3 +795,14 @@ def _average_over_residues(
             The average metrics.
         """
         return np.mean(list(metrics_per_residue.values()))
+
+    def _assert_structure_names_in_model_output(self) -> None:
+        """Asserts whether model output structure names are fine as potentially they
+        have been transferred from a different benchmark.
+        """
+        assert set(self.model_output.structure_names).issubset(STRUCTURE_NAMES)  # type: ignore
+        assert len(self.model_output.structure_names) == (  # type: ignore
+            1
+            if self.run_mode == RunMode.DEV
+            else (2 if self.run_mode == RunMode.FAST else len(STRUCTURE_NAMES))
+        )