instadeepai
diff --git a/‎docs/source/api_reference/utils/inference_and_simulation.rst‎
Lines changed: 2 additions & 0 deletions b/‎docs/source/api_reference/utils/inference_and_simulation.rst‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/mlipaudit/benchmark.py‎
Lines changed: 3 additions & 0 deletions b/‎src/mlipaudit/benchmark.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/mlipaudit/benchmarks/bond_length_distribution/bond_length_distribution.py‎
Lines changed: 45 additions & 25 deletions b/‎src/mlipaudit/benchmarks/bond_length_distribution/bond_length_distribution.py‎
Lines changed: 45 additions & 25 deletions
diff --git a/‎src/mlipaudit/benchmarks/conformer_selection/conformer_selection.py‎
Lines changed: 57 additions & 27 deletions b/‎src/mlipaudit/benchmarks/conformer_selection/conformer_selection.py‎
Lines changed: 57 additions & 27 deletions
@@ -11,4 +11,6 @@ Inference and simulation helpers
 
 .. autofunction:: get_simulation_engine
 
+.. autofunction:: run_simulation
+
 .. autoclass:: ASESimulationEngineWithCalculator
@@ -34,10 +34,13 @@ class BenchmarkResult(BaseModel):
     """A base model for all benchmark results.
 
     Attributes:
+        failed: Whether all the simulations or inferences failed
+            and no analysis could be performed. Defaults to False.
         score: The final score for the benchmark between
             0 and 1.
     """
 
+    failed: bool = False
     score: float | None = Field(ge=0, le=1, default=None)
 
 
 
@@ -23,7 +23,7 @@
 from mlipaudit.benchmark import Benchmark, BenchmarkResult, ModelOutput
 from mlipaudit.run_mode import RunMode
 from mlipaudit.scoring import compute_benchmark_score
-from mlipaudit.utils import get_simulation_engine
+from mlipaudit.utils import run_simulation
 from mlipaudit.utils.stability import is_simulation_stable
 
 logger = logging.getLogger("mlipaudit")
@@ -37,12 +37,13 @@
     "temperature_kelvin": 300.0,
 }
 
-SIMULATION_CONFIG_FAST = {
+SIMULATION_CONFIG_DEV = {
     "num_steps": 10,
     "snapshot_interval": 1,
     "num_episodes": 1,
     "temperature_kelvin": 300.0,
 }
+NUM_DEV_SYSTEMS = 2
 
 DEVIATION_SCORE_THRESHOLD = 0.05
 
@@ -75,13 +76,17 @@ class MoleculeModelOutput(BaseModel):
 
     Attributes:
         molecule_name: The name of the molecule.
-        simulation_state: The simulation state.
+        simulation_state: The simulation state. Defaults to None
+            if the simulation failed.
+        failed: Whether the simulation failed on the molecule.
+            Defaults to False.
     """
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     molecule_name: str
-    simulation_state: SimulationState
+    simulation_state: SimulationState | None = None
+    failed: bool = False
 
 
 class BondLengthDistributionModelOutput(ModelOutput):
@@ -90,9 +95,11 @@ class BondLengthDistributionModelOutput(ModelOutput):
 
     Attributes:
         molecules: A list of simulation states for every molecule.
+        num_failed: The number of molecules for which simulation failed.
     """
 
     molecules: list[MoleculeModelOutput]
+    num_failed: int = 0
 
 
 class BondLengthDistributionMoleculeResult(BaseModel):
@@ -105,8 +112,8 @@ class BondLengthDistributionMoleculeResult(BaseModel):
             with each frame corresponding to 1ps of simulation time.
         avg_deviation: The average deviation of the molecule over the
             whole trajectory.
-        failed: Whether the simulation was stable. If not stable, the other
-            attributes will be not be set.
+        failed: Whether the simulation failed or was unstable. If so,
+            the other attributes will default to None. Defaults to False.
     """
 
     molecule_name: str
@@ -122,7 +129,9 @@ class BondLengthDistributionResult(BenchmarkResult):
     Attributes:
         molecules: The individual results for each molecule in a list.
         avg_deviation: The average of the average deviations for each
-            molecule that was stable. If no stable molecules, will be None.
+            molecule that was stable. If the benchmark failed, will be None.
+        failed: Whether all the simulations or inferences failed
+            and no analysis could be performed. Defaults to False.
         score: The final score for the benchmark between
             0 and 1.
     """
@@ -168,30 +177,36 @@ def run_model(self) -> None:
         the reference structure. The simulation state is stored in the
         `model_output` attribute.
         """
-        molecule_outputs = []
-
         if self.run_mode == RunMode.DEV:
-            md_kwargs = SIMULATION_CONFIG_FAST
+            md_kwargs = SIMULATION_CONFIG_DEV
         else:
             md_kwargs = SIMULATION_CONFIG
 
+        molecule_outputs, num_failed = [], 0
+
         for pattern_name, molecule in self._bond_length_distribution_data.items():
             logger.info("Running MD for %s", pattern_name)
 
             atoms = Atoms(
                 symbols=molecule.atom_symbols,
                 positions=molecule.coordinates,
             )
-            md_engine = get_simulation_engine(atoms, self.force_field, **md_kwargs)
-            md_engine.run()
+            simulation_state = run_simulation(atoms, self.force_field, **md_kwargs)
+
+            if simulation_state is not None:
+                molecule_output = MoleculeModelOutput(
+                    molecule_name=pattern_name, simulation_state=simulation_state
+                )
+            else:
+                molecule_output = MoleculeModelOutput(
+                    molecule_name=pattern_name, failed=True
+                )
+                num_failed += 1
 
-            molecule_output = MoleculeModelOutput(
-                molecule_name=pattern_name, simulation_state=md_engine.state
-            )
             molecule_outputs.append(molecule_output)
 
         self.model_output = BondLengthDistributionModelOutput(
-            molecules=molecule_outputs
+            molecules=molecule_outputs, num_failed=num_failed
         )
 
     def analyze(self) -> BondLengthDistributionResult:
@@ -210,19 +225,22 @@ def analyze(self) -> BondLengthDistributionResult:
         if self.model_output is None:
             raise RuntimeError("Must call run_model() first.")
 
-        results = []
-        num_stable = 0
-        for molecule_output in self.model_output.molecules:
-            trajectory = molecule_output.simulation_state.positions
+        results: list[BondLengthDistributionMoleculeResult] = []
+        num_succeeded = 0
 
-            if not is_simulation_stable(molecule_output.simulation_state):
+        for molecule_output in self.model_output.molecules:
+            if molecule_output.failed or not is_simulation_stable(
+                molecule_output.simulation_state
+            ):
                 molecule_result = BondLengthDistributionMoleculeResult(
                     molecule_name=molecule_output.molecule_name, failed=True
                 )
                 results.append(molecule_result)
                 continue
 
-            num_stable += 1
+            num_succeeded += 1
+
+            trajectory = molecule_output.simulation_state.positions
 
             pattern_indices = self._bond_length_distribution_data[
                 molecule_output.molecule_name
@@ -247,8 +265,10 @@ def analyze(self) -> BondLengthDistributionResult:
             )
             results.append(molecule_result)
 
-        if num_stable == 0:
-            return BondLengthDistributionResult(molecules=results, score=0.0)
+        if num_succeeded == 0:
+            return BondLengthDistributionResult(
+                molecules=results, failed=True, score=0.0
+            )
 
         avg_deviation = statistics.mean(
             r.avg_deviation for r in results if r.avg_deviation is not None
@@ -275,6 +295,6 @@ def _bond_length_distribution_data(self) -> dict[str, Molecule]:
             dataset = Molecules.validate_json(f.read())
 
         if self.run_mode == RunMode.DEV:
-            dataset = dict(list(dataset.items())[:2])
+            dataset = dict(list(dataset.items())[:NUM_DEV_SYSTEMS])
 
         return dataset
@@ -30,14 +30,15 @@
 logger = logging.getLogger("mlipaudit")
 
 WIGGLE_DATASET_FILENAME = "wiggle150_dataset.json"
+NUM_DEV_SYSTEMS = 1
 
 MAE_SCORE_THRESHOLD = 0.5
 RMSE_SCORE_THRESHOLD = 1.5
 
 
 class ConformerSelectionMoleculeResult(BaseModel):
     """Results object for small molecule conformer selection benchmark for a single
-    molecule.
+    molecule. Will have attributes set to None if the inference failed.
 
     Attributes:
         molecule_name: The molecule's name.
@@ -51,31 +52,37 @@ class ConformerSelectionMoleculeResult(BaseModel):
             and reference energy profiles.
         predicted_energy_profile: The predicted energy profile for each conformer.
         reference_energy_profile: The reference energy profiles for each conformer.
+        failed: Whether the inference failed on the molecule.
     """
 
     molecule_name: str
-    mae: NonNegativeFloat
-    rmse: NonNegativeFloat
-    spearman_correlation: float = Field(ge=-1.0, le=1.0)
-    spearman_p_value: float = Field(ge=0.0, le=1.0)
-    predicted_energy_profile: list[float]
-    reference_energy_profile: list[float]
+    mae: NonNegativeFloat | None = None
+    rmse: NonNegativeFloat | None = None
+    spearman_correlation: float | None = Field(ge=-1.0, le=1.0, default=None)
+    spearman_p_value: float | None = Field(ge=0.0, le=1.0, default=None)
+    predicted_energy_profile: list[float] | None = None
+    reference_energy_profile: list[float] | None = None
+    failed: bool = False
 
 
 class ConformerSelectionResult(BenchmarkResult):
     """Results object for small molecule conformer selection benchmark.
 
     Attributes:
         molecules: The individual results for each molecule in a list.
-        avg_mae: The MAE values for all molecules averaged.
-        avg_rmse: The RMSE values for all molecules averaged.
+        avg_mae: The average of the MAE values over all molecules that
+            did not fail. Is None if all the inferences failed.
+        avg_rmse: The average of the RMSE values over all molecules that
+            did not fail. Is None if all the inferences failed.
+        failed: Whether all the simulations or inferences failed
+            and no analysis could be performed. Defaults to False.
         score: The final score for the benchmark between
             0 and 1.
     """
 
     molecules: list[ConformerSelectionMoleculeResult]
-    avg_mae: NonNegativeFloat
-    avg_rmse: NonNegativeFloat
+    avg_mae: NonNegativeFloat | None = None
+    avg_rmse: NonNegativeFloat | None = None
 
 
 class ConformerSelectionMoleculeModelOutput(BaseModel):
@@ -84,20 +91,25 @@ class ConformerSelectionMoleculeModelOutput(BaseModel):
     Attributes:
         molecule_name: The molecule's name.
         predicted_energy_profile: The predicted energy profile for the conformers.
+            Is None if the inference failed on the molecule.
+        failed: Whether the inference failed on the molecule.
     """
 
     molecule_name: str
-    predicted_energy_profile: list[float]
+    predicted_energy_profile: list[float] | None = None
+    failed: bool = False
 
 
 class ConformerSelectionModelOutput(ModelOutput):
     """Stores model outputs for the conformer selection benchmark.
 
     Attributes:
         molecules: Results for each molecule.
+        num_failed: The number of molecules on which inference failed.
     """
 
     molecules: list[ConformerSelectionMoleculeModelOutput]
+    num_failed: int = 0
 
 
 class Conformer(BaseModel):
@@ -160,7 +172,7 @@ def run_model(self) -> None:
         The calculation is performed as a batched inference using the MLIP force field
         directly. The energy profile is stored in the `model_output` attribute.
         """
-        molecule_outputs = []
+        molecule_outputs, num_failed = [], 0
         for structure in self._wiggle150_data:
             logger.info("Running energy calculations for %s", structure.molecule_name)
 
@@ -178,17 +190,23 @@ def run_model(self) -> None:
                 batch_size=16,
             )
 
-            energy_profile_list: list[float] = [
-                prediction.energy for prediction in predictions
-            ]
+            if None in predictions:
+                model_output = ConformerSelectionMoleculeModelOutput(
+                    molecule_name=structure.molecule_name, failed=True
+                )
+                num_failed += 1
 
-            model_output = ConformerSelectionMoleculeModelOutput(
-                molecule_name=structure.molecule_name,
-                predicted_energy_profile=energy_profile_list,
-            )
+            else:
+                energy_profile_list = [prediction.energy for prediction in predictions]  # type: ignore
+                model_output = ConformerSelectionMoleculeModelOutput(
+                    molecule_name=structure.molecule_name,
+                    predicted_energy_profile=energy_profile_list,
+                )
             molecule_outputs.append(model_output)
 
-        self.model_output = ConformerSelectionModelOutput(molecules=molecule_outputs)
+        self.model_output = ConformerSelectionModelOutput(
+            molecules=molecule_outputs, num_failed=num_failed
+        )
 
     def analyze(self) -> ConformerSelectionResult:
         """Calculates the MAE, RMSE and Spearman correlation.
@@ -210,12 +228,21 @@ def analyze(self) -> ConformerSelectionResult:
             conformer.molecule_name: np.array(conformer.dft_energy_profile)
             for conformer in self._wiggle150_data
         }
-
         results = []
+
         for molecule in self.model_output.molecules:
             molecule_name = molecule.molecule_name
-            energy_profile = molecule.predicted_energy_profile
-            energy_profile = np.array(energy_profile)
+
+            if molecule.failed:
+                results.append(
+                    ConformerSelectionMoleculeResult(
+                        molecule_name=molecule_name, failed=True
+                    )
+                )
+                continue
+
+            energy_profile = np.array(molecule.predicted_energy_profile)
+
             ref_energy_profile = np.array(reference_energy_profiles[molecule_name])
 
             min_ref_energy = np.min(ref_energy_profile)
@@ -251,8 +278,11 @@ def analyze(self) -> ConformerSelectionResult:
 
             results.append(molecule_result)
 
-        avg_mae = statistics.mean(r.mae for r in results)
-        avg_rmse = statistics.mean(r.rmse for r in results)
+        if self.model_output.num_failed == len(self.model_output.molecules):
+            return ConformerSelectionResult(molecules=results, failed=True, score=0.0)
+
+        avg_mae = statistics.mean(r.mae for r in results if r.mae is not None)
+        avg_rmse = statistics.mean(r.rmse for r in results if r.rmse is not None)
 
         score = compute_benchmark_score(
             [[r.mae for r in results], [r.rmse for r in results]],
@@ -276,6 +306,6 @@ def _wiggle150_data(self) -> list[Conformer]:
             wiggle150_data = Conformers.validate_json(f.read())
 
         if self.run_mode == RunMode.DEV:
-            wiggle150_data = wiggle150_data[:1]
+            wiggle150_data = wiggle150_data[:NUM_DEV_SYSTEMS]
 
         return wiggle150_data