Skip to content

Commit a582242

Browse files
authored
feat: three options for how fast benchmarks can run (#42)
* refactor: use run_mode instead of fast_dev_run flag * feat: add changes to some benchmarks to support RunMode.FAST flag * feat: allow for run mode to be passed as string, too * test: add unit test for run mode conversion * fix: path was wrong in tests for data dir
1 parent 0ae2a39 commit a582242

File tree

36 files changed

+267
-93
lines changed

36 files changed

+267
-93
lines changed

docs/source/api_reference/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Base classes and utilities
1313

1414
benchmark
1515
io
16+
run_mode
1617
utils/trajectory_helpers
1718

1819
Benchmark implementations
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.. _run_mode:
2+
3+
.. module:: mlipaudit.run_mode
4+
5+
Run Mode
6+
========
7+
8+
.. autoclass:: RunMode

docs/source/tutorials/cli/index.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@ The tool has the following command line options:
3939
list of benchmark names (e.g., ``dihedral_scan``, ``ring_planarity``) or ``all`` to
4040
run all available benchmarks which is also the default which means that if this flag
4141
is not used, all benchmarks will be run.
42-
* ``--fast-dev-run``: *Optional* setting that allows to run a very minimal version of
43-
each benchmark for development and testing purposes. The default behavior is that it
44-
is not set.
42+
* ``--run-mode``: *Optional* setting that allows running faster versions of the
43+
benchmark suite. The default option is ``standard``, which runs the entire suite.
44+
The option ``fast`` runs a slightly faster version for some of the very long-running
45+
benchmarks. The option ``dev`` runs a very minimal version of each benchmark for
46+
development and testing purposes.
4547

4648
For example, if you want to run the entire benchmark suite for two models, say
4749
``visnet_1`` and ``mace_2``, use this command:

docs/source/tutorials/new_benchmark/index.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ members to override are:
7777
Hence, to add a new benchmark, three classes must be implemented, the benchmark, model
7878
output, and results class.
7979

80+
Note that we also recommend that a new benchmark implements a very minimal version
81+
of itself that is run if ``self.run_mode == RunMode.DEV``. For very long-running
82+
benchmarks, we also recommend implementing a version for
83+
``self.run_mode == RunMode.FAST`` that may differ
84+
from ``self.run_mode == RunMode.STANDARD``, however, for most benchmarks this may
85+
not be necessary.
86+
8087
Minimal example implementation
8188
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
8289

src/mlipaudit/benchmark.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@
1616
import zipfile
1717
from abc import ABC, abstractmethod
1818
from pathlib import Path
19-
from typing import Any
19+
from typing import Any, Literal, TypeAlias
2020

2121
from ase import Atom
2222
from huggingface_hub import hf_hub_download
2323
from mlip.models import ForceField
2424
from pydantic import BaseModel
2525

2626
from mlipaudit.exceptions import ChemicalElementsMissingError
27+
from mlipaudit.run_mode import RunMode
28+
29+
RunModeAsString: TypeAlias = Literal["dev", "fast", "standard"]
2730

2831

2932
class BenchmarkResult(BaseModel):
@@ -75,7 +78,7 @@ def __init__(
7578
self,
7679
force_field: ForceField,
7780
data_input_dir: str | os.PathLike = "./data",
78-
fast_dev_run: bool = False,
81+
run_mode: RunMode | RunModeAsString = RunMode.STANDARD,
7982
) -> None:
8083
"""Initializes the benchmark.
8184
@@ -85,19 +88,26 @@ def __init__(
8588
"./data". If the subdirectory "{data_input_dir}/{benchmark_name}"
8689
exists, the benchmark expects the relevant data to be in there,
8790
otherwise it will download it from HuggingFace.
88-
fast_dev_run: Whether to do a fast developer run. Subclasses
89-
should ensure that when `True`, their benchmark runs in a
91+
run_mode: Whether to run the standard benchmark length, a faster version,
92+
or a very fast development version. Subclasses
93+
should ensure that when `RunMode.DEV`, their benchmark runs in a
9094
much shorter timeframe, by running on a reduced number of
91-
test cases, for instance.
95+
test cases, for instance. Implementing `RunMode.FAST` being different
96+
from `RunMode.STANDARD` is optional and only recommended for very
97+
long-running benchmarks. This argument can also be passed as a string
98+
"dev", "fast", or "standard".
9299
93100
Raises:
94101
ChemicalElementsMissingError: If initialization is attempted
95102
with a force field that cannot perform inference on the
96103
required elements.
97104
"""
105+
self.run_mode = run_mode
106+
if not isinstance(self.run_mode, RunMode):
107+
self.run_mode = RunMode(run_mode)
108+
98109
self.force_field = force_field
99110
self._handle_missing_element_types()
100-
self.fast_dev_run = fast_dev_run
101111
self.data_input_dir = Path(data_input_dir)
102112

103113
self.model_output: ModelOutput | None = None

src/mlipaudit/bond_length_distribution/bond_length_distribution.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pydantic import BaseModel, ConfigDict, TypeAdapter
2323

2424
from mlipaudit.benchmark import Benchmark, BenchmarkResult, ModelOutput
25+
from mlipaudit.run_mode import RunMode
2526

2627
logger = logging.getLogger("mlipaudit")
2728

@@ -152,7 +153,7 @@ def run_model(self) -> None:
152153
"""
153154
molecule_outputs = []
154155

155-
if self.fast_dev_run:
156+
if self.run_mode == RunMode.DEV:
156157
md_config = JaxMDSimulationEngine.Config(**SIMULATION_CONFIG_FAST)
157158
else:
158159
md_config = JaxMDSimulationEngine.Config(**SIMULATION_CONFIG)
@@ -231,7 +232,7 @@ def _bond_length_distribution_data(self) -> dict[str, Molecule]:
231232
) as f:
232233
dataset = Molecules.validate_json(f.read())
233234

234-
if self.fast_dev_run:
235+
if self.run_mode == RunMode.DEV:
235236
dataset = dict(list(dataset.items())[:2])
236237

237238
return dataset

src/mlipaudit/conformer_selection/conformer_selection.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
2525

2626
from mlipaudit.benchmark import Benchmark, BenchmarkResult, ModelOutput
27+
from mlipaudit.run_mode import RunMode
2728

2829
logger = logging.getLogger("mlipaudit")
2930

@@ -255,7 +256,7 @@ def _wiggle150_data(self) -> list[Conformer]:
255256
) as f:
256257
wiggle150_data = Conformers.validate_json(f.read())
257258

258-
if self.fast_dev_run:
259+
if self.run_mode == RunMode.DEV:
259260
wiggle150_data = wiggle150_data[:1]
260261

261262
return wiggle150_data

src/mlipaudit/dihedral_scan/dihedral_scan.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
2626

2727
from mlipaudit.benchmark import Benchmark, BenchmarkResult, ModelOutput
28+
from mlipaudit.run_mode import RunMode
2829

2930
logger = logging.getLogger("mlipaudit")
3031

@@ -287,7 +288,7 @@ def _torsion_net_500(self) -> dict[str, Fragment]:
287288
) as f:
288289
dataset = Fragments.validate_json(f.read())
289290

290-
if self.fast_dev_run:
291+
if self.run_mode == RunMode.DEV:
291292
dataset = {
292293
"fragment_001": dataset["fragment_001"],
293294
"fragment_002": dataset["fragment_002"],

src/mlipaudit/folding_stability/folding_stability.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
compute_tm_scores_and_rmsd_values,
2828
get_match_secondary_structure,
2929
)
30+
from mlipaudit.run_mode import RunMode
3031
from mlipaudit.utils import (
3132
create_ase_trajectory_from_simulation_state,
3233
create_mdtraj_trajectory_from_simulation_state,
@@ -161,8 +162,10 @@ def run_model(self) -> None:
161162
simulation_states=[],
162163
)
163164

164-
structure_names = STRUCTURE_NAMES[:1] if self.fast_dev_run else STRUCTURE_NAMES
165-
if self.fast_dev_run:
165+
structure_names = (
166+
STRUCTURE_NAMES[:1] if self.run_mode == RunMode.DEV else STRUCTURE_NAMES
167+
)
168+
if self.run_mode == RunMode.DEV:
166169
md_config = JaxMDSimulationEngine.Config(**SIMULATION_CONFIG_FAST)
167170
else:
168171
md_config = JaxMDSimulationEngine.Config(**SIMULATION_CONFIG)

src/mlipaudit/main.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from mlipaudit.noncovalent_interactions import NoncovalentInteractionsBenchmark
3030
from mlipaudit.reactivity import ReactivityBenchmark
3131
from mlipaudit.ring_planarity import RingPlanarityBenchmark
32+
from mlipaudit.run_mode import RunMode
3233
from mlipaudit.sampling import SamplingBenchmark
3334
from mlipaudit.scaling import ScalingBenchmark
3435
from mlipaudit.small_molecule_minimization import SmallMoleculeMinimizationBenchmark
@@ -89,14 +90,15 @@ def _parser() -> ArgumentParser:
8990
help="List of benchmarks to run.",
9091
)
9192
parser.add_argument(
92-
"--fast-dev-run",
93-
action="store_true",
94-
help="run the benchmarks in fast-dev-run mode",
93+
"--run-mode",
94+
required=False,
95+
choices=[mode.value for mode in RunMode],
96+
default=RunMode.STANDARD.value,
97+
help="mode to run the benchmarks in",
9598
)
9699
return parser
97100

98101

99-
# TODO: We should probably handle this in a different (nicer) way
100102
def _model_class_from_name(model_name: str) -> type[MLIPNetwork]:
101103
if "visnet" in model_name:
102104
return Visnet
@@ -170,7 +172,7 @@ def main():
170172
benchmark = benchmark_class(
171173
force_field=force_field,
172174
data_input_dir=args.input,
173-
fast_dev_run=args.fast_dev_run,
175+
run_mode=args.run_mode,
174176
)
175177
benchmark.run_model()
176178
result = benchmark.analyze()

0 commit comments

Comments
 (0)