
Commit 6325dab

release: create release-0.1.2 branch
1 parent f5f2559 commit 6325dab

16 files changed: +275 -153 lines changed


.github/workflows/deploy_docs.yaml

Lines changed: 1 addition & 0 deletions

@@ -21,6 +21,7 @@ jobs:
         pip install -U pip setuptools
         pip install poetry==${POETRY_VERSION}
         poetry install
+        poetry run pip install git+https://github.com/jax-md/jax-md.git
 
     - name: Sphinx build
       run: |

CHANGELOG

Lines changed: 13 additions & 0 deletions

@@ -1,5 +1,18 @@
 # Changelog
 
+## Release 0.1.2
+
+- Fixing the computation of metrics during training, by reweighting the metrics of
+each batch to account for a varying number of real graphs per batch; this results
+in the metrics being independent of the batching strategy and number of GPUs employed
+- In addition to the point above, fixing the computation of RMSE metrics by now
+only computing MSE metrics in the loss and taking the square root at the very end
+when logging
+- Deleting relative and 95-percentile metrics, as they are not straightforward to
+compute on-the-fly with our dynamic batching strategy; we recommend to compute them
+separately for a model checkpoint if necessary
+- Small amount of modifications to README and documentation
+
 ## Release 0.1.1
 
 - Small amount of modifications to README and documentation
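The first two changelog points can be sketched in a few lines. This is not code from the library: `combine_batch_metrics` and the numbers are hypothetical, and the only assumption is that each batch reports an MSE value together with its count of real (non-dummy) graphs.

```python
import jax.numpy as jnp


def combine_batch_metrics(batch_mse: jnp.ndarray, real_graphs: jnp.ndarray) -> jnp.ndarray:
    """Average per-batch MSE values, weighted by the number of real (non-dummy) graphs."""
    weights = real_graphs / real_graphs.sum()
    return jnp.sum(weights * batch_mse)


# Hypothetical per-batch energy MSEs; the last batch is padded with dummy graphs.
batch_mse_e = jnp.array([0.040, 0.090, 0.010])
real_graphs = jnp.array([32.0, 32.0, 7.0])

dataset_mse_e = combine_batch_metrics(batch_mse_e, real_graphs)
dataset_rmse_e = jnp.sqrt(dataset_mse_e)  # square root taken only at the very end, when logging
```

Assuming each per-batch MSE is the mean over that batch's real graphs, this weighted average equals the plain average over all real graphs, so the result no longer depends on how the graphs were split into batches or across GPUs.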

README.md

Lines changed: 2 additions & 2 deletions

@@ -56,7 +56,7 @@ more information.
 At time of release, the following install command is supported:
 
 ```bash
-pip install -U "jax[cuda12]"
+pip install -U "jax[cuda12]==0.4.33"
 ```
 
 Note that using the TPU version of *jaxlib* is, in principle, also supported by

@@ -169,7 +169,7 @@ Scott Cameron, Louis Robinson, Tom Barrett, and Alex Laterre.
 
 ## 📚 Citing our work
 
-We kindly request to cite [our white paper](https://arxiv.org/abs/2505.22397)
+We kindly request that you cite [our white paper](https://arxiv.org/abs/2505.22397)
 when using this library:
 
 C. Brunken, O. Peltre, H. Chomet, L. Walewski, M. McAuliffe, V. Heyraud,

docs/source/api_reference/training/training_io_handling.rst

Lines changed: 2 additions & 0 deletions

@@ -30,3 +30,5 @@ IO handling during training
 .. autofunction:: log_metrics_to_table
 
 .. autofunction:: log_metrics_to_line
+
+.. autofunction:: convert_mse_to_rmse_in_logs

docs/source/installation/index.rst

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ At time of release, the following install command is supported:
 
 .. code-block:: bash
 
-   pip install -U "jax[cuda12]"
+   pip install -U "jax[cuda12]==0.4.33"
 
 Note that using the TPU version of *jaxlib* is, in principle, also supported by
 this library. However, it has not been thoroughly tested and should therefore be

docs/source/user_guide/training.rst

Lines changed: 27 additions & 6 deletions

@@ -82,11 +82,11 @@ Loss
 
 All losses must be implemented as derived classes of
 :py:class:`Loss <mlip.models.loss.Loss>`. We currently implement two losses, the
-Mean-Squared-Error loss (:py:class:`Loss <mlip.models.loss.MSELoss>`), and the
-Huber loss (:py:class:`Loss <mlip.models.loss.HuberLoss>`), which are both losses
+Mean-Squared-Error loss (:py:class:`MSELoss <mlip.models.loss.MSELoss>`), and the
+Huber loss (:py:class:`HuberLoss <mlip.models.loss.HuberLoss>`), which are both losses
 that are derived from a loss that computes errors for energies, forces, and stress,
 and weights them according to some weighting schedule that can depend on the epoch
-number (base class: :py:class:`Loss <mlip.models.loss.WeightedEFSLoss>`).
+number (base class: :py:class:`WeightedEFSLoss <mlip.models.loss.WeightedEFSLoss>`).
 
 If one wants to use the MSE loss for training, simply run this code to initialize it:
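The code block referenced by that last context line is not part of this hunk. As a purely hypothetical sketch of such an initialization (assuming `MSELoss` can be constructed with only the optional `extended_metrics` flag described in the next hunk):

```python
# Hypothetical sketch; the actual snippet lives in the documentation outside this hunk.
from mlip.models.loss import MSELoss

# `extended_metrics` is described below as an optional constructor argument (default: False);
# that no other arguments are needed here is an assumption.
loss = MSELoss(extended_metrics=True)
```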

@@ -105,7 +105,16 @@ If one wants to use the MSE loss for training, simply run this code to initializ
 
 For our two implemented losses, we also allow for computation of more extended metrics
 by setting the `extended_metrics` argument to `True` in the loss constructor.
-By default, it is `False`.
+By default, it is `False`. See the documentation of
+the :py:class:`call method <mlip.models.loss.WeightedEFSLoss.__call__>` of the class
+:py:class:`WeightedEFSLoss <mlip.models.loss.WeightedEFSLoss>` for more information on
+the returned metrics.
+
+Furthermore, note that even though the loss class is supposed to provide these metrics
+averaged just over a given input batch, we reweight these metrics based on the number
+of real (not dummy) graphs per batch in the training loop, such that the
+resulting metrics that are logged during training are accurately averaged
+over the whole dataset.
 
 .. _training_optimizer:
 

@@ -119,9 +128,13 @@ however, this library also has a specialized pipeline that has been inspired by
 `this <https://github.com/ACEsuit/mace>`_ PyTorch MACE implementation.
 It is configurable via a
 :py:class:`OptimizerConfig <mlip.training.optimizer_config.OptimizerConfig>` object that
-has sensible defaults set for training MLIP models.
+has sensible defaults set for training MLIP models. However, we suggest to also check
+out `our white paper <https://arxiv.org/abs/2505.22397>`_ for recommendations for
+sensible ways to adapt the defaults for specific models, for instance, ViSNet and
+NequIP seem to be more prone to NaNs with the default learning rate and benefit from
+using a smaller one such as ``1e-4``.
 
-This default MLIP optimizer can be set up like this:
+The default MLIP optimizer can be set up like this:
 
 .. code-block:: python
 

@@ -206,6 +219,14 @@ which prints the training metrics to the console in a nice table format (using
 :py:func:`log_metrics_to_line() <mlip.training.training_loggers.log_metrics_to_line>`,
 which logs the metrics in a single line.
 
+These logging functions automatically convert any MSE metrics to RMSE for easier
+interpretation. Internally, we only keep track of MSE instead of RMSE because we must
+ensure that the square root is taken at the very end and not before any averaging
+across batches or devices happens. If one desires to do the same conversion in their
+custom logging function, see
+:py:func:`convert_mse_to_rmse_in_logs() <mlip.training.training_loggers.convert_mse_to_rmse_in_logs>`,
+which is a helper function we provide for this task.
+
 Note that it is possible to omit the `io_handler` argument in the
 :py:class:`TrainingLoop <mlip.training.training_loop.TrainingLoop>` class. In that case,
 a default IO handler is set up internally and used. This IO handler does not include
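To make the MSE-to-RMSE convention concrete, here is a small, hypothetical stand-in for the conversion a custom logger needs. It is not the implementation of `convert_mse_to_rmse_in_logs`; the only assumption is that MSE entries use the `mse_*` key prefix produced by `compute_eval_metrics` (see `mlip/models/loss_helpers.py` below).

```python
import math


def mse_to_rmse(metrics: dict[str, float]) -> dict[str, float]:
    """Return a copy of the metrics dict with every mse_* entry replaced by an rmse_* entry."""
    converted = {}
    for key, value in metrics.items():
        if key.startswith("mse"):
            converted["r" + key] = math.sqrt(value)  # e.g. "mse_e" -> "rmse_e"
        else:
            converted[key] = value
    return converted


print(mse_to_rmse({"mae_e": 0.12, "mse_e": 0.04, "mse_f": 0.09}))
# {'mae_e': 0.12, 'rmse_e': 0.2, 'rmse_f': 0.3}
```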

mlip/models/loss.py

Lines changed: 34 additions & 0 deletions

@@ -98,6 +98,40 @@ def __call__(
         epoch: int,
         eval_metrics: bool = False,
     ) -> tuple[float, dict[str, float]]:
+        """The call function that outputs the loss and metrics (auxiliary data).
+
+        The metrics returned by this class if `eval_metrics=False`:
+        - average loss per structure
+        - energy, forces, and stress weighting factors
+
+        The metrics returned by this class if `eval_metrics=True`:
+        - average loss per structure
+        - MAE and MAE per atom (for energies, forces, and stress)
+        - MSE and MSE per atom (for energies, forces, and stress)
+
+        **Important note 1:** we provide MSE instead of RMSE, because MSE and MAE
+        metrics allow for downstream reweighting by number of real graphs per batch
+        to obtain the correct metrics over the whole dataset. This reweighting
+        is necessary as not every batch has the same number of real
+        (not dummy) graphs and is therefore done as part of the training loop.
+        Feel free to take the square root of the final MSE metric before logging it.
+        The default loggers provided with this library also report RMSE instead of MSE
+        during training.
+
+        **Important note 2:** we use per-component errors for forces instead of
+        computing force error vectors per atom and then computing their norm.
+
+        Args:
+            prediction: The force field predictor's outputs.
+            ref_graph: The reference graph holding the ground truth data.
+            epoch: The epoch number.
+            eval_metrics: Switch deciding whether to include additional
+                evaluation metrics to the returned dictionary.
+                Default is `False`.
+
+        Returns:
+            The loss and the auxiliary metrics dictionary.
+        """
         # Get weights
         energy_weight = self.energy_weight_schedule(epoch)
         forces_weight = self.forces_weight_schedule(epoch)
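Important note 2 in the docstring above can be illustrated numerically. The snippet below is a hypothetical illustration (the error values are made up), not library code; it contrasts the per-component convention used here with the per-atom-norm alternative.

```python
import jax.numpy as jnp

# Made-up force errors for three atoms, shape (n_atoms, 3).
delta_f = jnp.array([[0.1, -0.2, 0.0],
                     [0.0, 0.3, 0.1],
                     [-0.1, 0.0, 0.2]])

# Per-component convention (used here): mean squared error over all 3 * n_atoms components.
mse_per_component = jnp.mean(jnp.square(delta_f))

# Alternative (not used here): squared norm of each atom's error vector, averaged per atom.
mse_per_atom_norm = jnp.mean(jnp.sum(jnp.square(delta_f), axis=-1))

# The two differ by a factor of 3 (the number of components), so reported force
# metrics depend on which convention is chosen.
```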

mlip/models/loss_helpers.py

Lines changed: 25 additions & 115 deletions

@@ -52,52 +52,16 @@ def compute_mae_stress(delta: jnp.ndarray, mask) -> float:
     return _masked_mean_stress(jnp.abs(delta), mask)
 
 
-def compute_rel_mae(delta: jnp.ndarray, target_val: jnp.ndarray, mask) -> float:
-    target_norm = _masked_mean(jnp.abs(target_val), mask)
-    return _masked_mean(jnp.abs(delta), mask) / (target_norm + 1e-30)
+def compute_mse(delta: jnp.ndarray, mask) -> float:
+    return _masked_mean(jnp.square(delta), mask)
 
 
-def compute_rel_mae_f(delta: jnp.ndarray, target_val: jnp.ndarray, mask) -> float:
-    target_norm = _masked_mean_f(jnp.abs(target_val), mask)
-    return _masked_mean_f(jnp.abs(delta), mask) / (target_norm + 1e-30)
+def compute_mse_f(delta: jnp.ndarray, mask) -> float:
+    return _masked_mean_f(jnp.square(delta), mask)
 
 
-def compute_rel_mae_stress(delta: jnp.ndarray, target_val: jnp.ndarray, mask) -> float:
-    target_norm = _masked_mean_stress(jnp.abs(target_val), mask)
-    return _masked_mean_stress(jnp.abs(delta), mask) / (target_norm + 1e-30)
-
-
-def compute_rmse(delta: jnp.ndarray, mask) -> float:
-    return jnp.sqrt(_masked_mean(jnp.square(delta), mask))
-
-
-def compute_rmse_f(delta: jnp.ndarray, mask) -> float:
-    return jnp.sqrt(_masked_mean_f(jnp.square(delta), mask))
-
-
-def compute_rmse_stress(delta: jnp.ndarray, mask) -> float:
-    return jnp.sqrt(_masked_mean_stress(jnp.square(delta), mask))
-
-
-def compute_rel_rmse(delta: jnp.ndarray, target_val: jnp.ndarray, mask) -> float:
-    target_norm = jnp.sqrt(_masked_mean(jnp.square(target_val), mask))
-    return jnp.sqrt(_masked_mean(jnp.square(delta), mask)) / (target_norm + 1e-30)
-
-
-def compute_rel_rmse_f(delta: jnp.ndarray, target_val: jnp.ndarray, mask) -> float:
-    target_norm = jnp.sqrt(_masked_mean_f(jnp.square(target_val), mask))
-    return jnp.sqrt(_masked_mean_f(jnp.square(delta), mask)) / (target_norm + 1e-30)
-
-
-def compute_rel_rmse_stress(delta: jnp.ndarray, target_val: jnp.ndarray, mask) -> float:
-    target_norm = jnp.sqrt(_masked_mean_stress(jnp.square(target_val), mask))
-    return jnp.sqrt(_masked_mean_stress(jnp.square(delta), mask)) / (
-        target_norm + 1e-30
-    )
-
-
-def compute_q95(delta: jnp.ndarray) -> float:
-    return jnp.percentile(jnp.abs(delta), q=95)
+def compute_mse_stress(delta: jnp.ndarray, mask) -> float:
+    return _masked_mean_stress(jnp.square(delta), mask)
 
 
 def _sum_nodes_of_the_same_graph(

@@ -295,120 +259,66 @@ def compute_eval_metrics(
             stress_per_atom_list.append(ref_graph.globals.stress / jnp.sum(node_mask))
 
     metrics = {
-        "mae_e": None,
-        "rel_mae_e": None,
-        "mae_e_per_atom": None,
-        "rel_mae_e_per_atom": None,
-        "rmse_e": None,
-        "rel_rmse_e": None,
-        "rmse_e_per_atom": None,
-        "rel_rmse_e_per_atom": None,
-        "q95_e": None,
-        "mae_f": None,
-        "rel_mae_f": None,
-        "rmse_f": None,
-        "rel_rmse_f": None,
-        "q95_f": None,
-        "mae_stress": None,
-        "rel_mae_stress": None,
-        "mae_stress_per_atom": None,
-        "rel_mae_stress_per_atom": None,
-        "rmse_stress": None,
-        "rel_rmse_stress": None,
-        "rmse_stress_per_atom": None,
-        "rel_rmse_stress_per_atom": None,
-        "q95_stress": None,
+        "mae_e": jnp.nan,
+        "mae_e_per_atom": jnp.nan,
+        "mse_e": jnp.nan,
+        "mse_e_per_atom": jnp.nan,
+        "mae_f": jnp.nan,
+        "mse_f": jnp.nan,
+        "mae_stress": jnp.nan,
+        "mae_stress_per_atom": jnp.nan,
+        "mse_stress": jnp.nan,
+        "mse_stress_per_atom": jnp.nan,
     }
 
     if len(delta_es_list) > 0:
         delta_es = jnp.concatenate(delta_es_list, axis=0)
         delta_es_per_atom = jnp.concatenate(delta_es_per_atom_list, axis=0)
-        es = jnp.concatenate(es_list, axis=0)
-        es_per_atom = jnp.concatenate(es_per_atom_list, axis=0)
 
         metrics.update(
            {
                # Mean absolute error
                "mae_e": compute_mae(delta_es, graph_mask),
-               # Root-mean-square error
-               "rmse_e": compute_rmse(delta_es, graph_mask),
+               # Mean-square error
+               "mse_e": compute_mse(delta_es, graph_mask),
            }
        )
        if extended_metrics:
            metrics.update(
                {
                    # Mean absolute error
-                   "rel_mae_e": compute_rel_mae(delta_es, es, graph_mask),
                    "mae_e_per_atom": compute_mae(delta_es_per_atom, graph_mask),
-                   "rel_mae_e_per_atom": compute_rel_mae(
-                       delta_es_per_atom, es_per_atom, graph_mask
-                   ),
-                   # Root-mean-square error
-                   "rel_rmse_e": compute_rel_rmse(delta_es, es, graph_mask),
-                   "rmse_e_per_atom": compute_rmse(delta_es_per_atom, graph_mask),
-                   "rel_rmse_e_per_atom": compute_rel_rmse(
-                       delta_es_per_atom, es_per_atom, graph_mask
-                   ),
-                   # Q_95
-                   "q95_e": compute_q95(delta_es),
+                   # Mean-square error
+                   "mse_e_per_atom": compute_mse(delta_es_per_atom, graph_mask),
                }
            )
 
    if len(delta_fs_list) > 0:
        delta_fs = jnp.concatenate(delta_fs_list, axis=0)
-       fs = jnp.concatenate(fs_list, axis=0)
-
        metrics.update(
            {
                # Mean absolute error
                "mae_f": compute_mae_f(delta_fs, node_mask),
-               # Root-mean-square error
-               "rmse_f": compute_rmse_f(delta_fs, node_mask),
+               # Mean-square error
+               "mse_f": compute_mse_f(delta_fs, node_mask),
            }
        )
-       if extended_metrics:
-           metrics.update(
-               {
-                   # Mean absolute error
-                   "rel_mae_f": compute_rel_mae_f(delta_fs, fs, node_mask),
-                   # Root-mean-square error
-                   "rel_rmse_f": compute_rel_rmse_f(delta_fs, fs, node_mask),
-                   # Q_95
-                   "q95_f": compute_q95(delta_fs),
-               }
-           )
 
    if len(delta_stress_list) > 0 and extended_metrics:
        delta_stress = jnp.concatenate(delta_stress_list, axis=0)
        delta_stress_per_atom = jnp.concatenate(delta_stress_per_atom_list, axis=0)
-       stress = jnp.concatenate(stress_list, axis=0)
-       stress_per_atom = jnp.concatenate(stress_per_atom_list, axis=0)
        metrics.update(
            {
                # Mean absolute error
                "mae_stress": compute_mae_stress(delta_stress, graph_mask),
-               "rel_mae_stress": compute_rel_mae_stress(
-                   delta_stress, stress, graph_mask
-               ),
                "mae_stress_per_atom": compute_mae_stress(
                    delta_stress_per_atom, graph_mask
                ),
-               "rel_mae_stress_per_atom": compute_rel_mae_stress(
-                   delta_stress_per_atom, stress_per_atom, graph_mask
-               ),
-               # Root-mean-square error
-               "rmse_stress": compute_rmse_stress(delta_stress, graph_mask),
-               "rel_rmse_stress": compute_rel_rmse_stress(
-                   delta_stress, stress, graph_mask
-               ),
-               "rmse_stress_per_atom": compute_rmse_stress(
+               # Mean-square error
+               "mse_stress": compute_mse_stress(delta_stress, graph_mask),
+               "mse_stress_per_atom": compute_mse_stress(
                    delta_stress_per_atom, graph_mask
                ),
-               "rel_rmse_stress_per_atom": compute_rel_rmse_stress(
-                   delta_stress_per_atom, stress_per_atom, graph_mask
-               ),
-               # Q_95
-               "q95_stress": compute_q95(delta_stress),
            }
        )
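The new `compute_mse*` helpers above delegate to `_masked_mean`, `_masked_mean_f`, and `_masked_mean_stress`, which are not part of this diff. Below is a hypothetical sketch of such a masked mean, only to show how dummy entries are excluded from the MSE; the actual helpers in `loss_helpers.py` may differ.

```python
import jax.numpy as jnp


def masked_mean_sketch(values: jnp.ndarray, mask: jnp.ndarray) -> jnp.ndarray:
    """Mean of `values` over entries where `mask` is 1, ignoring padded (dummy) entries."""
    mask = mask.astype(values.dtype)
    return jnp.sum(values * mask) / jnp.maximum(jnp.sum(mask), 1.0)


def compute_mse_sketch(delta: jnp.ndarray, mask: jnp.ndarray) -> jnp.ndarray:
    # Mirrors the structure of compute_mse above: a masked mean of squared errors.
    return masked_mean_sketch(jnp.square(delta), mask)


# Two real graphs and one dummy graph whose (arbitrary) error must not count.
delta_e = jnp.array([0.2, -0.1, 5.0])
graph_mask = jnp.array([1.0, 1.0, 0.0])
print(compute_mse_sketch(delta_e, graph_mask))  # ≈ 0.025
```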

mlip/training/__init__.py

Lines changed: 5 additions & 1 deletion

@@ -18,6 +18,10 @@
 )
 from mlip.training.optimizer_config import OptimizerConfig
 from mlip.training.training_io_handler import TrainingIOHandler, TrainingIOHandlerConfig
-from mlip.training.training_loggers import log_metrics_to_line, log_metrics_to_table
+from mlip.training.training_loggers import (
+    convert_mse_to_rmse_in_logs,
+    log_metrics_to_line,
+    log_metrics_to_table,
+)
 from mlip.training.training_loop import TrainingLoop
 from mlip.training.training_loop_config import TrainingLoopConfig
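With this re-export, the new helper is importable directly from the package namespace alongside the existing loggers:

```python
from mlip.training import (
    convert_mse_to_rmse_in_logs,
    log_metrics_to_line,
    log_metrics_to_table,
)
```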
