stfc · ElliottKasoar · Feb 12, 2025 · Feb 4, 2025 · Feb 4, 2025 · Feb 6, 2025
diff --git a/docs/source/user_guide/command_line.rst b/docs/source/user_guide/command_line.rst
@@ -272,6 +272,38 @@ running an additional 1000 steps, and appending the statistics and trajectory fi
 
 For all options, run ``janus md --help``.
 
+Post-processing
++++++++++++++++
+
+After MD, velocity autocorrelation functions (VAFs) and radial distribution functions (RDFs) may be calculated as a post-processing step. It is possible to build partial VAFs and RDFs from arbitrary sequences of atom indices or species names.
+
+To compute the VAF across all atoms from the command line the following options could be passed:
+
+.. code-block:: bash
+
+   janus md --ensemble nve --struct tests/data/NaCl.cif --steps 1000 --traj-every 10 --post-process-kwargs "{'vaf_compute': True, 'vaf_start': 10, 'vaf_stop': None, 'vaf_step': 2}"
+
+In this case a file ``NaCl-nve-T300.0-vaf.dat`` would be created containing correlation values and lag times. The file will contain 45 correlation values with an implied time step of 20 fs. That is starting at time step 100 and correlating every 20 steps.
+
+The VAF is computed from the trajectory file, so ``--traj-every`` controls the possible temporal resolution of the VAF. Additionally ``'vaf_start'``, ``'vaf_stop'``, and ``'vaf_step'`` in ``-post-process-kwargs`` control which trajectory frames are used to calculate the VAF.
+
+To compute partial VAFs of Na and Cl the following kwargs may be passed:
+
+.. code-block:: bash
+
+   janus md --ensemble nve --struct tests/data/NaCl.cif --steps 100 --traj-every 10 --post-process-kwargs "{'vaf_compute': True, 'vaf_atoms': (('Na',), ('Cl',)), 'vaf_output_files': ('vaf_na.dat', 'vaf_cl.dat')}"
+
+Where ``'vaf_atoms'`` is a ``Sequence`` of ``Sequence`` of element names (or atom indices) included in each VAF. The output files must also be specified in the case of multiple VAFs, by default defined relative to the working directory.
+
+Computing the RDF is similar, for example:
+
+.. code-block:: bash
+
+   janus md --ensemble nve --struct tests/data/NaCl.cif --steps 100 --traj-every 10 --post-process-kwargs "{'rdf_compute': True, 'rdf_rmax': 2.0, 'rdf_elements': ('Na', 'Cl'), 'rdf_by_elements': True}"
+
+will compute the RDFs for ``Na`` and ``Cl`` atoms. Seperately for each paring, up to a maximum cutoff distance of ``2.0`` Angstroms. These will be written to three seperate files ``NaCl-nve-T300.0-Na_Na-rdf.dat``, ``NaCl-nve-T300.0-Cl_Cl-rdf.dat``, and ``NaCl-nve-T300.0-Na_Cl-rdf.dat``. Setting ``'rdf_by_elements': False`` will generate one RDF of all atoms saved to ``NaCl-nve-T300.0-rdf.dat``.
+
+The RDF is also computed from the trajectory file and the options ``rdf_start``, ``rdf_stop``, and ``rdf_step`` may be used to control which trajectory frames are utilised.
 
 Heating
 -------

diff --git a/janus_core/calculations/md.py b/janus_core/calculations/md.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from collections.abc import Sequence
 import datetime
 from functools import partial
 from itertools import combinations_with_replacement
@@ -979,13 +980,18 @@ def _post_process(self) -> None:
             compute_rdf(data, ana, filenames=out_paths, **rdf_args)
 
         if self.post_process_kwargs.get("vaf_compute", False):
-            file_name = self.post_process_kwargs.get("vaf_output_file", None)
+            file_names = self.post_process_kwargs.get("vaf_output_files", None)
             use_vel = self.post_process_kwargs.get("vaf_velocities", False)
             fft = self.post_process_kwargs.get("vaf_fft", False)
 
-            out_path = self._build_filename(
-                "vaf.dat", self.param_prefix, filename=file_name
+            if not isinstance(file_names, Sequence):
+                file_names = (file_names,)
+
+            out_paths = tuple(
+                self._build_filename("vaf.dat", self.param_prefix, filename=file_name)
+                for file_name in file_names
             )
+
             slice_ = (
                 self.post_process_kwargs.get("vaf_start", 0),
                 self.post_process_kwargs.get("vaf_stop", None),
@@ -994,11 +1000,11 @@ def _post_process(self) -> None:
 
             compute_vaf(
                 data,
-                out_path,
+                out_paths,
                 use_velocities=use_vel,
                 fft=fft,
                 index=slice_,
-                filter_atoms=self.post_process_kwargs.get("vaf_atoms", None),
+                atoms_filter=self.post_process_kwargs.get("vaf_atoms", None),
             )
 
     def _write_restart(self) -> None:

diff --git a/janus_core/helpers/janus_types.py b/janus_core/helpers/janus_types.py
@@ -77,7 +77,7 @@ class PostProcessKwargs(TypedDict, total=False):
     vaf_start: int
     vaf_stop: int | None
     vaf_step: int
-    vaf_output_file: PathLike | None
+    vaf_output_files: Sequence[PathLike] | None
 
 
 class CorrelationKwargs(TypedDict, total=True):

diff --git a/janus_core/processing/post_process.py b/janus_core/processing/post_process.py
@@ -165,7 +165,7 @@ def compute_vaf(
     use_velocities: bool = False,
     fft: bool = False,
     index: SliceLike = (0, None, 1),
-    filter_atoms: MaybeSequence[MaybeSequence[int | None]] = ((None),),
+    atoms_filter: MaybeSequence[MaybeSequence[int | str | None]] = ((None,),),
     time_step: float = 1.0,
 ) -> tuple[NDArray[float64], list[NDArray[float64]]]:
     """
@@ -186,7 +186,7 @@ def compute_vaf(
     index
         Images to analyze as `start`, `stop`, `step`.
         Default is all images.
-    filter_atoms
+    atoms_filter
         Compute the VAF averaged over subsets of the system.
         Default is all atoms.
     time_step
@@ -202,37 +202,38 @@ def compute_vaf(
 
     Notes
     -----
-    `filter_atoms` is given as a series of sequences of atoms, where
-    each element in the series denotes a VAF subset to calculate and
-    each sequence determines the atoms (by index) to be included in that VAF.
+    `atoms_filter` is given as a series of sequences of atoms or elements,
+    where each value in the series denotes a VAF subset to calculate and
+    each sequence determines the atoms (by index or element)
+    to be included in that VAF.
 
     E.g.
 
     .. code-block: Python
 
         # Species indices in cell
         na = (1, 3, 5, 7)
-        cl = (2, 4, 6, 8)
+        # Species by name
+        cl = ('Cl')
 
-        compute_vaf(..., filter_atoms=(na, cl))
+        compute_vaf(..., atoms_filter=(na, cl))
 
     Would compute separate VAFs for each species.
 
     By default, one VAF will be computed for all atoms in the structure.
     """
     # Ensure if passed scalars they are turned into correct dimensionality
-    if not isinstance(filter_atoms, Sequence):
-        filter_atoms = (filter_atoms,)
-    if not isinstance(filter_atoms[0], Sequence):
-        filter_atoms = (filter_atoms,)
-
+    if isinstance(atoms_filter, str) or not isinstance(atoms_filter, Sequence):
+        atoms_filter = (atoms_filter,)
+    if isinstance(atoms_filter[0], str) or not isinstance(atoms_filter[0], Sequence):
+        atoms_filter = (atoms_filter,)
     if filenames and not isinstance(filenames, Sequence):
         filenames = (filenames,)
 
-        if len(filenames) != len(filter_atoms):
+        if len(filenames) != len(atoms_filter):
             raise ValueError(
                 f"Different number of file names ({len(filenames)}) "
-                f"to number of samples ({len(filter_atoms)})"
+                f"to number of samples ({len(atoms_filter)})"
             )
 
     # Extract requested data
@@ -247,12 +248,32 @@ def compute_vaf(
     n_steps = len(momenta)
     n_atoms = len(momenta[0])
 
-    # If filter_atoms not specified use all atoms
-    filter_atoms = [
-        atom if atom[0] is not None else range(n_atoms) for atom in filter_atoms
-    ]
+    filtered_atoms = []
+    atom_symbols = data[0].get_chemical_symbols()
+    symbols = set(atom_symbols)
+    for atoms in atoms_filter:
+        if any(atom is None for atom in atoms):
+            # If atoms_filter not specified use all atoms.
+            filtered_atoms.append(range(n_atoms))
+        elif all(isinstance(a, str) for a in atoms):
+            # If all symbols, get the matching indices.
+            atoms = set(atoms)
+            if atoms.difference(symbols):
+                raise ValueError(
+                    f"{atoms.difference(symbols)} not allowed in VAF"
+                    f", allowed symbols are {symbols}"
+                )
+            filtered_atoms.append(
+                [i for i in range(len(atom_symbols)) if atom_symbols[i] in list(atoms)]
+            )
+        elif all(isinstance(a, int) for a in atoms):
+            filtered_atoms.append(atoms)
+        else:
+            raise ValueError(
+                "Cannot mix element symbols and indices in vaf atoms_filter"
+            )
 
-    used_atoms = {atom for atoms in filter_atoms for atom in atoms}
+    used_atoms = {atom for atoms in filtered_atoms for atom in atoms}
     used_atoms = {j: i for i, j in enumerate(used_atoms)}
 
     vafs = np.sum(
@@ -282,10 +303,9 @@ def compute_vaf(
         lags,
         [
             np.average([vafs[used_atoms[i]] for i in atoms], axis=0)
-            for atoms in filter_atoms
+            for atoms in filtered_atoms
         ],
     )
-
     if filenames:
         for vaf, filename in zip(vafs[1], filenames, strict=True):
             with open(filename, "w", encoding="utf-8") as out_file:

diff --git a/tests/test_correlator.py b/tests/test_correlator.py
@@ -124,7 +124,7 @@ def test_vaf(tmp_path):
 
     traj = read(traj_path, index=":")
     vaf_post = post_process.compute_vaf(
-        traj, use_velocities=True, filter_atoms=(na, cl)
+        traj, use_velocities=True, atoms_filter=(na, cl)
     )
     with open(cor_path) as cor:
         vaf = safe_load(cor)

diff --git a/tests/test_post_process.py b/tests/test_post_process.py
@@ -6,6 +6,7 @@
 
 from ase.io import read
 import numpy as np
+import pytest
 from pytest import approx
 from typer.testing import CliRunner
 
@@ -72,8 +73,8 @@ def test_md_pp_cli(tmp_path):
     log_path = tmp_path / "test.log"
     summary_path = tmp_path / "summary.yml"
     rdf_path = tmp_path / "nve-T300-rdf.dat"
-    vaf_path = tmp_path / "nve-T300-vaf.dat"
-
+    vaf_na_path = Path("vaf_na.dat")
+    vaf_cl_path = Path("vaf_cl.dat")
     result = runner.invoke(
         app,
         [
@@ -94,6 +95,8 @@ def test_md_pp_cli(tmp_path):
             summary_path,
             "--post-process-kwargs",
             """{'vaf_compute': True,
+              'vaf_atoms': (('Na',),('Cl',)),
+              'vaf_output_files': ('vaf_na.dat', 'vaf_cl.dat'),
               'rdf_compute': True,
               'rdf_rmax': 2.5}""",
         ],
@@ -108,7 +111,8 @@ def test_md_pp_cli(tmp_path):
     # Cell too small to really compute RDF
     assert np.all(rdf[:, 1] == 0)
 
-    assert vaf_path.exists()
+    assert vaf_na_path.exists()
+    assert vaf_cl_path.exists()
 
 
 def test_rdf():
@@ -196,7 +200,7 @@ def test_vaf(tmp_path):
     assert isinstance(vaf[0], np.ndarray)
 
     lags, vaf = post_process.compute_vaf(
-        data, filter_atoms=vaf_filter, filenames=[tmp_path / name for name in vaf_names]
+        data, atoms_filter=vaf_filter, filenames=[tmp_path / name for name in vaf_names]
     )
 
     assert isinstance(vaf, list)
@@ -212,3 +216,29 @@ def test_vaf(tmp_path):
         assert vaf[i] == approx(expected, rel=1e-9)
         assert lags == approx(w_lag, rel=1e-9)
         assert vaf[i] == approx(w_vaf, rel=1e-9)
+
+
+def test_vaf_by_symbols(tmp_path):
+    """Test vaf using element symbols."""
+    vaf_names = ("vaf-Na-by-indices.dat", "vaf-Na-by-element.dat")
+    vaf_filter = ((0, 2, 4, 6), ("Na",))
+
+    data = read(DATA_PATH / "NaCl-traj.xyz", index=":")
+    lags, vaf = post_process.compute_vaf(
+        data, atoms_filter=vaf_filter, filenames=[tmp_path / name for name in vaf_names]
+    )
+    expected = np.loadtxt(tmp_path / "vaf-Na-by-indices.dat")
+    actual = np.loadtxt(tmp_path / "vaf-Na-by-element.dat")
+
+    assert expected == approx(actual, rel=1e-9)
+
+
+def test_vaf_invalid_symbols(tmp_path):
+    """Test vaf using invalid element symbols."""
+    data = read(DATA_PATH / "NaCl-traj.xyz", index=":")
+    with pytest.raises(ValueError):
+        # "C" is not an atom in data.
+        post_process.compute_vaf(data, atoms_filter=(("C",)))
+    with pytest.raises(ValueError):
+        # str is also iterable.
+        post_process.compute_vaf(data, atoms_filter="NOT AN ATOM")