Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions docs/source/user_guide/command_line.rst
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,38 @@ running an additional 1000 steps, and appending the statistics and trajectory fi

For all options, run ``janus md --help``.

Post-processing
+++++++++++++++

After MD, velocity autocorrelation functions (VAFs) and radial distribution functions (RDFs) may be calculated as a post-processing step. It is possible to build partial VAFs and RDFs from arbitrary sequences of atom indices or species names.

To compute the VAF across all atoms from the command line the following options could be passed:

.. code-block:: bash

janus md --ensemble nve --struct tests/data/NaCl.cif --steps 1000 --traj-every 10 --post-process-kwargs "{'vaf_compute': True, 'vaf_start': 10, 'vaf_stop': None, 'vaf_step': 2}"

In this case a file ``NaCl-nve-T300.0-vaf.dat`` would be created containing correlation values and lag times. The file will contain 45 correlation values with an implied time step of 20 fs. That is starting at time step 100 and correlating every 20 steps.

The VAF is computed from the trajectory file, so ``--traj-every`` controls the possible temporal resolution of the VAF. Additionally ``'vaf_start'``, ``'vaf_stop'``, and ``'vaf_step'`` in ``-post-process-kwargs`` control which trajectory frames are used to calculate the VAF.

To compute partial VAFs of Na and Cl the following kwargs may be passed:

.. code-block:: bash

janus md --ensemble nve --struct tests/data/NaCl.cif --steps 100 --traj-every 10 --post-process-kwargs "{'vaf_compute': True, 'vaf_atoms': (('Na',), ('Cl',)), 'vaf_output_files': ('vaf_na.dat', 'vaf_cl.dat')}"

Where ``'vaf_atoms'`` is a ``Sequence`` of ``Sequence`` of element names (or atom indices) included in each VAF. The output files must also be specified in the case of multiple VAFs, by default defined relative to the working directory.

Computing the RDF is similar, for example:

.. code-block:: bash

janus md --ensemble nve --struct tests/data/NaCl.cif --steps 100 --traj-every 10 --post-process-kwargs "{'rdf_compute': True, 'rdf_rmax': 2.0, 'rdf_elements': ('Na', 'Cl'), 'rdf_by_elements': True}"

will compute the RDFs for ``Na`` and ``Cl`` atoms. Seperately for each paring, up to a maximum cutoff distance of ``2.0`` Angstroms. These will be written to three seperate files ``NaCl-nve-T300.0-Na_Na-rdf.dat``, ``NaCl-nve-T300.0-Cl_Cl-rdf.dat``, and ``NaCl-nve-T300.0-Na_Cl-rdf.dat``. Setting ``'rdf_by_elements': False`` will generate one RDF of all atoms saved to ``NaCl-nve-T300.0-rdf.dat``.

The RDF is also computed from the trajectory file and the options ``rdf_start``, ``rdf_stop``, and ``rdf_step`` may be used to control which trajectory frames are utilised.

Heating
-------
Expand Down
16 changes: 11 additions & 5 deletions janus_core/calculations/md.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

from collections.abc import Sequence
import datetime
from functools import partial
from itertools import combinations_with_replacement
Expand Down Expand Up @@ -979,13 +980,18 @@ def _post_process(self) -> None:
compute_rdf(data, ana, filenames=out_paths, **rdf_args)

if self.post_process_kwargs.get("vaf_compute", False):
file_name = self.post_process_kwargs.get("vaf_output_file", None)
file_names = self.post_process_kwargs.get("vaf_output_files", None)
use_vel = self.post_process_kwargs.get("vaf_velocities", False)
fft = self.post_process_kwargs.get("vaf_fft", False)

out_path = self._build_filename(
"vaf.dat", self.param_prefix, filename=file_name
if not isinstance(file_names, Sequence):
file_names = (file_names,)

out_paths = tuple(
self._build_filename("vaf.dat", self.param_prefix, filename=file_name)
for file_name in file_names
)

slice_ = (
self.post_process_kwargs.get("vaf_start", 0),
self.post_process_kwargs.get("vaf_stop", None),
Expand All @@ -994,11 +1000,11 @@ def _post_process(self) -> None:

compute_vaf(
data,
out_path,
out_paths,
use_velocities=use_vel,
fft=fft,
index=slice_,
filter_atoms=self.post_process_kwargs.get("vaf_atoms", None),
atoms_filter=self.post_process_kwargs.get("vaf_atoms", None),
)

def _write_restart(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion janus_core/helpers/janus_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class PostProcessKwargs(TypedDict, total=False):
vaf_start: int
vaf_stop: int | None
vaf_step: int
vaf_output_file: PathLike | None
vaf_output_files: Sequence[PathLike] | None


class CorrelationKwargs(TypedDict, total=True):
Expand Down
62 changes: 41 additions & 21 deletions janus_core/processing/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def compute_vaf(
use_velocities: bool = False,
fft: bool = False,
index: SliceLike = (0, None, 1),
filter_atoms: MaybeSequence[MaybeSequence[int | None]] = ((None),),
atoms_filter: MaybeSequence[MaybeSequence[int | str | None]] = ((None,),),
time_step: float = 1.0,
) -> tuple[NDArray[float64], list[NDArray[float64]]]:
"""
Expand All @@ -186,7 +186,7 @@ def compute_vaf(
index
Images to analyze as `start`, `stop`, `step`.
Default is all images.
filter_atoms
atoms_filter
Compute the VAF averaged over subsets of the system.
Default is all atoms.
time_step
Expand All @@ -202,37 +202,38 @@ def compute_vaf(

Notes
-----
`filter_atoms` is given as a series of sequences of atoms, where
each element in the series denotes a VAF subset to calculate and
each sequence determines the atoms (by index) to be included in that VAF.
`atoms_filter` is given as a series of sequences of atoms or elements,
where each value in the series denotes a VAF subset to calculate and
each sequence determines the atoms (by index or element)
to be included in that VAF.

E.g.

.. code-block: Python

# Species indices in cell
na = (1, 3, 5, 7)
cl = (2, 4, 6, 8)
# Species by name
cl = ('Cl')

compute_vaf(..., filter_atoms=(na, cl))
compute_vaf(..., atoms_filter=(na, cl))

Would compute separate VAFs for each species.

By default, one VAF will be computed for all atoms in the structure.
"""
# Ensure if passed scalars they are turned into correct dimensionality
if not isinstance(filter_atoms, Sequence):
filter_atoms = (filter_atoms,)
if not isinstance(filter_atoms[0], Sequence):
filter_atoms = (filter_atoms,)

if isinstance(atoms_filter, str) or not isinstance(atoms_filter, Sequence):
atoms_filter = (atoms_filter,)
if isinstance(atoms_filter[0], str) or not isinstance(atoms_filter[0], Sequence):
atoms_filter = (atoms_filter,)
if filenames and not isinstance(filenames, Sequence):
filenames = (filenames,)

if len(filenames) != len(filter_atoms):
if len(filenames) != len(atoms_filter):
raise ValueError(
f"Different number of file names ({len(filenames)}) "
f"to number of samples ({len(filter_atoms)})"
f"to number of samples ({len(atoms_filter)})"
)

# Extract requested data
Expand All @@ -247,12 +248,32 @@ def compute_vaf(
n_steps = len(momenta)
n_atoms = len(momenta[0])

# If filter_atoms not specified use all atoms
filter_atoms = [
atom if atom[0] is not None else range(n_atoms) for atom in filter_atoms
]
filtered_atoms = []
atom_symbols = data[0].get_chemical_symbols()
symbols = set(atom_symbols)
for atoms in atoms_filter:
if any(atom is None for atom in atoms):
# If atoms_filter not specified use all atoms.
filtered_atoms.append(range(n_atoms))
elif all(isinstance(a, str) for a in atoms):
# If all symbols, get the matching indices.
atoms = set(atoms)
if atoms.difference(symbols):
raise ValueError(
f"{atoms.difference(symbols)} not allowed in VAF"
f", allowed symbols are {symbols}"
)
filtered_atoms.append(
[i for i in range(len(atom_symbols)) if atom_symbols[i] in list(atoms)]
)
elif all(isinstance(a, int) for a in atoms):
filtered_atoms.append(atoms)
else:
raise ValueError(
"Cannot mix element symbols and indices in vaf atoms_filter"
)

used_atoms = {atom for atoms in filter_atoms for atom in atoms}
used_atoms = {atom for atoms in filtered_atoms for atom in atoms}
used_atoms = {j: i for i, j in enumerate(used_atoms)}

vafs = np.sum(
Expand Down Expand Up @@ -282,10 +303,9 @@ def compute_vaf(
lags,
[
np.average([vafs[used_atoms[i]] for i in atoms], axis=0)
for atoms in filter_atoms
for atoms in filtered_atoms
],
)

if filenames:
for vaf, filename in zip(vafs[1], filenames, strict=True):
with open(filename, "w", encoding="utf-8") as out_file:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_correlator.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def test_vaf(tmp_path):

traj = read(traj_path, index=":")
vaf_post = post_process.compute_vaf(
traj, use_velocities=True, filter_atoms=(na, cl)
traj, use_velocities=True, atoms_filter=(na, cl)
)
with open(cor_path) as cor:
vaf = safe_load(cor)
Expand Down
38 changes: 34 additions & 4 deletions tests/test_post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from ase.io import read
import numpy as np
import pytest
from pytest import approx
from typer.testing import CliRunner

Expand Down Expand Up @@ -72,8 +73,8 @@ def test_md_pp_cli(tmp_path):
log_path = tmp_path / "test.log"
summary_path = tmp_path / "summary.yml"
rdf_path = tmp_path / "nve-T300-rdf.dat"
vaf_path = tmp_path / "nve-T300-vaf.dat"

vaf_na_path = Path("vaf_na.dat")
vaf_cl_path = Path("vaf_cl.dat")
result = runner.invoke(
app,
[
Expand All @@ -94,6 +95,8 @@ def test_md_pp_cli(tmp_path):
summary_path,
"--post-process-kwargs",
"""{'vaf_compute': True,
'vaf_atoms': (('Na',),('Cl',)),
'vaf_output_files': ('vaf_na.dat', 'vaf_cl.dat'),
'rdf_compute': True,
'rdf_rmax': 2.5}""",
],
Expand All @@ -108,7 +111,8 @@ def test_md_pp_cli(tmp_path):
# Cell too small to really compute RDF
assert np.all(rdf[:, 1] == 0)

assert vaf_path.exists()
assert vaf_na_path.exists()
assert vaf_cl_path.exists()


def test_rdf():
Expand Down Expand Up @@ -196,7 +200,7 @@ def test_vaf(tmp_path):
assert isinstance(vaf[0], np.ndarray)

lags, vaf = post_process.compute_vaf(
data, filter_atoms=vaf_filter, filenames=[tmp_path / name for name in vaf_names]
data, atoms_filter=vaf_filter, filenames=[tmp_path / name for name in vaf_names]
)

assert isinstance(vaf, list)
Expand All @@ -212,3 +216,29 @@ def test_vaf(tmp_path):
assert vaf[i] == approx(expected, rel=1e-9)
assert lags == approx(w_lag, rel=1e-9)
assert vaf[i] == approx(w_vaf, rel=1e-9)


def test_vaf_by_symbols(tmp_path):
"""Test vaf using element symbols."""
vaf_names = ("vaf-Na-by-indices.dat", "vaf-Na-by-element.dat")
vaf_filter = ((0, 2, 4, 6), ("Na",))

data = read(DATA_PATH / "NaCl-traj.xyz", index=":")
lags, vaf = post_process.compute_vaf(
data, atoms_filter=vaf_filter, filenames=[tmp_path / name for name in vaf_names]
)
expected = np.loadtxt(tmp_path / "vaf-Na-by-indices.dat")
actual = np.loadtxt(tmp_path / "vaf-Na-by-element.dat")

assert expected == approx(actual, rel=1e-9)


def test_vaf_invalid_symbols(tmp_path):
"""Test vaf using invalid element symbols."""
data = read(DATA_PATH / "NaCl-traj.xyz", index=":")
with pytest.raises(ValueError):
# "C" is not an atom in data.
post_process.compute_vaf(data, atoms_filter=(("C",)))
with pytest.raises(ValueError):
# str is also iterable.
post_process.compute_vaf(data, atoms_filter="NOT AN ATOM")