Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from aiida import orm
from aiida.engine import calcfunction
import numpy as np

SINGULAR_TRAJ_KEYS = ('symbols', 'atomic_species_name')


@calcfunction
def concatenate_trajectory(**kwargs):
    """Concatenate a set of ``TrajectoryData`` nodes into a single trajectory.

    :param kwargs: the ``TrajectoryData`` nodes to concatenate, joined in the order of their
        sorted link labels, plus the required key ``remove_repeated_last_step`` (a ``Bool``).
        When true, the last step of every trajectory except the final one is dropped, since a
        restarted run repeats the last step of its predecessor (typically used for the pinball
        binary); when false the arrays are concatenated as-is (typically used for vanilla QE).
    :returns: dict with key ``concatenated_trajectory`` containing the new ``TrajectoryData``.
    :raises TypeError: if any trajectory input is not an instance of ``TrajectoryData``.
    """
    remove_repeated_last_step = kwargs.pop('remove_repeated_last_step')
    for key, value in kwargs.items():
        if not isinstance(value, orm.TrajectoryData):
            raise TypeError(f'input `{key}` is not an instance of TrajectoryData')
    # Sort by link label so the segments are concatenated in run order (trajectory_00, trajectory_01, ...).
    sorted_trajectories = [trajectory for _, trajectory in sorted(kwargs.items())]
    # Assuming all trajectories store the same arrays.
    arraynames = sorted_trajectories[0].get_arraynames()
    traj = orm.TrajectoryData()
    for arrname in arraynames:
        if arrname in SINGULAR_TRAJ_KEYS:
            # Per-trajectory constants (e.g. the atomic symbols): copy from the first segment.
            traj.set_array(arrname, sorted_trajectories[0].get_array(arrname))
        elif remove_repeated_last_step and len(sorted_trajectories) > 1:
            # Remove the last step that is repeated when restarting, keeping the very last one.
            # Typically used for the pinball binary.
            traj.set_array(
                arrname,
                np.concatenate(
                    [
                        np.concatenate([t.get_array(arrname)[:-1] for t in sorted_trajectories[:-1]]),
                        sorted_trajectories[-1].get_array(arrname),
                    ]
                ),
            )
        else:
            # Just concatenate, typically used for vanilla QE.
            traj.set_array(arrname, np.concatenate([t.get_array(arrname) for t in sorted_trajectories]))
    # Copy the scalar attributes of the first segment, skipping the array bookkeeping entries.
    for key, value in sorted_trajectories[0].attributes_items():
        if not key.startswith('array|'):
            traj.set_attribute(key, value)
    # TODO: if the timestep is different, then stride the denser trajectory so that each step is at the same
    # level, and they also both must have the same strides
    if 'timestep_in_fs' in sorted_trajectories[0].attributes:
        traj.set_attribute(
            'sim_time_fs', traj.get_array('steps').size * sorted_trajectories[0].get_attribute('timestep_in_fs')
        )
    return {'concatenated_trajectory': traj}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from aiida import orm
from aiida.engine import calcfunction
import numpy as np


@calcfunction
def get_last_step_from_trajectory(trajectory):
    """Extract the last step of a trajectory as a new, single-step ``TrajectoryData``.

    The last step of a trajectory is extracted and used as a thermalised trajectory
    from which a subsequent calculation can be performed. This is useful to separate
    the equilibration phase from the production phase in an MD simulation.

    :param trajectory: the ``TrajectoryData`` to extract the last step from.
    :returns: dict with key ``last_step_trajectory`` containing the new ``TrajectoryData``.
    :raises TypeError: if the input is not an instance of ``TrajectoryData``.
    """
    if not isinstance(trajectory, orm.TrajectoryData):
        raise TypeError('my input has to be an instance of TrajectoryData')
    traj = orm.TrajectoryData()
    for arrname in trajectory.get_arraynames():
        if arrname in ('symbols', 'atomic_species_name'):
            # Per-trajectory constants: copied verbatim.
            traj.set_array(arrname, trajectory.get_array(arrname))
        elif arrname in ('steps', 'times', 'walltimes'):
            # Bookkeeping arrays are reset to their first entry, so the extracted
            # trajectory starts counting from the beginning — presumably so a restart
            # does not inherit the old step/time counters (TODO confirm intent).
            traj.set_array(arrname, np.array([trajectory.get_array(arrname)[0]]))
        else:
            # Physical data (positions, velocities, cells, ...): keep only the last step.
            traj.set_array(arrname, np.array([trajectory.get_array(arrname)[-1]]))

    # Copy the scalar attributes, skipping the array bookkeeping entries.
    for key, value in trajectory.attributes_items():
        if not key.startswith('array|'):
            traj.set_attribute(key, value)
    if 'timestep_in_fs' in trajectory.attributes:
        traj.set_attribute('sim_time_fs', traj.get_array('steps').size * trajectory.get_attribute('timestep_in_fs'))
    return {'last_step_trajectory': traj}
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from aiida import orm
from aiida.engine import calcfunction
from qe_tools import CONSTANTS
import numpy as np


@calcfunction
def get_structure_from_trajectory(trajectory, parameters, structure=None, settings=None):
    """
    Get a structure from a trajectory, given the step index.

    :param trajectory: A trajectory data instance.
    :param parameters: An instance of parameter data. Needs to store the key step_index,
        optionally also the keys:
        * create_settings: whether to also create the settings (an instance of ParameterData) that stores the velocities.
          Of course, that assumes that the velocities are stored in the trajectory data.
        * complete_missing: If the trajectory does not store all the information required to create the structure,
          i.e. certain atoms were excluded. This will use the structure to complete these atoms.
          An example is the optimized pinball parser, which only stores the positions/velocities of the pinballs.
          Another example: if the cell trajectory is not found, the structure's cell will be used.
        * recenter: When true, set the center of mass momentum to 0 (when restarting from a trajectory that doesn't preserve the center of mass).
    :param structure: If complete_missing is True, a structure is required to fill in the missing atoms.
    :param settings: If create_settings is True, the dictionary of this instance (if provided) is updated with the velocities.
    :raises InputValidationError: if complete_missing is True but no structure was passed.
    """
    from aiida.common.exceptions import InputValidationError

    step_index = parameters.dict.step_index
    recenter = parameters.get_attribute('recenter', False)
    create_settings = parameters.get_attribute('create_settings', False)
    complete_missing = parameters.get_attribute('complete_missing', False)

    if complete_missing and structure is None:
        raise InputValidationError('You need to pass a structure when completing missing atoms.')

    pos_units = trajectory.get_attribute('units|positions', 'angstrom')
    atoms = trajectory.get_step_structure(step_index).get_ase()

    # No cell trajectory was stored: fall back to the cell of the reference structure,
    # converting from Bohr to angstrom if the stored cell units were atomic.
    if ('cells' not in trajectory.get_arraynames()) and complete_missing:
        cell_units = trajectory.get_attribute('units|cells', 'angstrom')
        if cell_units == 'angstrom':
            atoms.set_cell(structure.cell)
        elif cell_units == 'atomic':
            atoms.set_cell(np.array(structure.cell) * CONSTANTS.bohr_to_ang)
        else:
            raise Exception("Can't deal with units of cells {}.".format(cell_units))

    # Convert positions to angstrom, the units ASE works with.
    if pos_units == 'angstrom':
        pass
    elif pos_units == 'atomic':
        for atom in atoms:
            atom.position *= CONSTANTS.bohr_to_ang
    else:
        raise Exception("Can't deal with units of positions {}".format(pos_units))

    if create_settings:
        vel_units = trajectory.get_attribute('units|velocities', 'atomic')
        # NOTE(review): assumes the last element returned by get_step_data are the velocities — confirm.
        velocities = trajectory.get_step_data(step_index)[-1]
        if recenter:
            com = np.zeros(3)
            M = 0.0
            # Calculate the center of mass displacement:
            for atom, vel in zip(atoms, velocities):
                com = com + atom.mass * vel
                M += atom.mass
            # Subtract the center-of-mass velocity from every atom so the total momentum is zero.
            velocities[:, 0:3] -= com[0:3] / M
            # check:
            # NOTE(review): ``assert`` is stripped under ``python -O``; consider an explicit check.
            com = np.zeros(3)
            for atom, vel in zip(atoms, velocities):
                com = com + atom.mass * vel
            assert abs(np.linalg.norm(com)) < 1e-12, 'COM did not disappear'

        velocities = velocities.tolist()
        if vel_units == 'atomic':
            pass
        else:
            raise Exception(f"Can't deal with units of velocities {vel_units}")

    if complete_missing:
        # Append the atoms the trajectory did not store, taken from the reference structure;
        # their velocities (if requested) are set to zero.
        for atom in structure.get_ase()[len(atoms) :]:
            atoms.append(atom)
            if create_settings:
                velocities.append([0.0, 0.0, 0.0])

    newstruc = orm.StructureData(ase=atoms)
    newstruc.label = newstruc.get_formula(mode='count')
    return_dict = dict(structure=newstruc)

    if create_settings:
        if settings is not None:
            settings_d = settings.get_dict()
        else:
            settings_d = {}
        settings_d['ATOMIC_VELOCITIES'] = velocities
        return_dict['settings'] = orm.Dict(dict=settings_d)

    return return_dict
58 changes: 58 additions & 0 deletions src/aiida_quantumespresso/calculations/functions/md/md_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Utility functions for MD workchains that are not supposed to be calcfunctions

from aiida import orm
from aiida.common.links import LinkType


def get_completed_number_of_steps(calc):
    """Read the number of completed MD steps from the calculation's output trajectory.

    The number of stored frames (first entry of the ``array|positions`` shape attribute)
    is multiplied by the ``iprint`` value of the ``CONTROL`` namelist (default 1) —
    presumably because only every ``iprint``-th step is written to the trajectory.

    :param calc: a calculation node with an ``output_trajectory`` output and a ``parameters`` input.
    :returns: the number of completed steps as an integer.
    """
    n_frames = calc.outputs.output_trajectory.get_attribute('array|positions')[0]
    print_interval = calc.inputs.parameters.get_attribute('CONTROL').get('iprint', 1)
    return print_interval * n_frames


def get_total_trajectory(workchain, previous_trajectory=None, store=False, remove_repeated_last_step=False):
    """Collect all the trajectory segments produced by a work chain and concatenate them.

    :param workchain: the work chain node whose called ``iteration_%`` calculations produced the segments.
    :param previous_trajectory: optional trajectory of a previous MD run, prepended as ``trajectory_00``.
    :param store: whether to store the provenance of the concatenation calcfunction.
    :param remove_repeated_last_step: whether to drop the last step of each segment except the final one;
        needed when restarting from the positions/velocities of the previous segment, which repeats its
        last step. In case of restarting from wavefunctions this must be False.
    :returns: the concatenated ``TrajectoryData``, the single segment itself if only one was found,
        or ``None`` if no segment was found.
    """
    from aiida_quantumespresso.calculations.functions.md.concatenate_trajectory import concatenate_trajectory

    qb = orm.QueryBuilder()
    qb.append(orm.WorkChainNode, filters={'uuid': workchain.uuid}, tag='replay')
    # TODO: Are filters on the state of the calculation needed here?
    # TODO: add project on extras.discard_trajectory, traj_d defined to skip them
    qb.append(
        orm.CalcJobNode,
        with_incoming='replay',
        edge_filters={'type': LinkType.CALL_CALC.value, 'label': {'like': 'iteration_%'}},
        edge_project='label',
        tag='calc',
        edge_tag='rc',
    )
    qb.append(
        orm.TrajectoryData, with_incoming='calc', edge_filters={'label': 'output_trajectory'}, project=['*'], tag='traj'
    )
    # Key each segment as `trajectory_XX` so that sorting the keys reproduces the run order.
    traj_d = {
        item['rc']['label'].replace('iteration_', 'trajectory_'): item['traj']['*'] for item in qb.iterdict()
    }  ## if not extras.discard_trajectory

    # Prepend the trajectory of a previous MD run, if it exists; `trajectory_00` sorts before all iterations.
    if previous_trajectory is not None:
        traj_d['trajectory_00'] = previous_trajectory

    # If several trajectories were produced, concatenate them here (no need to sort them).
    if len(traj_d) > 1:
        traj_d['metadata'] = {'call_link_label': 'concatenate_trajectory', 'store_provenance': store}
        # If I want to start the next MD from the position and velocities of the previous one,
        # i.e. using `previous_trajectory`, I need to ensure that I do not duplicate
        # the last step from the previous trajectory.
        # In case of restarting from wavefunctions this must be false.
        traj_d['remove_repeated_last_step'] = orm.Bool(remove_repeated_last_step)
        return concatenate_trajectory(**traj_d)['concatenated_trajectory']
    if len(traj_d) == 1:
        # No reason to concatenate if there is only one trajectory (saves space in the repository).
        return next(iter(traj_d.values()))
    return None
31 changes: 30 additions & 1 deletion src/aiida_quantumespresso/parsers/parse_raw/pw.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,22 @@ def parse_stdout(stdout, input_parameters, parser_options=None, parsed_xml=None,
# (cell, initial positions, kpoints, ...) and I skip them.
# In case, parse for them before this point.
# Put everything in a trajectory_data dictionary
relax_steps = stdout.split('Self-consistent Calculation')[1:]

# For MD calculations, I now skip the data calculated for the 0th step i.e. for the
# input structure. And nothing is calculated for the final structure, so in the trajectory
# I pop out the final structure from `positions` as no other data is available for this configuration

if input_parameters.get('CONTROL', {}).get('calculation', 'scf') in ['md', 'vc-md']:
# For MD calculation it is better to check for the following keyword
# Using this keyword I am dropping the forces and energy information of the initial
# structure, which is printed in `.out` file before the parsing starts
relax_steps = stdout.split('Entering Dynamics:')[1:]
else:
# For eveything else, I continue as before
relax_steps = stdout.split('Self-consistent Calculation')[1:]
# For sirius calculations only following keyword works
# relax_steps = stdout.split('* running SCF ground state *')[1:]

relax_steps = [i.split('\n') for i in relax_steps]

# now I create a bunch of arrays for every step.
Expand Down Expand Up @@ -694,6 +709,10 @@ def parse_stdout(stdout, input_parameters, parser_options=None, parsed_xml=None,
pass

# grep energy and possibly, magnetization
# It might be worthwhile to add an extra space to distinguish from SIRIUS calculations,
# which prints multiple '!' during its self-consistent loop.
# so ' !' instead of '!', but I don't want to break compatibility with other use-cases
# that I may not have considered, and I leave this as a note for future.
elif '!' in line:
try:
energy = float(line.split('=')[1].split('Ry')[0]) * CONSTANTS.ry_to_ev
Expand Down Expand Up @@ -821,9 +840,17 @@ def parse_stdout(stdout, input_parameters, parser_options=None, parsed_xml=None,
try:
stress = []
count2 = None
# If we use the above way of dividing the steps, then a frame finishes after 8 to 10 lines of
# 'Computing stress' line, so it's better to break the loop once 'P=' line is found,
# this might be a bad idea if there are multiple separate pressure values.
# This is the case for MD calculations and the loop needs to be broken, otherwise it
# tries to parse that which should be present in the next MD step, and throws an error
# indicating the same.
# Of course this is a non-issue for other calculations like relax, vc-relax, etc.
for k in range(15): # Up to 15 lines later - more than 10 are needed if vdW is turned on
if 'P=' in data_step[count + k + 1]:
count2 = count + k + 1
break
if count2 is None:
logs.warning.append(
'Error while parsing stress tensor: '
Expand Down Expand Up @@ -932,6 +959,8 @@ def parse_stdout(stdout, input_parameters, parser_options=None, parsed_xml=None,
# Ionic calculations and BFGS algorithm did not print that calculation is converged
if 'atomic_positions_relax' in trajectory_data and not marker_bfgs_converged:
logs.error.append('ERROR_IONIC_CONVERGENCE_NOT_REACHED')
# this error makes no sense for md calculation
# TODO: add option to skip this test for MD calculation

# Ionic calculation that hit the maximum number of ionic steps. Note: does not necessarily mean that convergence was
# not reached as it could have occurred in the last step.
Expand Down
Loading
Loading