Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ Chronological list of authors
- Mingyi Xue
- Meghan Osato
- Anirvinya G
- Rishabh Shukla
- Rishabh Shukla
- Manish Kumar
- Aditi Tripathi
- Sukeerti T
Expand Down Expand Up @@ -247,24 +247,25 @@ Chronological list of authors
- Jia-Xin Zhu
- Tanish Yelgoe
2025
- Joshua Raphael Uy
- Joshua Raphael Uy
- Namir Oues
- Pavel Buslaev
- Pavel Buslaev
- Lexi Xu
- BHM-Bob G
- BHM-Bob G
- Yu-Yuan (Stuart) Yang
- James Rowe
- Debasish Mohanty
- Debasish Mohanty
- Abdulrahman Elbanna
- Tulga-Erdene Sodjargal
- Gareth Elliott
- Marc Schuh
- Sirsha Ganguly
- Amruthesh Thirumalaiswamy
- Ch Zhang
- Raúl Lois-Cuns
- Raúl Lois-Cuns
- Pranay Pelapkar
- Shreejan Dolai
- Brady Johnston

External code
-------------
Expand Down
4 changes: 3 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ The rules for this file:

-------------------------------------------------------------------------------
??/??/?? IAlibay, orbeckst, marinegor, tylerjereddy, ljwoods2, marinegor,
spyke7, talagayev
spyke7, talagayev, bradyajohnston

* 2.11.0

Expand All @@ -30,6 +30,8 @@ Fixes
DSSP by porting upstream PyDSSP 0.9.1 fix (Issue #4913)

Enhancements
* Change `get_hbond_map` to use `distance_capped` to speedup finding H bonding pairs
and also take into account the periodic boundaries (PR #5182)
* Enables parallelization for analysis.diffusionmap.DistanceMatrix
(Issue #4679, PR #4745)

Expand Down
6 changes: 5 additions & 1 deletion package/MDAnalysis/analysis/dssp/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,11 @@ def _get_coords(self) -> np.ndarray:

def _single_frame(self):
coords = self._get_coords()
dssp = assign(coords, donor_mask=self._donor_mask)
dssp = assign(
coords,
donor_mask=self._donor_mask,
box=self._trajectory.ts.dimensions,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if dimensions are (1, 1, 1, 90, 90, 90) (cryoEM placeholder), just set to None.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be done here, but rather when loading the PDB - there is an a bad edge case where you might actually just have that unit box.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On further thought, I wouldn't be completely against this check - maybe a warning would be ok too if we don't want to be too strict

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On further thought, I wouldn't be completely against this check - maybe a warning would be ok too if we don't want to be too strict

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% with how this should be handled. If I am raising a warning in _single_frame won't that warning be raised a bunch of times for every frame?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def _single_frame(self):
        coords = self._get_coords()
        DEFAULT_BOX_DIMENSIONS = (1, 1, 1, 90, 90, 90)
        box = self._trajectory.ts.dimensions

        if box == DEFAULT_BOX_DIMENSIONS:
            box = None
            warnings.warn(
                f"Default box dimensions {DEFAULT_BOX_DIMENSIONS} are treated as no box"
            )
        dssp = assign(
            coords,
            donor_mask=self._donor_mask,
            box=self._trajectory.ts.dimensions,
        )
        self.results.dssp_ndarray.append(dssp)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@BradyAJohnston - how about making that warning happen in init? The atomgroups are set there, so it would be best to do it when the class is created.

)
self.results.dssp_ndarray.append(dssp)

def _conclude(self):
Expand Down
93 changes: 59 additions & 34 deletions package/MDAnalysis/analysis/dssp/pydssp_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@

import numpy as np

from MDAnalysis.lib.distances import capped_distance

CONST_Q1Q2 = 0.084
CONST_F = 332
DEFAULT_CUTOFF = -0.5
DEFAULT_MARGIN = 1.0
HBOND_SEARCH_CUTOFF = 5.0


def _upsample(a: np.ndarray, window: int) -> np.ndarray:
Expand Down Expand Up @@ -118,10 +121,11 @@ def _get_hydrogen_atom_position(coord: np.ndarray) -> np.ndarray:

def get_hbond_map(
coord: np.ndarray,
donor_mask: np.ndarray = None,
donor_mask: np.ndarray | None = None,
cutoff: float = DEFAULT_CUTOFF,
margin: float = DEFAULT_MARGIN,
return_e: bool = False,
box: np.ndarray | None = None,
) -> np.ndarray:
"""Returns hydrogen bond map

Expand All @@ -145,6 +149,14 @@ def get_hbond_map(
return_e : bool, optional
if to return energy instead of hbond map, by default False

box : array_like, optional
The unitcell dimensions of the system, which can be orthogonal or
triclinic and must be provided in the same format as returned by
:attr:`MDAnalysis.coordinates.timestep.Timestep.dimensions`:
``[lx, ly, lz, alpha, beta, gamma]``.

.. versionadded:: 2.11.0

Returns
-------
np.ndarray
Expand All @@ -156,6 +168,8 @@ def get_hbond_map(
.. versionchanged:: 2.10.0
Support masking of hydrogen donors via `donor_mask` (especially needed
for ignoring HN on proline residues). Backport of PRO fix from pydssp 0.9.1.
.. versionchanged:: 2.11.0
Speedup with KDTree and support periodic boundary checking via `box` param.
"""
n_residues, n_atom_types, _xyz = coord.shape
assert n_atom_types in (
Expand All @@ -176,58 +190,59 @@ def get_hbond_map(
# h.shape == (n_residues, 3)
# coord.shape == (n_residues, 4, 3)

# distance matrix
n_1, c_0, o_0 = coord[1:, 0], coord[0:-1, 2], coord[0:-1, 3]
n_atoms, c_atoms, o_atoms = coord[1:, 0], coord[:-1, 2], coord[:-1, 3]

# We can reduce the need for a complete pairwise distance matrix by first searching for
# candidate pairs within a certain distance cutoff and only computing the energy
# for these relevant pairs rather than "potential" HBonds between far apart residues
pairs = capped_distance(
n_atoms,
o_atoms,
max_cutoff=HBOND_SEARCH_CUTOFF,
return_distances=False,
box=box,
)

# Exclude local pairs (i, i), (i, i+1), (i, i+2) that are too close for SS HBonds
pairs = pairs[abs(pairs[:, 0] - pairs[:, 1]) >= 2]

n = n_residues - 1
cmap = np.tile(c_0, (n, 1, 1))
omap = np.tile(o_0, (n, 1, 1))
nmap = np.tile(n_1, (1, 1, n)).reshape(n, n, 3)
hmap = np.tile(h_1, (1, 1, n)).reshape(n, n, 3)
# Exclude donor H absence (Proline)
if donor_mask is not None:
donor_indices = np.where(np.array(donor_mask) == 0)[0]
mask = ~np.isin(pairs[:, 0], donor_indices - 1)
pairs = pairs[mask]

d_on = np.linalg.norm(omap - nmap, axis=-1)
d_ch = np.linalg.norm(cmap - hmap, axis=-1)
d_oh = np.linalg.norm(omap - hmap, axis=-1)
d_cn = np.linalg.norm(cmap - nmap, axis=-1)
# compute distances and energy as previously but only for the potential pairs
# still returning the same energy matrix that would have otherwise been made
o_indices = pairs[:, 1]
n_indices = pairs[:, 0]
d_on = np.linalg.norm(o_atoms[o_indices] - n_atoms[n_indices], axis=-1)
d_ch = np.linalg.norm(c_atoms[o_indices] - h_1[n_indices], axis=-1)
d_oh = np.linalg.norm(o_atoms[o_indices] - h_1[n_indices], axis=-1)
d_cn = np.linalg.norm(c_atoms[o_indices] - n_atoms[n_indices], axis=-1)

# electrostatic interaction energy
# e[i, j] = e(CO_i) - e(NH_j)
e = np.pad(
e = np.zeros((n_residues, n_residues))
e[n_indices + 1, o_indices] = (
CONST_Q1Q2
* (1.0 / d_on + 1.0 / d_ch - 1.0 / d_oh - 1.0 / d_cn)
* CONST_F,
[[1, 0], [0, 1]],
* CONST_F
)

if return_e: # pragma: no cover
return e

# mask for local pairs (i,i), (i,i+1), (i,i+2)
local_mask = ~np.eye(n_residues, dtype=bool)
local_mask *= ~np.diag(np.ones(n_residues - 1, dtype=bool), k=-1)
local_mask *= ~np.diag(np.ones(n_residues - 2, dtype=bool), k=-2)
# mask for donor H absence (Proline)
donor_mask = (
np.array(donor_mask).astype(float)
if donor_mask is not None
else np.ones(n_residues, dtype=float)
)
donor_mask = np.tile(donor_mask[:, np.newaxis], (1, n_residues))
# hydrogen bond map (continuous value extension of original definition)
hbond_map = np.clip(cutoff - margin - e, a_min=-margin, a_max=margin)
hbond_map = (np.sin(hbond_map / margin * np.pi / 2) + 1.0) / 2

assert hbond_map.shape == local_mask.shape == donor_mask.shape

hbond_map *= local_mask
hbond_map *= donor_mask

return hbond_map


def assign(
coord: np.ndarray,
donor_mask: np.ndarray = None,
donor_mask: np.ndarray | None = None,
box: np.ndarray | None = None,
) -> np.ndarray:
"""Assigns secondary structure for a given coordinate array,
either with or without assigned hydrogens
Expand All @@ -240,12 +255,20 @@ def assign(
(N, CA, C, O) atoms coordinates (if k=4), or (N, CA, C, O, H) coordinates
(when k=5).

donor_mask : np.array
donor_mask : np.array | None, optional
Mask out any hydrogens that should not be considered (in particular HN
in PRO). If ``None`` then all H will be used (behavior up to 2.9.0).

.. versionadded:: 2.10.0

box : array_like, optional
The unitcell dimensions of the system, which can be orthogonal or
triclinic and must be provided in the same format as returned by
:attr:`MDAnalysis.coordinates.timestep.Timestep.dimensions`:
``[lx, ly, lz, alpha, beta, gamma]``.

.. versionadded:: 2.11.0

Returns
-------
np.ndarray
Expand All @@ -257,6 +280,8 @@ def assign(

.. versionchanged:: 2.10.0
Support masking of donors.
.. versionchanged:: 2.11.0
Speedup with KDTree and support periodic boundary checking via `box` param.

"""
# get hydrogen bond map
Expand Down
16 changes: 15 additions & 1 deletion testsuite/MDAnalysisTests/analysis/test_dssp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import glob

import MDAnalysis as mda
import numpy as np
import pytest
from MDAnalysis.analysis.dssp import DSSP, translate
from MDAnalysis.analysis.dssp import DSSP, assign, translate

from MDAnalysisTests.datafiles import DSSP as DSSP_FOLDER
from MDAnalysisTests.datafiles import TPR, XTC
Expand Down Expand Up @@ -35,6 +36,19 @@ def test_trajectory(client_DSSP):
)


# ensure that we get different assigned results when using and not using the
# donor mask which filters out prolines from being potential HBond donors as they
# are missing hydrogens
def test_donor_mask(client_DSSP):
u = mda.Universe(TPR, XTC).select_atoms("protein").universe
dssp = DSSP(u)
coords = dssp._get_coords()

assert not np.array_equal(
assign(coords), assign(coords, donor_mask=dssp._donor_mask)
)


def test_atomgroup(client_DSSP):
protein = mda.Universe(TPR, XTC).select_atoms("protein")
run = DSSP(protein).run(**client_DSSP, stop=10)
Expand Down
Loading