Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ Chronological list of authors
- Mingyi Xue
- Meghan Osato
- Anirvinya G
- Rishabh Shukla
- Rishabh Shukla
- Manish Kumar
- Aditi Tripathi
- Sukeerti T
Expand Down Expand Up @@ -247,24 +247,25 @@ Chronological list of authors
- Jia-Xin Zhu
- Tanish Yelgoe
2025
- Joshua Raphael Uy
- Joshua Raphael Uy
- Namir Oues
- Pavel Buslaev
- Pavel Buslaev
- Lexi Xu
- BHM-Bob G
- BHM-Bob G
- Yu-Yuan (Stuart) Yang
- James Rowe
- Debasish Mohanty
- Debasish Mohanty
- Abdulrahman Elbanna
- Tulga-Erdene Sodjargal
- Gareth Elliott
- Marc Schuh
- Sirsha Ganguly
- Amruthesh Thirumalaiswamy
- Ch Zhang
- Raúl Lois-Cuns
- Raúl Lois-Cuns
- Pranay Pelapkar
- Shreejan Dolai
- Brady Johnston

External code
-------------
Expand Down
4 changes: 3 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ The rules for this file:

-------------------------------------------------------------------------------
??/??/?? IAlibay, orbeckst, marinegor, tylerjereddy, ljwoods2, marinegor,
spyke7, talagayev
spyke7, talagayev, bradyajohnston

* 2.11.0

Expand All @@ -30,6 +30,8 @@ Fixes
DSSP by porting upstream PyDSSP 0.9.1 fix (Issue #4913)

Enhancements
* Change `get_hbond_map` to use `distance_capped` to speedup finding H bonding pairs
and also take into account the periodic boundaries (PR #5182)
* Enables parallelization for analysis.diffusionmap.DistanceMatrix
(Issue #4679, PR #4745)

Expand Down
13 changes: 12 additions & 1 deletion package/MDAnalysis/analysis/dssp/dssp.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
.. autofunction:: translate
"""

import warnings
from typing import Optional, Union

import numpy as np
Expand Down Expand Up @@ -323,6 +324,12 @@ def __init__(
for t in heavyatom_names
}
self._donor_mask: Optional[np.ndarray] = ag.residues.resnames != "PRO"
self._box = self._trajectory.ts.dimensions
if np.array_equal(self._box, (1, 1, 1, 90, 90, 90)):
self._box = None
warnings.warn(
"Box dimensions are (1, 1, 1, 90, 90, 90), not using periodic boundary conditions in DSSP calculations"
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% on what the warning should be - but this is how we are imagining it? @IAlibay

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that looks about right. With regards to the warning, your message is close enough to what I was thinking, i.e. something like "Default unit cells of 1 A cubed found. This likely comes from a cryo-em file. To avoid usses, we are disabling periodic boundary conditions in the DSSP calculation".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A warning is currently already raised twice: once when loading the universe and again when running the DSSP (not sure why it is happening for the run(). As the unit cell dimensions are already set to None it's not triggering the new warning. In most cases the unit cell wouldn't make it "all the way" to DSSP but I'll change it to just be the same warning message.

In [9]: p = Path("/Users/brady/Downloads/9N9M.pdb")

In [10]: u = mda.Universe(p)
/Users/brady/git/mdanalysis/package/MDAnalysis/coordinates/PDB.py:479: UserWarning: 1 A^3 CRYST1 record, this is usually a placeholder. Unit cell dimensions will be set to None.
  warnings.warn(

In [11]: a = dssp.DSSP(u)

In [12]: a.run()
/Users/brady/git/mdanalysis/package/MDAnalysis/coordinates/PDB.py:479: UserWarning: 1 A^3 CRYST1 record, this is usually a placeholder. Unit cell dimensions will be set to None.
  warnings.warn(
/Users/brady/git/mdanalysis/package/MDAnalysis/analysis/base.py:562: UserWarning: Reader has no dt information, set to 1.0 ps
  self.times[idx] = ts.time

Out[12]: <MDAnalysis.analysis.dssp.dssp.DSSP at 0x110d4c910>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is what I was trying to convey earlier in the conversation when I was saying that the PDB reader takes care of that edge case already. I'm not sure what the best approach is here, thoughts @marinegor ?

)
self._hydrogens: list["AtomGroup"] = [
res.atoms.select_atoms(f"name {hydrogen_name}")
for res in ag.residues
Expand Down Expand Up @@ -399,7 +406,11 @@ def _get_coords(self) -> np.ndarray:

def _single_frame(self):
coords = self._get_coords()
dssp = assign(coords, donor_mask=self._donor_mask)
dssp = assign(
coords,
donor_mask=self._donor_mask,
box=self._box,
)
self.results.dssp_ndarray.append(dssp)

def _conclude(self):
Expand Down
106 changes: 71 additions & 35 deletions package/MDAnalysis/analysis/dssp/pydssp_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@

import numpy as np

from MDAnalysis.lib.distances import capped_distance, minimize_vectors

CONST_Q1Q2 = 0.084
CONST_F = 332
DEFAULT_CUTOFF = -0.5
DEFAULT_MARGIN = 1.0
HBOND_SEARCH_CUTOFF = 5.0


def _upsample(a: np.ndarray, window: int) -> np.ndarray:
Expand Down Expand Up @@ -118,10 +121,11 @@ def _get_hydrogen_atom_position(coord: np.ndarray) -> np.ndarray:

def get_hbond_map(
coord: np.ndarray,
donor_mask: np.ndarray = None,
donor_mask: np.ndarray | None = None,
cutoff: float = DEFAULT_CUTOFF,
margin: float = DEFAULT_MARGIN,
return_e: bool = False,
box: np.ndarray | None = None,
) -> np.ndarray:
"""Returns hydrogen bond map

Expand All @@ -145,6 +149,14 @@ def get_hbond_map(
return_e : bool, optional
if to return energy instead of hbond map, by default False

box : array_like, optional
The unitcell dimensions of the system, which can be orthogonal or
triclinic and must be provided in the same format as returned by
:attr:`MDAnalysis.coordinates.timestep.Timestep.dimensions`:
``[lx, ly, lz, alpha, beta, gamma]``.

.. versionadded:: 2.11.0

Returns
-------
np.ndarray
Expand All @@ -156,6 +168,8 @@ def get_hbond_map(
.. versionchanged:: 2.10.0
Support masking of hydrogen donors via `donor_mask` (especially needed
for ignoring HN on proline residues). Backport of PRO fix from pydssp 0.9.1.
.. versionchanged:: 2.11.0
Speedup with KDTree and support periodic boundary checking via `box` param.
"""
n_residues, n_atom_types, _xyz = coord.shape
assert n_atom_types in (
Expand All @@ -176,58 +190,70 @@ def get_hbond_map(
# h.shape == (n_residues, 3)
# coord.shape == (n_residues, 4, 3)

# distance matrix
n_1, c_0, o_0 = coord[1:, 0], coord[0:-1, 2], coord[0:-1, 3]
n_atoms, c_atoms, o_atoms = coord[1:, 0], coord[:-1, 2], coord[:-1, 3]

n = n_residues - 1
cmap = np.tile(c_0, (n, 1, 1))
omap = np.tile(o_0, (n, 1, 1))
nmap = np.tile(n_1, (1, 1, n)).reshape(n, n, 3)
hmap = np.tile(h_1, (1, 1, n)).reshape(n, n, 3)
# We can reduce the need for a complete pairwise distance matrix by first searching for
# candidate pairs within a certain distance cutoff and only computing the energy
# for these relevant pairs rather than "potential" HBonds between far apart residues
pairs, d_on = capped_distance(
n_atoms,
o_atoms,
max_cutoff=HBOND_SEARCH_CUTOFF,
box=box,
)

d_on = np.linalg.norm(omap - nmap, axis=-1)
d_ch = np.linalg.norm(cmap - hmap, axis=-1)
d_oh = np.linalg.norm(omap - hmap, axis=-1)
d_cn = np.linalg.norm(cmap - nmap, axis=-1)
# Exclude local pairs (i, i), (i, i+1), (i, i+2) that are too close for SS HBonds
local_mask = abs(pairs[:, 0] - pairs[:, 1]) >= 2
pairs = pairs[local_mask]

# Exclude donor H absence (Proline)
if donor_mask is not None:
donor_indices = np.where(np.array(donor_mask) == 0)[0]
mask = ~np.isin(pairs[:, 0], donor_indices - 1)
pairs = pairs[mask]

# compute distances and energy as previously but only for the potential pairs
# still returning the same energy matrix that would have otherwise been made
o_indices = pairs[:, 1]
n_indices = pairs[:, 0]

# d_on = d_on[local_mask]
def _distances(x, y, box=None):
if box is None:
return np.linalg.norm(x[o_indices] - y[n_indices], axis=-1)
else:
return np.linalg.norm(
minimize_vectors(x[o_indices] - y[n_indices], box=box),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or use directly MDAnalysis.lib.distances.calc_bonds

return calc_bonds(x[o_indices], y[n_indices], box=box)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah this is good to know! (and yeah I missed it from naming). Will update to use that instead of this combo.

axis=-1,
)

d_on = _distances(o_atoms, n_atoms, box)
d_ch = _distances(c_atoms, h_1, box)
d_oh = _distances(o_atoms, h_1, box)
d_cn = _distances(c_atoms, n_atoms, box)

# electrostatic interaction energy
# e[i, j] = e(CO_i) - e(NH_j)
e = np.pad(
e = np.zeros((n_residues, n_residues))
e[n_indices + 1, o_indices] = (
CONST_Q1Q2
* (1.0 / d_on + 1.0 / d_ch - 1.0 / d_oh - 1.0 / d_cn)
* CONST_F,
[[1, 0], [0, 1]],
* CONST_F
)

if return_e: # pragma: no cover
return e

# mask for local pairs (i,i), (i,i+1), (i,i+2)
local_mask = ~np.eye(n_residues, dtype=bool)
local_mask *= ~np.diag(np.ones(n_residues - 1, dtype=bool), k=-1)
local_mask *= ~np.diag(np.ones(n_residues - 2, dtype=bool), k=-2)
# mask for donor H absence (Proline)
donor_mask = (
np.array(donor_mask).astype(float)
if donor_mask is not None
else np.ones(n_residues, dtype=float)
)
donor_mask = np.tile(donor_mask[:, np.newaxis], (1, n_residues))
# hydrogen bond map (continuous value extension of original definition)
hbond_map = np.clip(cutoff - margin - e, a_min=-margin, a_max=margin)
hbond_map = (np.sin(hbond_map / margin * np.pi / 2) + 1.0) / 2

assert hbond_map.shape == local_mask.shape == donor_mask.shape

hbond_map *= local_mask
hbond_map *= donor_mask

return hbond_map


def assign(
coord: np.ndarray,
donor_mask: np.ndarray = None,
donor_mask: np.ndarray | None = None,
box: np.ndarray | None = None,
) -> np.ndarray:
"""Assigns secondary structure for a given coordinate array,
either with or without assigned hydrogens
Expand All @@ -240,12 +266,20 @@ def assign(
(N, CA, C, O) atoms coordinates (if k=4), or (N, CA, C, O, H) coordinates
(when k=5).

donor_mask : np.array
donor_mask : np.array | None, optional
Mask out any hydrogens that should not be considered (in particular HN
in PRO). If ``None`` then all H will be used (behavior up to 2.9.0).

.. versionadded:: 2.10.0

box : array_like, optional
The unitcell dimensions of the system, which can be orthogonal or
triclinic and must be provided in the same format as returned by
:attr:`MDAnalysis.coordinates.timestep.Timestep.dimensions`:
``[lx, ly, lz, alpha, beta, gamma]``.

.. versionadded:: 2.11.0

Returns
-------
np.ndarray
Expand All @@ -257,10 +291,12 @@ def assign(

.. versionchanged:: 2.10.0
Support masking of donors.
.. versionchanged:: 2.11.0
Speedup with KDTree and support periodic boundary checking via `box` param.

"""
# get hydrogen bond map
hbmap = get_hbond_map(coord, donor_mask=donor_mask)
hbmap = get_hbond_map(coord, donor_mask=donor_mask, box=box)
hbmap = np.swapaxes(hbmap, -1, -2) # convert into "i:C=O, j:N-H" form

# identify turn 3, 4, 5
Expand Down
16 changes: 15 additions & 1 deletion testsuite/MDAnalysisTests/analysis/test_dssp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import glob

import MDAnalysis as mda
import numpy as np
import pytest
from MDAnalysis.analysis.dssp import DSSP, translate
from MDAnalysis.analysis.dssp import DSSP, assign, translate

from MDAnalysisTests.datafiles import DSSP as DSSP_FOLDER
from MDAnalysisTests.datafiles import TPR, XTC
Expand Down Expand Up @@ -35,6 +36,19 @@ def test_trajectory(client_DSSP):
)


# ensure that we get different assigned results when using and not using the
# donor mask which filters out prolines from being potential HBond donors as they
# are missing hydrogens
def test_donor_mask(client_DSSP):
u = mda.Universe(TPR, XTC).select_atoms("protein").universe
dssp = DSSP(u)
coords = dssp._get_coords()

assert not np.array_equal(
assign(coords), assign(coords, donor_mask=dssp._donor_mask)
)


def test_atomgroup(client_DSSP):
protein = mda.Universe(TPR, XTC).select_atoms("protein")
run = DSSP(protein).run(**client_DSSP, stop=10)
Expand Down
Loading