Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 126 additions & 37 deletions pdbfixer/pdbfixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,61 @@ def _dihedralRotation(points, angle):
[axis[2]*axis[0]*(1-ct) - axis[1]*st, axis[2]*axis[1]*(1-ct) + axis[0]*st, axis[2]*axis[2]*(1-ct) + ct ]
])

def _placeAtomNeRF(a1, a2, a3, bond_length, bond_angle, dihedral):
"""Place atom X bonded to a3 using the NeRF algorithm (Parsons et al. 2005).

Given three reference points a1, a2, a3, compute the position of X such that
dist(a3, X) = bond_length, angle(a2, a3, X) = bond_angle, and
dihedral(a1, a2, a3, X) = dihedral.

All lengths in nanometers, angles in radians.
"""
bc = a3 - a2
bc_hat = bc / np.linalg.norm(bc)
ab = a2 - a1
n = np.cross(ab, bc)
norm_n = np.linalg.norm(n)
if norm_n < 1e-10:
raise ValueError(
"Reference points a1, a2, a3 are collinear; cannot define a NeRF frame.")
n_hat = n / norm_n
m = np.cross(n_hat, bc_hat)
d = np.array([
-bond_length * np.cos(bond_angle),
bond_length * np.sin(bond_angle) * np.cos(dihedral),
bond_length * np.sin(bond_angle) * np.sin(dihedral),
])
return a3 + d[0] * bc_hat + d[1] * m + d[2] * n_hat

def _buildAceCap(n, ca, c):
"""Build ACE cap positions from the adjacent residue's N, CA, C (in nm).

Returns dict mapping atom name to numpy position array.
Atom names match ACE.pdb template: C, O, CH3.

Bond lengths and angles from Engh & Huber (1991), Acta Cryst. A47, 392-400;
tabulated at https://www.ebi.ac.uk/thornton-srv/software/PROCHECK/manual/manappa.html
CH3-C uses the CA-C value (both sp3 C bonded to carbonyl C).
"""
ace_c = _placeAtomNeRF(c, ca, n, 0.1329, 2.124, np.pi) # C-N 1.329 A, C-N-CA 121.7 deg
ch3 = _placeAtomNeRF(ca, n, ace_c, 0.1525, 2.028, np.pi) # ~CA-C 1.525 A, CA-C-N 116.2 deg
o = _placeAtomNeRF(ca, n, ace_c, 0.1231, 2.147, 0.0) # C=O 1.231 A, O-C-N 123.0 deg
return {'C': ace_c, 'O': o, 'CH3': ch3}

def _buildNmeCap(n, ca, c):
"""Build NME cap positions from the adjacent residue's N, CA, C (in nm).

Returns dict mapping atom name to numpy position array.
Atom names match NME.pdb template: N, C.

Bond lengths and angles from Engh & Huber (1991), Acta Cryst. A47, 392-400;
tabulated at https://www.ebi.ac.uk/thornton-srv/software/PROCHECK/manual/manappa.html
N-CH3 uses the N-CA value (both sp3 C bonded to amide N).
"""
nme_n = _placeAtomNeRF(n, ca, c, 0.1329, 2.028, np.pi) # C-N 1.329 A, CA-C-N 116.2 deg
ch3 = _placeAtomNeRF(ca, c, nme_n, 0.1458, 2.124, np.pi) # ~N-CA 1.458 A, C-N-CA 121.7 deg
return {'N': nme_n, 'C': ch3}

def _findUnoccupiedDirection(point, positions):
"""Given a point in space and a list of atom positions, find the direction in which the local density of atoms is lowest."""

Expand Down Expand Up @@ -765,43 +820,77 @@ def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosi
for i, residueName in enumerate(residueNames):
template = self._getTemplate(residueName)

# Find a translation that best matches the adjacent residue.

points1 = []
points2 = []
for atom in template.topology.atoms():
if atom.name in orientToPositions:
points1.append(orientToPositions[atom.name].value_in_unit(unit.nanometer))
points2.append(template.positions[atom.index].value_in_unit(unit.nanometer))
(translate2, rotate, translate1) = _overlayPoints(points1, points2)

# Create the new residue.

newResidue = chain.topology.addResidue(residueName, chain, "%d" % ((firstIndex+i)%10000))
fraction = (i+1.0)/(numResidues+1.0)
translate = startPosition + (endPosition-startPosition)*fraction + loopHeight*math.sin(fraction*math.pi)*loopDirection
templateAtoms = list(template.topology.atoms())
if newResidue == next(chain.residues()):
templateAtoms = [atom for atom in templateAtoms if atom.name not in ('P', 'OP1', 'OP2')]
for atom in templateAtoms:
newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
newPositions.append(mm.Vec3(*np.dot(rotate, templatePosition))*unit.nanometer+translate)
if prevResidue is not None:
atoms1 = {atom.name: atom for atom in prevResidue.atoms()}
atoms2 = {atom.name: atom for atom in newResidue.atoms()}
if 'CA' in atoms1 and 'C' in atoms1 and 'N' in atoms2 and 'CA' in atoms2:

# We're adding a peptide bond between this residue and the previous one. Rotate it to try to
# put the peptide bond into the trans conformation.

atoms = (atoms1['CA'], atoms1['C'], atoms2['N'], atoms2['CA'])
points = [newPositions[a.index] for a in atoms]
rotation = _dihedralRotation(points, np.pi)
for atom in newResidue.atoms():
d = (newPositions[atom.index]-points[2]).value_in_unit(unit.nanometer)
newPositions[atom.index] = mm.Vec3(*np.dot(rotation, d))*unit.nanometer + points[2]
if residueName == 'ACE' and all(k in orientToPositions for k in ('N', 'CA', 'C')):
# Use NeRF placement for ACE cap. Requires N, CA, C on the
# adjacent residue; falls back to Kabsch overlay if any are missing.
refN = np.array(orientToPositions['N'].value_in_unit(unit.nanometer))
refCA = np.array(orientToPositions['CA'].value_in_unit(unit.nanometer))
refC = np.array(orientToPositions['C'].value_in_unit(unit.nanometer))
capPositions = _buildAceCap(refN, refCA, refC)
newResidue = chain.topology.addResidue(residueName, chain,
"%d" % ((firstIndex+i)%10000))
for atom in template.topology.atoms():
newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
newPositions.append(mm.Vec3(*capPositions[atom.name])*unit.nanometer)

elif residueName == 'NME' and prevResidue is not None and \
all(k in {a.name for a in prevResidue.atoms()} for k in ('N', 'CA', 'C')):
# Use NeRF placement for NME cap. Requires N, CA, C on the
# previous residue; falls back to Kabsch overlay if any are missing.
prevAtoms = {a.name: a for a in prevResidue.atoms()}
refN = np.array(newPositions[prevAtoms['N'].index].value_in_unit(unit.nanometer))
refCA = np.array(newPositions[prevAtoms['CA'].index].value_in_unit(unit.nanometer))
refC = np.array(newPositions[prevAtoms['C'].index].value_in_unit(unit.nanometer))
capPositions = _buildNmeCap(refN, refCA, refC)
newResidue = chain.topology.addResidue(residueName, chain,
"%d" % ((firstIndex+i)%10000))
for atom in template.topology.atoms():
newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
newPositions.append(mm.Vec3(*capPositions[atom.name])*unit.nanometer)

else:
# Standard residue (or cap missing backbone reference atoms):
# use Kabsch overlay + arc placement.

# Find a translation that best matches the adjacent residue.

points1 = []
points2 = []
for atom in template.topology.atoms():
if atom.name in orientToPositions:
points1.append(orientToPositions[atom.name].value_in_unit(unit.nanometer))
points2.append(template.positions[atom.index].value_in_unit(unit.nanometer))
(translate2, rotate, translate1) = _overlayPoints(points1, points2)

# Create the new residue.

newResidue = chain.topology.addResidue(residueName, chain, "%d" % ((firstIndex+i)%10000))
fraction = (i+1.0)/(numResidues+1.0)
translate = startPosition + (endPosition-startPosition)*fraction + loopHeight*math.sin(fraction*math.pi)*loopDirection
templateAtoms = list(template.topology.atoms())
if newResidue == next(chain.residues()):
templateAtoms = [atom for atom in templateAtoms if atom.name not in ('P', 'OP1', 'OP2')]
for atom in templateAtoms:
newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
newPositions.append(mm.Vec3(*np.dot(rotate, templatePosition))*unit.nanometer+translate)
if prevResidue is not None:
atoms1 = {atom.name: atom for atom in prevResidue.atoms()}
atoms2 = {atom.name: atom for atom in newResidue.atoms()}
if 'CA' in atoms1 and 'C' in atoms1 and 'N' in atoms2 and 'CA' in atoms2:

# We're adding a peptide bond between this residue and the previous one. Rotate it to try to
# put the peptide bond into the trans conformation.

atoms = (atoms1['CA'], atoms1['C'], atoms2['N'], atoms2['CA'])
points = [newPositions[a.index] for a in atoms]
rotation = _dihedralRotation(points, np.pi)
for atom in newResidue.atoms():
d = (newPositions[atom.index]-points[2]).value_in_unit(unit.nanometer)
newPositions[atom.index] = mm.Vec3(*np.dot(rotation, d))*unit.nanometer + points[2]

prevResidue = newResidue

Expand Down
143 changes: 143 additions & 0 deletions pdbfixer/tests/test_nerf_caps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""Tests for NeRF-based ACE/NME cap placement."""

from io import StringIO

import numpy as np
import pytest
from openmm.app import ForceField
from pdbfixer import PDBFixer
from pdbfixer.pdbfixer import _placeAtomNeRF, _buildAceCap, _buildNmeCap


def _dist(a, b):
return np.linalg.norm(a - b)


def _angle(a, b, c):
ba = a - b
bc = c - b
cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
return np.arccos(np.clip(cos_angle, -1, 1))


def _dihedral(a, b, c, d):
b1 = b - a
b2 = c - b
b3 = d - c
n1 = np.cross(b1, b2)
n2 = np.cross(b2, b3)
m = np.cross(n1, b2 / np.linalg.norm(b2))
return np.arctan2(np.dot(m, n2), np.dot(n1, n2))


# Reference backbone atoms for a typical residue (in nm).
REF_N = np.array([0.0, 0.0, 0.0])
REF_CA = np.array([0.1458, 0.0, 0.0])
REF_C = np.array([0.2058, 0.1230, 0.0])


def _rotation_matrix(axis, angle):
"""Rodrigues' rotation formula."""
axis = axis / np.linalg.norm(axis)
K = np.array([
[0, -axis[2], axis[1]],
[axis[2], 0, -axis[0]],
[-axis[1], axis[0], 0],
])
return np.identity(3) + np.sin(angle) * K + (1 - np.cos(angle)) * K @ K


@pytest.fixture(params=[
(np.identity(3), np.zeros(3)),
(_rotation_matrix(np.array([1.0, 2.0, 3.0]), 1.23), np.array([0.5, -0.3, 0.7])),
], ids=["z=0 plane", "general 3D"])
def backbone(request):
"""Yield (N, CA, C) reference backbone, optionally rotated+translated."""
rot, shift = request.param
return rot @ REF_N + shift, rot @ REF_CA + shift, rot @ REF_C + shift


class TestPlaceAtomNeRF:
# Use non-collinear reference points (collinear points make cross product zero).
A1 = np.array([0.0, 0.1, 0.0])
A2 = np.array([0.0, 0.0, 0.0])
A3 = np.array([0.15, 0.0, 0.0])

def test_bond_length_and_angle(self):
bond_length = 0.1335
bond_angle = 2.035
x = _placeAtomNeRF(self.A1, self.A2, self.A3, bond_length, bond_angle, np.pi / 3)
assert abs(_dist(self.A3, x) - bond_length) < 1e-6
assert abs(_angle(self.A2, self.A3, x) - bond_angle) < 1e-6

def test_collinear_raises(self):
a1 = np.array([0.0, 0.0, 0.0])
a2 = np.array([0.15, 0.0, 0.0])
a3 = np.array([0.30, 0.0, 0.0])
with pytest.raises(ValueError, match="collinear"):
_placeAtomNeRF(a1, a2, a3, 0.15, 2.0, 1.0)

def test_dihedral_pi(self):
"""Test with pi dihedral, which is sign-invariant and matches cap usage."""
x = _placeAtomNeRF(self.A1, self.A2, self.A3, 0.15, 2.0, np.pi)
measured = _dihedral(self.A1, self.A2, self.A3, x)
assert abs(abs(measured) - np.pi) < 1e-5


class TestBuildAceCap:
def test_ace_bond_lengths(self, backbone):
n, ca, c = backbone
caps = _buildAceCap(n, ca, c)
assert abs(_dist(caps['C'], n) - 0.1329) < 1e-4
assert abs(_dist(caps['O'], caps['C']) - 0.1231) < 1e-4
assert abs(_dist(caps['CH3'], caps['C']) - 0.1525) < 1e-4

def test_ace_omega_trans(self, backbone):
n, ca, c = backbone
caps = _buildAceCap(n, ca, c)
omega = _dihedral(caps['CH3'], caps['C'], n, ca)
assert abs(abs(omega) - np.pi) < 0.05


class TestBuildNmeCap:
def test_nme_bond_lengths(self, backbone):
n, ca, c = backbone
caps = _buildNmeCap(n, ca, c)
assert abs(_dist(caps['N'], c) - 0.1329) < 1e-4
assert abs(_dist(caps['C'], caps['N']) - 0.1458) < 1e-4

def test_nme_omega_trans(self, backbone):
n, ca, c = backbone
caps = _buildNmeCap(n, ca, c)
omega = _dihedral(ca, c, caps['N'], caps['C'])
assert abs(abs(omega) - np.pi) < 0.05


class TestCapIntegration:
def test_ace_nme_forcefield_compatible(self):
"""Full PDBFixer pipeline: add caps and verify ForceField accepts them."""
pdb_text = """\
SEQRES 1 A 4 ACE ALA ALA NME
ATOM 1 N ALA A 2 1.458 0.000 0.000 1.00 0.00 N
ATOM 2 CA ALA A 2 2.009 1.420 0.000 1.00 0.00 C
ATOM 3 C ALA A 2 3.530 1.411 0.000 1.00 0.00 C
ATOM 4 O ALA A 2 4.135 0.348 0.000 1.00 0.00 O
ATOM 5 CB ALA A 2 1.499 2.156 1.231 1.00 0.00 C
ATOM 6 N ALA A 3 4.090 2.600 0.000 1.00 0.00 N
ATOM 7 CA ALA A 3 5.540 2.700 0.000 1.00 0.00 C
ATOM 8 C ALA A 3 6.100 4.100 0.000 1.00 0.00 C
ATOM 9 O ALA A 3 5.400 5.100 0.000 1.00 0.00 O
ATOM 10 CB ALA A 3 6.050 1.964 1.231 1.00 0.00 C
END
"""
fixer = PDBFixer(pdbfile=StringIO(pdb_text))
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.0)

assert fixer.topology.getNumAtoms() == 32

# If geometry is bad, ForceField will raise an exception.
ff = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
ff.createSystem(fixer.topology)