diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index 35c36b7..9067de0 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -275,6 +275,61 @@ def _dihedralRotation(points, angle): [axis[2]*axis[0]*(1-ct) - axis[1]*st, axis[2]*axis[1]*(1-ct) + axis[0]*st, axis[2]*axis[2]*(1-ct) + ct ] ]) +def _placeAtomNeRF(a1, a2, a3, bond_length, bond_angle, dihedral): + """Place atom X bonded to a3 using the NeRF algorithm (Parsons et al. 2005). + + Given three reference points a1, a2, a3, compute the position of X such that + dist(a3, X) = bond_length, angle(a2, a3, X) = bond_angle, and + dihedral(a1, a2, a3, X) = dihedral. + + All lengths in nanometers, angles in radians. + """ + bc = a3 - a2 + bc_hat = bc / np.linalg.norm(bc) + ab = a2 - a1 + n = np.cross(ab, bc) + norm_n = np.linalg.norm(n) + if norm_n < 1e-10: + raise ValueError( + "Reference points a1, a2, a3 are collinear; cannot define a NeRF frame.") + n_hat = n / norm_n + m = np.cross(n_hat, bc_hat) + d = np.array([ + -bond_length * np.cos(bond_angle), + bond_length * np.sin(bond_angle) * np.cos(dihedral), + bond_length * np.sin(bond_angle) * np.sin(dihedral), + ]) + return a3 + d[0] * bc_hat + d[1] * m + d[2] * n_hat + +def _buildAceCap(n, ca, c): + """Build ACE cap positions from the adjacent residue's N, CA, C (in nm). + + Returns dict mapping atom name to numpy position array. + Atom names match ACE.pdb template: C, O, CH3. + + Bond lengths and angles from Engh & Huber (1991), Acta Cryst. A47, 392-400; + tabulated at https://www.ebi.ac.uk/thornton-srv/software/PROCHECK/manual/manappa.html + CH3-C uses the CA-C value (both sp3 C bonded to carbonyl C). + """ + ace_c = _placeAtomNeRF(c, ca, n, 0.1329, 2.124, np.pi) # C-N 1.329 A, C-N-CA 121.7 deg + ch3 = _placeAtomNeRF(ca, n, ace_c, 0.1525, 2.028, np.pi) # ~CA-C 1.525 A, CA-C-N 116.2 deg + o = _placeAtomNeRF(ca, n, ace_c, 0.1231, 2.147, 0.0) # C=O 1.231 A, O-C-N 123.0 deg + return {'C': ace_c, 'O': o, 'CH3': ch3} + +def _buildNmeCap(n, ca, c): + """Build NME cap positions from the adjacent residue's N, CA, C (in nm). + + Returns dict mapping atom name to numpy position array. + Atom names match NME.pdb template: N, C. + + Bond lengths and angles from Engh & Huber (1991), Acta Cryst. A47, 392-400; + tabulated at https://www.ebi.ac.uk/thornton-srv/software/PROCHECK/manual/manappa.html + N-CH3 uses the N-CA value (both sp3 C bonded to amide N). + """ + nme_n = _placeAtomNeRF(n, ca, c, 0.1329, 2.028, np.pi) # C-N 1.329 A, CA-C-N 116.2 deg + ch3 = _placeAtomNeRF(ca, c, nme_n, 0.1458, 2.124, np.pi) # ~N-CA 1.458 A, C-N-CA 121.7 deg + return {'N': nme_n, 'C': ch3} + def _findUnoccupiedDirection(point, positions): """Given a point in space and a list of atom positions, find the direction in which the local density of atoms is lowest.""" @@ -765,43 +820,77 @@ def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosi for i, residueName in enumerate(residueNames): template = self._getTemplate(residueName) - # Find a translation that best matches the adjacent residue. - - points1 = [] - points2 = [] - for atom in template.topology.atoms(): - if atom.name in orientToPositions: - points1.append(orientToPositions[atom.name].value_in_unit(unit.nanometer)) - points2.append(template.positions[atom.index].value_in_unit(unit.nanometer)) - (translate2, rotate, translate1) = _overlayPoints(points1, points2) - - # Create the new residue. - - newResidue = chain.topology.addResidue(residueName, chain, "%d" % ((firstIndex+i)%10000)) - fraction = (i+1.0)/(numResidues+1.0) - translate = startPosition + (endPosition-startPosition)*fraction + loopHeight*math.sin(fraction*math.pi)*loopDirection - templateAtoms = list(template.topology.atoms()) - if newResidue == next(chain.residues()): - templateAtoms = [atom for atom in templateAtoms if atom.name not in ('P', 'OP1', 'OP2')] - for atom in templateAtoms: - newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue) - newAtoms.append(newAtom) - templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer) - newPositions.append(mm.Vec3(*np.dot(rotate, templatePosition))*unit.nanometer+translate) - if prevResidue is not None: - atoms1 = {atom.name: atom for atom in prevResidue.atoms()} - atoms2 = {atom.name: atom for atom in newResidue.atoms()} - if 'CA' in atoms1 and 'C' in atoms1 and 'N' in atoms2 and 'CA' in atoms2: - - # We're adding a peptide bond between this residue and the previous one. Rotate it to try to - # put the peptide bond into the trans conformation. - - atoms = (atoms1['CA'], atoms1['C'], atoms2['N'], atoms2['CA']) - points = [newPositions[a.index] for a in atoms] - rotation = _dihedralRotation(points, np.pi) - for atom in newResidue.atoms(): - d = (newPositions[atom.index]-points[2]).value_in_unit(unit.nanometer) - newPositions[atom.index] = mm.Vec3(*np.dot(rotation, d))*unit.nanometer + points[2] + if residueName == 'ACE' and all(k in orientToPositions for k in ('N', 'CA', 'C')): + # Use NeRF placement for ACE cap. Requires N, CA, C on the + # adjacent residue; falls back to Kabsch overlay if any are missing. + refN = np.array(orientToPositions['N'].value_in_unit(unit.nanometer)) + refCA = np.array(orientToPositions['CA'].value_in_unit(unit.nanometer)) + refC = np.array(orientToPositions['C'].value_in_unit(unit.nanometer)) + capPositions = _buildAceCap(refN, refCA, refC) + newResidue = chain.topology.addResidue(residueName, chain, + "%d" % ((firstIndex+i)%10000)) + for atom in template.topology.atoms(): + newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue) + newAtoms.append(newAtom) + newPositions.append(mm.Vec3(*capPositions[atom.name])*unit.nanometer) + + elif residueName == 'NME' and prevResidue is not None and \ + all(k in {a.name for a in prevResidue.atoms()} for k in ('N', 'CA', 'C')): + # Use NeRF placement for NME cap. Requires N, CA, C on the + # previous residue; falls back to Kabsch overlay if any are missing. + prevAtoms = {a.name: a for a in prevResidue.atoms()} + refN = np.array(newPositions[prevAtoms['N'].index].value_in_unit(unit.nanometer)) + refCA = np.array(newPositions[prevAtoms['CA'].index].value_in_unit(unit.nanometer)) + refC = np.array(newPositions[prevAtoms['C'].index].value_in_unit(unit.nanometer)) + capPositions = _buildNmeCap(refN, refCA, refC) + newResidue = chain.topology.addResidue(residueName, chain, + "%d" % ((firstIndex+i)%10000)) + for atom in template.topology.atoms(): + newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue) + newAtoms.append(newAtom) + newPositions.append(mm.Vec3(*capPositions[atom.name])*unit.nanometer) + + else: + # Standard residue (or cap missing backbone reference atoms): + # use Kabsch overlay + arc placement. + + # Find a translation that best matches the adjacent residue. + + points1 = [] + points2 = [] + for atom in template.topology.atoms(): + if atom.name in orientToPositions: + points1.append(orientToPositions[atom.name].value_in_unit(unit.nanometer)) + points2.append(template.positions[atom.index].value_in_unit(unit.nanometer)) + (translate2, rotate, translate1) = _overlayPoints(points1, points2) + + # Create the new residue. + + newResidue = chain.topology.addResidue(residueName, chain, "%d" % ((firstIndex+i)%10000)) + fraction = (i+1.0)/(numResidues+1.0) + translate = startPosition + (endPosition-startPosition)*fraction + loopHeight*math.sin(fraction*math.pi)*loopDirection + templateAtoms = list(template.topology.atoms()) + if newResidue == next(chain.residues()): + templateAtoms = [atom for atom in templateAtoms if atom.name not in ('P', 'OP1', 'OP2')] + for atom in templateAtoms: + newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue) + newAtoms.append(newAtom) + templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer) + newPositions.append(mm.Vec3(*np.dot(rotate, templatePosition))*unit.nanometer+translate) + if prevResidue is not None: + atoms1 = {atom.name: atom for atom in prevResidue.atoms()} + atoms2 = {atom.name: atom for atom in newResidue.atoms()} + if 'CA' in atoms1 and 'C' in atoms1 and 'N' in atoms2 and 'CA' in atoms2: + + # We're adding a peptide bond between this residue and the previous one. Rotate it to try to + # put the peptide bond into the trans conformation. + + atoms = (atoms1['CA'], atoms1['C'], atoms2['N'], atoms2['CA']) + points = [newPositions[a.index] for a in atoms] + rotation = _dihedralRotation(points, np.pi) + for atom in newResidue.atoms(): + d = (newPositions[atom.index]-points[2]).value_in_unit(unit.nanometer) + newPositions[atom.index] = mm.Vec3(*np.dot(rotation, d))*unit.nanometer + points[2] prevResidue = newResidue diff --git a/pdbfixer/tests/test_nerf_caps.py b/pdbfixer/tests/test_nerf_caps.py new file mode 100644 index 0000000..795d6df --- /dev/null +++ b/pdbfixer/tests/test_nerf_caps.py @@ -0,0 +1,143 @@ +"""Tests for NeRF-based ACE/NME cap placement.""" + +from io import StringIO + +import numpy as np +import pytest +from openmm.app import ForceField +from pdbfixer import PDBFixer +from pdbfixer.pdbfixer import _placeAtomNeRF, _buildAceCap, _buildNmeCap + + +def _dist(a, b): + return np.linalg.norm(a - b) + + +def _angle(a, b, c): + ba = a - b + bc = c - b + cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc)) + return np.arccos(np.clip(cos_angle, -1, 1)) + + +def _dihedral(a, b, c, d): + b1 = b - a + b2 = c - b + b3 = d - c + n1 = np.cross(b1, b2) + n2 = np.cross(b2, b3) + m = np.cross(n1, b2 / np.linalg.norm(b2)) + return np.arctan2(np.dot(m, n2), np.dot(n1, n2)) + + +# Reference backbone atoms for a typical residue (in nm). +REF_N = np.array([0.0, 0.0, 0.0]) +REF_CA = np.array([0.1458, 0.0, 0.0]) +REF_C = np.array([0.2058, 0.1230, 0.0]) + + +def _rotation_matrix(axis, angle): + """Rodrigues' rotation formula.""" + axis = axis / np.linalg.norm(axis) + K = np.array([ + [0, -axis[2], axis[1]], + [axis[2], 0, -axis[0]], + [-axis[1], axis[0], 0], + ]) + return np.identity(3) + np.sin(angle) * K + (1 - np.cos(angle)) * K @ K + + +@pytest.fixture(params=[ + (np.identity(3), np.zeros(3)), + (_rotation_matrix(np.array([1.0, 2.0, 3.0]), 1.23), np.array([0.5, -0.3, 0.7])), +], ids=["z=0 plane", "general 3D"]) +def backbone(request): + """Yield (N, CA, C) reference backbone, optionally rotated+translated.""" + rot, shift = request.param + return rot @ REF_N + shift, rot @ REF_CA + shift, rot @ REF_C + shift + + +class TestPlaceAtomNeRF: + # Use non-collinear reference points (collinear points make cross product zero). + A1 = np.array([0.0, 0.1, 0.0]) + A2 = np.array([0.0, 0.0, 0.0]) + A3 = np.array([0.15, 0.0, 0.0]) + + def test_bond_length_and_angle(self): + bond_length = 0.1335 + bond_angle = 2.035 + x = _placeAtomNeRF(self.A1, self.A2, self.A3, bond_length, bond_angle, np.pi / 3) + assert abs(_dist(self.A3, x) - bond_length) < 1e-6 + assert abs(_angle(self.A2, self.A3, x) - bond_angle) < 1e-6 + + def test_collinear_raises(self): + a1 = np.array([0.0, 0.0, 0.0]) + a2 = np.array([0.15, 0.0, 0.0]) + a3 = np.array([0.30, 0.0, 0.0]) + with pytest.raises(ValueError, match="collinear"): + _placeAtomNeRF(a1, a2, a3, 0.15, 2.0, 1.0) + + def test_dihedral_pi(self): + """Test with pi dihedral, which is sign-invariant and matches cap usage.""" + x = _placeAtomNeRF(self.A1, self.A2, self.A3, 0.15, 2.0, np.pi) + measured = _dihedral(self.A1, self.A2, self.A3, x) + assert abs(abs(measured) - np.pi) < 1e-5 + + +class TestBuildAceCap: + def test_ace_bond_lengths(self, backbone): + n, ca, c = backbone + caps = _buildAceCap(n, ca, c) + assert abs(_dist(caps['C'], n) - 0.1329) < 1e-4 + assert abs(_dist(caps['O'], caps['C']) - 0.1231) < 1e-4 + assert abs(_dist(caps['CH3'], caps['C']) - 0.1525) < 1e-4 + + def test_ace_omega_trans(self, backbone): + n, ca, c = backbone + caps = _buildAceCap(n, ca, c) + omega = _dihedral(caps['CH3'], caps['C'], n, ca) + assert abs(abs(omega) - np.pi) < 0.05 + + +class TestBuildNmeCap: + def test_nme_bond_lengths(self, backbone): + n, ca, c = backbone + caps = _buildNmeCap(n, ca, c) + assert abs(_dist(caps['N'], c) - 0.1329) < 1e-4 + assert abs(_dist(caps['C'], caps['N']) - 0.1458) < 1e-4 + + def test_nme_omega_trans(self, backbone): + n, ca, c = backbone + caps = _buildNmeCap(n, ca, c) + omega = _dihedral(ca, c, caps['N'], caps['C']) + assert abs(abs(omega) - np.pi) < 0.05 + + +class TestCapIntegration: + def test_ace_nme_forcefield_compatible(self): + """Full PDBFixer pipeline: add caps and verify ForceField accepts them.""" + pdb_text = """\ +SEQRES 1 A 4 ACE ALA ALA NME +ATOM 1 N ALA A 2 1.458 0.000 0.000 1.00 0.00 N +ATOM 2 CA ALA A 2 2.009 1.420 0.000 1.00 0.00 C +ATOM 3 C ALA A 2 3.530 1.411 0.000 1.00 0.00 C +ATOM 4 O ALA A 2 4.135 0.348 0.000 1.00 0.00 O +ATOM 5 CB ALA A 2 1.499 2.156 1.231 1.00 0.00 C +ATOM 6 N ALA A 3 4.090 2.600 0.000 1.00 0.00 N +ATOM 7 CA ALA A 3 5.540 2.700 0.000 1.00 0.00 C +ATOM 8 C ALA A 3 6.100 4.100 0.000 1.00 0.00 C +ATOM 9 O ALA A 3 5.400 5.100 0.000 1.00 0.00 O +ATOM 10 CB ALA A 3 6.050 1.964 1.231 1.00 0.00 C +END +""" + fixer = PDBFixer(pdbfile=StringIO(pdb_text)) + fixer.findMissingResidues() + fixer.findMissingAtoms() + fixer.addMissingAtoms() + fixer.addMissingHydrogens(7.0) + + assert fixer.topology.getNumAtoms() == 32 + + # If geometry is bad, ForceField will raise an exception. + ff = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml') + ff.createSystem(fixer.topology)