Skip to content

Commit a4487ad

Browse files
authored
Merge pull request #1176 from haddocking/check_combination_chains
refactored check for chain combinations
2 parents 0a4f4a8 + 3a68823 commit a4487ad

File tree

4 files changed

+50
-14
lines changed

4 files changed

+50
-14
lines changed

src/haddock/libs/libcns.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from haddock.libs.libfunc import false, true
1414
from haddock.libs.libmath import RandomNumberGenerator
1515
from haddock.libs.libontology import PDBFile
16+
from haddock.libs.libpdb import check_combination_chains
1617
from haddock.libs.libutil import transform_to_list
1718

1819

@@ -335,25 +336,14 @@ def prepare_cns_input(
335336
# prepare chain/seg IDs
336337
segid_str = ""
337338
if native_segid:
338-
chainid_list: list[str] = []
339339
if isinstance(input_element, (list, tuple)):
340-
for pdb in input_element:
341-
342-
segids, chains = libpdb.identify_chainseg(pdb.rel_path, sort=False)
343-
344-
chainsegs = sorted(list(set(segids) | set(chains)))
345-
# check if any of chainsegs is already in chainid_list
346-
if not identifier.endswith("scoring"):
347-
if any(chainseg in chainid_list for chainseg in chainsegs):
348-
raise ValueError(
349-
f"Chain/seg IDs are not unique for pdbs {input_element}."
350-
)
351-
chainid_list.extend(chainsegs)
340+
chainid_list = check_combination_chains(input_element)
352341

353342
for i, _chainseg in enumerate(chainid_list, start=1):
354343
segid_str += write_eval_line(f"prot_segid_{i}", _chainseg)
355344

356345
else:
346+
chainid_list: list[str] = []
357347
segids, chains = libpdb.identify_chainseg(
358348
input_element.rel_path, sort=False
359349
)

src/haddock/libs/libpdb.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
Optional,
1818
Union,
1919
)
20-
from haddock.libs.libio import working_directory
20+
from haddock.libs.libio import working_directory, PDBFile
2121
from haddock.libs.libutil import get_result_or_same_in_list, sort_numbered_paths
2222

2323

@@ -296,3 +296,18 @@ def read_RECORD_section(
296296

297297
read_chainids = partial(read_RECORD_section, section_slice=slc_chainid, func=list) # noqa: E501
298298
read_segids = partial(read_RECORD_section, section_slice=slc_segid, func=list)
299+
300+
301+
def check_combination_chains(combination: list[PDBFile]) -> list[str]:
    """Collect the chain/seg IDs of a combination, ensuring they are unique.

    Parameters
    ----------
    combination : list[PDBFile]
        Models to be combined. The ``rel_path`` of each model is handed to
        ``identify_chainseg`` to retrieve its segment and chain IDs.

    Returns
    -------
    list[str]
        Chain/seg IDs of every model, following the order of the models in
        ``combination`` and sorted within each model. Empty if
        ``combination`` is empty.

    Raises
    ------
    ValueError
        If a chain/seg ID of a model was already found in a previous model
        of the combination.
    """
    chainid_list: list[str] = []
    # mirrors chainid_list so the overlap test does not rescan the list
    seen_ids: set[str] = set()
    for pdb in combination:
        segids, chains = identify_chainseg(pdb.rel_path, sort=False)
        # union of segids and chains: an ID is counted once per model
        chainsegs = sorted(set(segids) | set(chains))
        # check if any of chainsegs was already contributed by another model
        if not seen_ids.isdisjoint(chainsegs):
            raise ValueError(
                f"Chain/seg IDs are not unique for pdbs {combination}."
                )
        chainid_list.extend(chainsegs)
        seen_ids.update(chainsegs)
    return chainid_list

src/haddock/modules/sampling/rigidbody/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from haddock.libs.libcns import prepare_cns_input
3939
from haddock.libs.libontology import PDBFile
4040
from haddock.libs.libparallel import GenericTask, Scheduler
41+
from haddock.libs.libpdb import check_combination_chains
4142
from haddock.libs.libsubprocess import CNSJob
4243
from haddock.modules import get_engine
4344
from haddock.modules.base_cns_module import BaseCNSModule
@@ -132,6 +133,7 @@ def prepare_cns_input_parallel(
132133
_l = []
133134
idx = 1
134135
for combination in models_to_dock:
136+
check_combination_chains(combination)
135137
for _ in range(sampling_factor):
136138
ambig_fname = (
137139
ambig_fnames[idx - 1]

tests/test_libpdb.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
"""Test lib PDB."""
2+
from pathlib import Path
23
import pytest
34

5+
46
from haddock.libs import libpdb
7+
from haddock.libs.libio import PDBFile
58

9+
from . import golden_data
610

711
chainC = [
812
'ATOM 3 CA ARG C 4 37.080 43.455 -3.421 1.00 0.00 C C ', # noqa: E501
@@ -31,3 +35,28 @@ def test_read_chain_ids(lines, expected):
3135
def test_read_seg_ids(lines, expected):
3236
result = libpdb.read_segids(lines)
3337
assert result == expected
38+
39+
40+
@pytest.fixture(name="wrong_rigid_molecules")
def fixture_wrong_rigidbody_molecules():
    """Provide a receptor/ligand pair used as the failing rigidbody input."""
    # receptor first, ligand second
    file_names = ("protprot_complex_1.pdb", "protprot_complex_2.pdb")
    return [PDBFile(Path(golden_data, file_name)) for file_name in file_names]
46+
47+
@pytest.fixture(name="good_rigid_molecules")
def fixture_good_rigidbody_molecules():
    """Provide a receptor/ligand pair used as the valid rigidbody input."""
    # receptor first, ligand second
    file_names = ("e2aP_1F3G_haddock.pdb", "hpr_ensemble_1_haddock.pdb")
    return [PDBFile(Path(golden_data, file_name)) for file_name in file_names]
53+
54+
def test_check_combination_chains(good_rigid_molecules, wrong_rigid_molecules):
    """Check the chain list of a valid pair and the error for a clashing one."""
    # the valid pair is expected to yield one chain ID per molecule
    assert libpdb.check_combination_chains(good_rigid_molecules) == ["A", "B"]

    # when input molecules share chains there should be a ValueError
    with pytest.raises(ValueError):
        libpdb.check_combination_chains(wrong_rigid_molecules)
62+

0 commit comments

Comments
 (0)