Skip to content

Commit 9194147

Browse files
MILESTONE: Working version for benzene and porphyrin (with cyclic allenes disabled)
1 parent 0f8883a commit 9194147

File tree

8 files changed

+275
-124
lines changed

8 files changed

+275
-124
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PYTHON_BIN_DIR = /usr/local/python35/bin
22

33
test_tautomers:
4-
-@rm graphs/benzene* graphs/ethanal* graphs/porphyrin*.pdf
4+
-@rm graphs/benzene* graphs/ethanal* graphs/propadiene*.pdf graphs/cyclooctyne*.pdf graphs/methylimidazole*.pdf graphs/porphyrin*.pdf
55
python3 test_tautomers.py
66
.PHONY: test_tautomers.py
77

helpers/graphs.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def lewis_graph(molecule: 'Molecule') -> Graph:
1717

1818
return G
1919

20-
def unique_molecules(molecules: List['Molecule']) -> List['Molecule']:
20+
def unique_molecules(molecules: List['Molecule'], debug: bool = False) -> List['Molecule']:
2121
'''
2222
Return list of one-by-one non-isomorphic graphs (including bond order, chemical elements and number of lone pairs)
2323
'''
@@ -28,8 +28,10 @@ def unique_molecules(molecules: List['Molecule']) -> List['Molecule']:
2828

2929
unique_molecules: List[Tuple['Molecules', Graph]] = []
3030

31+
print_if_debug = lambda *args, **kwargs: print(*args, **Kwargs) if debug else None
32+
3133
for (molecule_1, graph_1) in zip(molecules, lewis_graphs):
32-
print('Assessing {0} (unique_molecules={1})'.format(molecule_1.name, [m.name for (m, _) in unique_molecules]))
34+
print_if_debug('Assessing {0} (unique_molecules={1})'.format(molecule_1.name, [m.name for (m, _) in unique_molecules]))
3335
if all(
3436
not is_isomorphic(
3537
graph_1,
@@ -39,12 +41,12 @@ def unique_molecules(molecules: List['Molecule']) -> List['Molecule']:
3941
)
4042
for (_, graph_2) in unique_molecules
4143
):
42-
print('UNIQUE: {0}'.format(molecule_1.name))
44+
print_if_debug('UNIQUE: {0}'.format(molecule_1.name))
4345
unique_molecules.append(
4446
(molecule_1, graph_1),
4547
)
4648
else:
47-
print('NOT UNIQUE: {0}'.format(molecule_1.name))
49+
print_if_debug('NOT UNIQUE: {0}'.format(molecule_1.name))
4850

4951
return [
5052
molecule

helpers/misc.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
from typing import Optional, TextIO, Sequence, Any
22

3+
from fragment_capping.helpers.types_helpers import Atom
4+
35
def write_to_debug(debug: Optional[TextIO], *objects: Sequence[Any]) -> None:
46
if debug is not None:
57
debug.write(' '.join(map(str, objects)) + '\n')
8+
9+
def atom_short_desc(atom: Atom) -> str:
10+
return '{element}_{index}'.format(element=atom.element, index=atom.index)

helpers/rings.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import List, Set
22
from functools import reduce
33

4-
from fragment_capping.helpers.types_helpers import ATOM_INDEX, Bond
4+
from fragment_capping.helpers.types_helpers import ATOM_INDEX, Bond, Atom
55

66
SMALL_RING = 7
77

@@ -11,6 +11,16 @@ def bonds_for_ring(ring: List[ATOM_INDEX]) -> List[Bond]:
1111
for atom_indices in zip(ring, ring[1:] + (ring[0],))
1212
]
1313

14+
def atoms_in_small_rings_for(molecule: 'Molecule') -> Set[Atom]:
15+
return [
16+
molecule.atoms[atom_index]
17+
for atom_index in reduce(
18+
lambda acc, e: acc | set(e),
19+
[ring for ring in molecule.rings() if 0 < len(ring) < SMALL_RING],
20+
set(),
21+
)
22+
]
23+
1424
def bonds_in_small_rings_for(molecule: 'Molecule') -> Set[Bond]:
1525
return reduce(
1626
lambda acc, e: acc | set(e),

helpers/tautomers.py

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
from fragment_capping.helpers.parameters import MAX_ABSOLUTE_CHARGE, MIN_ABSOLUTE_CHARGE, MAX_NONBONDED_ELECTRONS, \
1010
MAX_BOND_ORDER, MIN_BOND_ORDER, VALENCE_ELECTRONS, ELECTRONS_PER_BOND, MUST_BE_INT, Capping_Strategy, NO_CAP, H_CAP, \
1111
H2_CAP, H3_CAP, H4_CAP, ELECTRONEGATIVITIES, new_atom_for_capping_strategy, min_valence_for
12-
from fragment_capping.helpers.misc import write_to_debug
12+
from fragment_capping.helpers.misc import write_to_debug, atom_short_desc
1313
from fragment_capping.helpers.graphs import unique_molecules
14-
from fragment_capping.helpers.rings import bonds_in_small_rings_for, SMALL_RING
14+
from fragment_capping.helpers.rings import bonds_in_small_rings_for, SMALL_RING, atoms_in_small_rings_for
1515

1616
def get_all_tautomers_naive(
1717
molecule: 'Molecule',
@@ -54,6 +54,8 @@ def maximum_number_hydrogens_for(atom: Atom) -> int:
5454
if len(bond & capping_atom_ids) != 0
5555
}
5656

57+
non_capping_bonds = molecule.bonds - capping_bonds
58+
5759
charges = {
5860
atom.index: LpVariable("C_{i}".format(i=atom.index), -MAX_ABSOLUTE_CHARGE, MAX_ABSOLUTE_CHARGE, LpInteger)
5961
for atom in molecule.atoms.values()
@@ -107,7 +109,7 @@ def maximum_number_hydrogens_for(atom: Atom) -> int:
107109
OBJECTIVES.append(MAX(sum(non_bonded_electrons.values())))
108110

109111
for atom in map(lambda atom_index: molecule.atoms[atom_index], charges.keys()):
110-
problem += charges[atom.index] == VALENCE_ELECTRONS[atom.element] - sum([bond_orders[bond] for bond in molecule.bonds if atom.index in bond]) - ELECTRON_MULTIPLIER * non_bonded_electrons[atom.index], '{element}_{index}'.format(element=atom.element, index=atom.index)
112+
problem += charges[atom.index] == VALENCE_ELECTRONS[atom.element] - sum([bond_orders[bond] for bond in molecule.bonds if atom.index in bond]) - ELECTRON_MULTIPLIER * non_bonded_electrons[atom.index], atom_short_desc(atom)
111113

112114
# Deal with absolute values
113115
problem += charges[atom.index] <= absolute_charges[atom.index], 'Absolute charge contraint left {i}'.format(i=atom.index)
@@ -180,7 +182,7 @@ def debug_failed_ILP(n: Optional[int] = None) -> None:
180182

181183
# Solve once to find optimal solution
182184
try:
183-
print('Solving')
185+
write_to_debug(debug, 'Solving')
184186
problem.sequentialSolve(OBJECTIVES)
185187
except Exception as e:
186188
problem.writeLP('debug.lp')
@@ -205,7 +207,7 @@ def debug_failed_ILP(n: Optional[int] = None) -> None:
205207
'Solution {n}'.format(n=n)
206208

207209
s = encode_solution()
208-
print('Excluding solution {0}'.format(s))
210+
write_to_debug(debug, 'Excluding solution {0}'.format(s))
209211
solutions.append(s)
210212

211213
try:
@@ -217,7 +219,7 @@ def debug_failed_ILP(n: Optional[int] = None) -> None:
217219
if problem.status == 1:
218220
pass
219221
else:
220-
print(problem.status, LpStatus[problem.status])
222+
write_to_debug(debug, problem.status, LpStatus[problem.status])
221223
break
222224

223225
all_tautomers.append(new_molecule_for_current_solution(n=n))
@@ -232,9 +234,10 @@ def get_all_tautomers(
232234
net_charge: Optional[int] = None,
233235
enforce_octet_rule: bool = True,
234236
allow_radicals: bool = False,
235-
max_tautomers: Optional[int] = 100,
237+
max_tautomers: Optional[int] = 2000,
236238
disallow_triple_bond_in_small_rings: bool = True,
237-
use_gurobi: bool = False,
239+
disallow_allenes_in_small_rings: bool = True,
240+
use_gurobi: bool = True,
238241
debug: Optional[TextIO] = None,
239242
) -> 'Molecule':
240243
if len([1 for atom in molecule.atoms.values() if atom.element == 'H']) > 0:
@@ -291,7 +294,6 @@ def possible_capping_strategies_for_atom(atom: Atom) -> List[Capping_Strategy]:
291294
possible_capping_strategies = possible_capping_strategies_for_atom(uncapped_atom)
292295
if len(possible_capping_strategies) == 0 or len(possible_capping_strategies) == 1 and possible_capping_strategies[0] == NO_CAP:
293296
pass
294-
print('PASS')
295297
else:
296298
for (i, capping_strategy) in enumerate(sorted(possible_capping_strategies), start=1):
297299
write_to_debug(debug, uncapped_atom, capping_strategy, i)
@@ -311,7 +313,7 @@ def possible_capping_strategies_for_atom(atom: Atom) -> List[Capping_Strategy]:
311313
fragment_H_scores[uncapped_atom.index, i] = len([atom for atom in capping_atoms_for[uncapped_atom.index, i] if atom.element == 'H'])
312314

313315
# Only choose one capping strategy at a time
314-
problem += (lpSum(F_i for ((atom_id, _), F_i) in fragment_switches.items() if atom_id == uncapped_atom.index) == 1, 'Single capping strategy for atom {element}_{index}'.format(element=uncapped_atom.element, index=uncapped_atom.index))
316+
problem += (lpSum(F_i for ((atom_id, _), F_i) in fragment_switches.items() if atom_id == uncapped_atom.index) == 1, 'Single capping strategy for atom {atom_desc}'.format(atom_desc=atom_short_desc(uncapped_atom)))
315317

316318
all_capping_atoms = {atom for atoms in capping_atoms_for.values() for atom in atoms}
317319
all_capping_atom_ids = {atom.index for atom in all_capping_atoms}
@@ -342,6 +344,11 @@ def possible_capping_strategies_for_atom(atom: Atom) -> List[Capping_Strategy]:
342344
)
343345
}
344346

347+
non_capping_bonds = {
348+
bond for bond in molecule.bonds
349+
if len(bond & all_capping_atom_ids) == 0
350+
}
351+
345352
if True:
346353
molecule.write_graph('debug')
347354

@@ -436,6 +443,13 @@ def possible_capping_strategies_for_atom(atom: Atom) -> List[Capping_Strategy]:
436443
'Octet for atom {element}_{index}'.format(element=atom.element, index=atom.index),
437444
)
438445

446+
for atom in atoms_in_small_rings_for(molecule):
447+
if disallow_allenes_in_small_rings:
448+
if atom.element in ['C', 'N']:
449+
adjacent_non_hydrogen_bonds = [bond for bond in non_capping_bonds if atom.index in bond]
450+
if len(adjacent_non_hydrogen_bonds) == 2:
451+
problem += sum(bond_orders[bond] for bond in adjacent_non_hydrogen_bonds) <= 3, 'No allenes for atom {atom_desc} in short ring'.format(atom_desc=atom_short_desc(atom))
452+
439453
def debug_failed_ILP(n: Optional[int] = None) -> None:
440454
debug_filename = 'debug{0}.lp'.format('' if n is None else '_{n}'.format(n=n))
441455
problem.writeLP(debug_filename)
@@ -509,7 +523,9 @@ def new_molecule_for_current_solution(n: Optional[int] = None) -> 'Molecule':
509523
debug_failed_ILP(0)
510524
raise
511525

512-
debug_failed_ILP(0)
526+
if debug is not None:
527+
debug_failed_ILP(0)
528+
513529
all_tautomers = [new_molecule_for_current_solution(n=0)]
514530

515531
# Remove redundant constraints from previous multi-objective optimistion
@@ -526,7 +542,7 @@ def new_molecule_for_current_solution(n: Optional[int] = None) -> 'Molecule':
526542
'Solution {n}'.format(n=n)
527543

528544
s = encode_solution()
529-
print('Excluding solution {0}'.format(s))
545+
write_to_debug(debug, 'Excluding solution {0}'.format(s))
530546
solutions.append(s)
531547

532548
try:
@@ -540,8 +556,9 @@ def new_molecule_for_current_solution(n: Optional[int] = None) -> 'Molecule':
540556
if problem.status == 1:
541557
pass
542558
else:
543-
print(problem.status, LpStatus[problem.status])
544-
debug_failed_ILP(n)
559+
write_to_debug(debug, problem.status, LpStatus[problem.status])
560+
if debug is not None:
561+
debug_failed_ILP(n)
545562
break
546563

547564
all_tautomers.append(new_molecule_for_current_solution(n=n))

pdbs/cyclooctyne.pdb

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
HEADER UNCLASSIFIED 14-Jan-18
2+
TITLE ALL ATOM STRUCTURE FOR MOLECULE UNK
3+
AUTHOR GROMOS AUTOMATIC TOPOLOGY BUILDER REVISION 2018-01-12 13:04:34
4+
AUTHOR 2 http://compbio.biosci.uq.edu.au/atb
5+
HETATM 1 C1 H2PM 0 -0.604 -1.460 -0.047 1.00 0.00 C
6+
HETATM 2 C8 H2PM 0 -1.949 -0.895 -0.174 1.00 0.00 C
7+
HETATM 3 H19 H2PM 0 -2.291 -0.960 -1.216 1.00 0.00 H
8+
HETATM 4 H20 H2PM 0 -2.697 -1.421 0.433 1.00 0.00 H
9+
HETATM 5 C2 H2PM 0 0.604 -1.460 0.048 1.00 0.00 C
10+
HETATM 6 C3 H2PM 0 1.949 -0.896 0.174 1.00 0.00 C
11+
HETATM 7 H9 H2PM 0 2.291 -0.960 1.216 1.00 0.00 H
12+
HETATM 8 H10 H2PM 0 2.697 -1.421 -0.433 1.00 0.00 H
13+
HETATM 9 C4 H2PM 0 1.857 0.594 -0.263 1.00 0.00 C
14+
HETATM 10 H11 H2PM 0 2.811 1.075 -0.007 1.00 0.00 H
15+
HETATM 11 H12 H2PM 0 1.775 0.631 -1.357 1.00 0.00 H
16+
HETATM 12 C5 H2PM 0 0.695 1.412 0.360 1.00 0.00 C
17+
HETATM 13 H13 H2PM 0 0.565 1.120 1.412 1.00 0.00 H
18+
HETATM 14 H14 H2PM 0 1.037 2.455 0.387 1.00 0.00 H
19+
HETATM 15 C6 H2PM 0 -0.695 1.412 -0.360 1.00 0.00 C
20+
HETATM 16 H15 H2PM 0 -1.036 2.455 -0.387 1.00 0.00 H
21+
HETATM 17 H16 H2PM 0 -0.565 1.120 -1.412 1.00 0.00 H
22+
HETATM 18 C7 H2PM 0 -1.857 0.594 0.263 1.00 0.00 C
23+
HETATM 19 H17 H2PM 0 -1.775 0.631 1.357 1.00 0.00 H
24+
HETATM 20 H18 H2PM 0 -2.811 1.075 0.007 1.00 0.00 H
25+
CONECT 1 2 5
26+
CONECT 2 1 3 4 18
27+
CONECT 3 2
28+
CONECT 4 2
29+
CONECT 5 1 6
30+
CONECT 6 5 7 8 9
31+
CONECT 7 6
32+
CONECT 8 6
33+
CONECT 9 6 10 11 12
34+
CONECT 10 9
35+
CONECT 11 9
36+
CONECT 12 9 13 14 15
37+
CONECT 13 12
38+
CONECT 14 12
39+
CONECT 15 12 16 17 18
40+
CONECT 16 15
41+
CONECT 17 15
42+
CONECT 18 2 15 19 20
43+
CONECT 19 18
44+
CONECT 20 18
45+
END

pdbs/propadiene.pdb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
HEADER UNCLASSIFIED 17-Jan-18
2+
TITLE ALL ATOM STRUCTURE FOR MOLECULE UNK
3+
AUTHOR GROMOS AUTOMATIC TOPOLOGY BUILDER REVISION 2018-01-12 13:04:34
4+
AUTHOR 2 http://compbio.biosci.uq.edu.au/atb
5+
HETATM 1 H5 74U8 0 -1.876 -0.680 -0.632 1.00 0.00 H
6+
HETATM 2 C2 74U8 0 -1.308 -0.000 0.001 1.00 0.00 C
7+
HETATM 3 H4 74U8 0 -1.880 0.678 0.632 1.00 0.00 H
8+
HETATM 4 C1 74U8 0 0.000 0.003 0.000 1.00 0.00 C
9+
HETATM 5 C3 74U8 0 1.308 -0.000 -0.001 1.00 0.00 C
10+
HETATM 6 H6 74U8 0 1.880 0.631 -0.681 1.00 0.00 H
11+
HETATM 7 H7 74U8 0 1.876 -0.632 0.679 1.00 0.00 H
12+
CONECT 1 2
13+
CONECT 2 1 3 4
14+
CONECT 3 2
15+
CONECT 4 2 5
16+
CONECT 5 4 6 7
17+
CONECT 6 5
18+
CONECT 7 5
19+
END

0 commit comments

Comments
 (0)