Skip to content

Commit b6bb8ae

Browse files
committed
Changed the on-the-fly auto-cut strategy to minimize the count of each cutting label in the product fragments; added tools to automatically generate partial reattachment reactions; added reaction reversibility to the yml writer.
1 parent 6a5ebf4 commit b6bb8ae

File tree

4 files changed

+342
-63
lines changed

4 files changed

+342
-63
lines changed

rmgpy/molecule/fragment.py

Lines changed: 93 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from rmgpy.molecule.molecule import Atom, Bond, Molecule
4040
from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType
4141
from rdkit import Chem
42-
42+
from numpy.random import randint
4343
# this variable is used to name atom IDs so that there are as few conflicts by
4444
# using the entire space of integer objects
4545
ATOM_ID_COUNTER = -(2**15)
@@ -888,15 +888,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non
888888
frag_list.append(res_frag)
889889
return frag_list
890890

891-
def sliceitup_arom(self, molecule, size_threshold=None):
891+
def sliceitup_arom(self, molecule, size_threshold=5):
892892
"""
893893
Several specified aromatic patterns
894894
"""
895-
# set min size for each aliphatic fragment size
896-
if size_threshold:
897-
size_threshold = size_threshold
898-
else:
899-
size_threshold = 5
900895
# if input is smiles string, output smiles
901896
if isinstance(molecule, str):
902897
molecule_smiles = molecule
@@ -950,29 +945,52 @@ def sliceitup_arom(self, molecule, size_threshold=None):
950945
# mol_set contains new set of fragments
951946
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
952947
# check all fragments' size
953-
if all(
954-
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
955-
>= size_threshold
956-
for mol in mol_set
957-
):
958-
# replace * at cutting position with cutting label
959-
for ind, rdmol in enumerate(mol_set):
960-
frag = Chem.MolToSmiles(rdmol)
961-
if len(mol_set) > 2: # means it cut into 3 fragments
962-
if frag.count("*") > 1:
963-
# replace both with R
964-
frag_smi = frag.replace("*", "R")
965-
else:
966-
frag_smi = frag.replace("*", "L")
967-
else: # means it only cut once, generate 2 fragments
968-
if ind == 0:
969-
frag_smi = frag.replace("*", "R")
948+
try:
949+
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
950+
if len(mol_set) == 2:
951+
frag1 = Chem.MolToSmiles(mol_set[0])
952+
frag2 = Chem.MolToSmiles(mol_set[1])
953+
954+
frag1_R = frag1.count("Na")
955+
frag1_L = frag1.count("K")
956+
frag2_R = frag2.count("Na")
957+
frag2_L = frag2.count("K")
958+
959+
if frag1_R > frag2_R and frag1_L <= frag2_L:
960+
frag1_smi = frag1.replace("*", "L")
961+
frag2_smi = frag2.replace("*", "R")
962+
elif frag1_L > frag2_L and frag1_R <= frag2_R:
963+
frag1_smi = frag1.replace("*", "R")
964+
frag2_smi = frag2.replace("*", "L")
965+
elif frag2_L > frag1_L and frag2_R <= frag1_R:
966+
frag1_smi = frag1.replace("*", "L")
967+
frag2_smi = frag2.replace("*", "R")
968+
elif frag2_R > frag1_R and frag2_L <= frag1_L:
969+
frag1_smi = frag1.replace("*", "R")
970+
frag2_smi = frag2.replace("*", "L")
971+
elif randint(0,1)==1:
972+
frag1_smi = frag1.replace("*", "L")
973+
frag2_smi = frag2.replace("*", "R")
970974
else:
971-
frag_smi = frag.replace("*", "L")
972-
frag_list.append(frag_smi)
973-
break
974-
else:
975-
# turn to next matched_atom_map
975+
frag1_smi = frag1.replace("*", "R")
976+
frag2_smi = frag2.replace("*", "L")
977+
978+
frag_list = [frag1_smi, frag2_smi]
979+
980+
elif len(mol_set) > 2: # means it cut into 3 fragments
981+
frag_list = []
982+
for ind, rdmol in enumerate(mol_set):
983+
frag = Chem.MolToSmiles(rdmol)
984+
if frag.count("*") > 1:
985+
frag_smi = frag.replace("*", "R")
986+
else:
987+
frag_smi = frag.replace("*", "L")
988+
frag_list.append(frag_smi)
989+
break
990+
else:
991+
# turn to next matched_atom_map
992+
continue
993+
except:
976994
continue
977995
else:
978996
# no match for this pattern
@@ -1014,15 +1032,10 @@ def sliceitup_arom(self, molecule, size_threshold=None):
10141032
frag_list_new.append(res_frag)
10151033
return frag_list_new
10161034

1017-
def sliceitup_aliph(self, molecule, size_threshold=None):
1035+
def sliceitup_aliph(self, molecule, size_threshold=5):
10181036
"""
10191037
Several specified aliphatic patterns
10201038
"""
1021-
# set min size for each aliphatic fragment size
1022-
if size_threshold:
1023-
size_threshold = size_threshold
1024-
else:
1025-
size_threshold = 5
10261039
# if input is smiles string, output smiles
10271040
if isinstance(molecule, str):
10281041
molecule_smiles = molecule
@@ -1079,29 +1092,52 @@ def sliceitup_aliph(self, molecule, size_threshold=None):
10791092
# mol_set contains new set of fragments
10801093
mol_set = Chem.GetMolFrags(new_mol, asMols=True)
10811094
# check all fragments' size
1082-
if all(
1083-
sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6)
1084-
>= size_threshold
1085-
for mol in mol_set
1086-
):
1087-
# replace * at cutting position with cutting label
1088-
for ind, rdmol in enumerate(mol_set):
1089-
frag = Chem.MolToSmiles(rdmol)
1090-
if len(mol_set) > 2: # means it cut into 3 fragments
1091-
if frag.count("*") > 1:
1092-
# replace both with R
1093-
frag_smi = frag.replace("*", "R")
1095+
try:
1096+
if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set):
1097+
if len(mol_set) == 2:
1098+
frag1 = Chem.MolToSmiles(mol_set[0])
1099+
frag2 = Chem.MolToSmiles(mol_set[1])
1100+
1101+
frag1_R = frag1.count("Na")
1102+
frag1_L = frag1.count("K")
1103+
frag2_R = frag2.count("Na")
1104+
frag2_L = frag2.count("K")
1105+
1106+
if frag1_R > frag2_R and frag1_L <= frag2_L:
1107+
frag1_smi = frag1.replace("*", "L")
1108+
frag2_smi = frag2.replace("*", "R")
1109+
elif frag1_L > frag2_L and frag1_R <= frag2_R:
1110+
frag1_smi = frag1.replace("*", "R")
1111+
frag2_smi = frag2.replace("*", "L")
1112+
elif frag2_L > frag1_L and frag2_R <= frag1_R:
1113+
frag1_smi = frag1.replace("*", "R")
1114+
frag2_smi = frag2.replace("*", "L")
1115+
elif frag2_R > frag1_R and frag2_L <= frag1_L:
1116+
frag1_smi = frag1.replace("*", "R")
1117+
frag2_smi = frag2.replace("*", "L")
1118+
elif randint(0,1)==1:
1119+
frag1_smi = frag1.replace("*", "L")
1120+
frag2_smi = frag2.replace("*", "R")
10941121
else:
1095-
frag_smi = frag.replace("*", "L")
1096-
else: # means it only cut once, generate 2 fragments
1097-
if ind == 0:
1098-
frag_smi = frag.replace("*", "R")
1099-
else:
1100-
frag_smi = frag.replace("*", "L")
1101-
frag_list.append(frag_smi)
1102-
break
1103-
else:
1104-
# turn to next matched_atom_map
1122+
frag1_smi = frag1.replace("*", "R")
1123+
frag2_smi = frag2.replace("*", "L")
1124+
1125+
frag_list = [frag1_smi, frag2_smi]
1126+
1127+
elif len(mol_set) > 2: # means it cut into 3 fragments
1128+
frag_list = []
1129+
for ind, rdmol in enumerate(mol_set):
1130+
frag = Chem.MolToSmiles(rdmol)
1131+
if frag.count("*") > 1:
1132+
frag_smi = frag.replace("*", "R")
1133+
else:
1134+
frag_smi = frag.replace("*", "L")
1135+
frag_list.append(frag_smi)
1136+
break
1137+
else:
1138+
# turn to next matched_atom_map
1139+
continue
1140+
except:
11051141
continue
11061142
else:
11071143
# no match for this pattern

0 commit comments

Comments
 (0)