|
39 | 39 | from rmgpy.molecule.molecule import Atom, Bond, Molecule |
40 | 40 | from rmgpy.molecule.atomtype import get_atomtype, AtomTypeError, ATOMTYPES, AtomType |
41 | 41 | from rdkit import Chem |
42 | | - |
| 42 | +from numpy.random import randint |
43 | 43 | # this variable is used to name atom IDs so that there are as few conflicts as |
44 | 44 | # possible by using the entire space of integer objects |
45 | 45 | ATOM_ID_COUNTER = -(2**15) |
@@ -888,15 +888,10 @@ def cut_molecule(self, output_smiles=False, cut_through=True, size_threshold=Non |
888 | 888 | frag_list.append(res_frag) |
889 | 889 | return frag_list |
890 | 890 |
|
891 | | - def sliceitup_arom(self, molecule, size_threshold=None): |
| 891 | + def sliceitup_arom(self, molecule, size_threshold=5): |
892 | 892 | """ |
893 | 893 | Several specified aromatic patterns |
894 | 894 | """ |
895 | | - # set min size for each aliphatic fragment size |
896 | | - if size_threshold: |
897 | | - size_threshold = size_threshold |
898 | | - else: |
899 | | - size_threshold = 5 |
900 | 895 | # if input is smiles string, output smiles |
901 | 896 | if isinstance(molecule, str): |
902 | 897 | molecule_smiles = molecule |
@@ -950,29 +945,52 @@ def sliceitup_arom(self, molecule, size_threshold=None): |
950 | 945 | # mol_set contains new set of fragments |
951 | 946 | mol_set = Chem.GetMolFrags(new_mol, asMols=True) |
952 | 947 | # check all fragments' size |
953 | | - if all( |
954 | | - sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) |
955 | | - >= size_threshold |
956 | | - for mol in mol_set |
957 | | - ): |
958 | | - # replace * at cutting position with cutting label |
959 | | - for ind, rdmol in enumerate(mol_set): |
960 | | - frag = Chem.MolToSmiles(rdmol) |
961 | | - if len(mol_set) > 2: # means it cut into 3 fragments |
962 | | - if frag.count("*") > 1: |
963 | | - # replace both with R |
964 | | - frag_smi = frag.replace("*", "R") |
965 | | - else: |
966 | | - frag_smi = frag.replace("*", "L") |
967 | | - else: # means it only cut once, generate 2 fragments |
968 | | - if ind == 0: |
969 | | - frag_smi = frag.replace("*", "R") |
| 948 | + try: |
| 949 | + if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set): |
| 950 | + if len(mol_set) == 2: |
| 951 | + frag1 = Chem.MolToSmiles(mol_set[0]) |
| 952 | + frag2 = Chem.MolToSmiles(mol_set[1]) |
| 953 | + |
| 954 | + frag1_R = frag1.count("Na") |
| 955 | + frag1_L = frag1.count("K") |
| 956 | + frag2_R = frag2.count("Na") |
| 957 | + frag2_L = frag2.count("K") |
| 958 | + |
| 959 | + if frag1_R > frag2_R and frag1_L <= frag2_L: |
| 960 | + frag1_smi = frag1.replace("*", "L") |
| 961 | + frag2_smi = frag2.replace("*", "R") |
| 962 | + elif frag1_L > frag2_L and frag1_R <= frag2_R: |
| 963 | + frag1_smi = frag1.replace("*", "R") |
| 964 | + frag2_smi = frag2.replace("*", "L") |
| 965 | + elif frag2_L > frag1_L and frag2_R <= frag1_R: |
| 966 | + frag1_smi = frag1.replace("*", "L") |
| 967 | + frag2_smi = frag2.replace("*", "R") |
| 968 | + elif frag2_R > frag1_R and frag2_L <= frag1_L: |
| 969 | + frag1_smi = frag1.replace("*", "R") |
| 970 | + frag2_smi = frag2.replace("*", "L") |
| 971 | + elif randint(0,1)==1: |
| 972 | + frag1_smi = frag1.replace("*", "L") |
| 973 | + frag2_smi = frag2.replace("*", "R") |
970 | 974 | else: |
971 | | - frag_smi = frag.replace("*", "L") |
972 | | - frag_list.append(frag_smi) |
973 | | - break |
974 | | - else: |
975 | | - # turn to next matched_atom_map |
| 975 | + frag1_smi = frag1.replace("*", "R") |
| 976 | + frag2_smi = frag2.replace("*", "L") |
| 977 | + |
| 978 | + frag_list = [frag1_smi, frag2_smi] |
| 979 | + |
| 980 | + elif len(mol_set) > 2: # means it cut into 3 fragments |
| 981 | + frag_list = [] |
| 982 | + for ind, rdmol in enumerate(mol_set): |
| 983 | + frag = Chem.MolToSmiles(rdmol) |
| 984 | + if frag.count("*") > 1: |
| 985 | + frag_smi = frag.replace("*", "R") |
| 986 | + else: |
| 987 | + frag_smi = frag.replace("*", "L") |
| 988 | + frag_list.append(frag_smi) |
| 989 | + break |
| 990 | + else: |
| 991 | + # turn to next matched_atom_map |
| 992 | + continue |
| 993 | + except: |
976 | 994 | continue |
977 | 995 | else: |
978 | 996 | # no match for this pattern |
@@ -1014,15 +1032,10 @@ def sliceitup_arom(self, molecule, size_threshold=None): |
1014 | 1032 | frag_list_new.append(res_frag) |
1015 | 1033 | return frag_list_new |
1016 | 1034 |
|
1017 | | - def sliceitup_aliph(self, molecule, size_threshold=None): |
| 1035 | + def sliceitup_aliph(self, molecule, size_threshold=5): |
1018 | 1036 | """ |
1019 | 1037 | Several specified aliphatic patterns |
1020 | 1038 | """ |
1021 | | - # set min size for each aliphatic fragment size |
1022 | | - if size_threshold: |
1023 | | - size_threshold = size_threshold |
1024 | | - else: |
1025 | | - size_threshold = 5 |
1026 | 1039 | # if input is smiles string, output smiles |
1027 | 1040 | if isinstance(molecule, str): |
1028 | 1041 | molecule_smiles = molecule |
@@ -1079,29 +1092,52 @@ def sliceitup_aliph(self, molecule, size_threshold=None): |
1079 | 1092 | # mol_set contains new set of fragments |
1080 | 1093 | mol_set = Chem.GetMolFrags(new_mol, asMols=True) |
1081 | 1094 | # check all fragments' size |
1082 | | - if all( |
1083 | | - sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) |
1084 | | - >= size_threshold |
1085 | | - for mol in mol_set |
1086 | | - ): |
1087 | | - # replace * at cutting position with cutting label |
1088 | | - for ind, rdmol in enumerate(mol_set): |
1089 | | - frag = Chem.MolToSmiles(rdmol) |
1090 | | - if len(mol_set) > 2: # means it cut into 3 fragments |
1091 | | - if frag.count("*") > 1: |
1092 | | - # replace both with R |
1093 | | - frag_smi = frag.replace("*", "R") |
| 1095 | + try: |
| 1096 | + if all(sum(1 for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6) >= size_threshold for mol in mol_set): |
| 1097 | + if len(mol_set) == 2: |
| 1098 | + frag1 = Chem.MolToSmiles(mol_set[0]) |
| 1099 | + frag2 = Chem.MolToSmiles(mol_set[1]) |
| 1100 | + |
| 1101 | + frag1_R = frag1.count("Na") |
| 1102 | + frag1_L = frag1.count("K") |
| 1103 | + frag2_R = frag2.count("Na") |
| 1104 | + frag2_L = frag2.count("K") |
| 1105 | + |
| 1106 | + if frag1_R > frag2_R and frag1_L <= frag2_L: |
| 1107 | + frag1_smi = frag1.replace("*", "L") |
| 1108 | + frag2_smi = frag2.replace("*", "R") |
| 1109 | + elif frag1_L > frag2_L and frag1_R <= frag2_R: |
| 1110 | + frag1_smi = frag1.replace("*", "R") |
| 1111 | + frag2_smi = frag2.replace("*", "L") |
| 1112 | + elif frag2_L > frag1_L and frag2_R <= frag1_R: |
| 1113 | + frag1_smi = frag1.replace("*", "R") |
| 1114 | + frag2_smi = frag2.replace("*", "L") |
| 1115 | + elif frag2_R > frag1_R and frag2_L <= frag1_L: |
| 1116 | + frag1_smi = frag1.replace("*", "R") |
| 1117 | + frag2_smi = frag2.replace("*", "L") |
| 1118 | + elif randint(0,1)==1: |
| 1119 | + frag1_smi = frag1.replace("*", "L") |
| 1120 | + frag2_smi = frag2.replace("*", "R") |
1094 | 1121 | else: |
1095 | | - frag_smi = frag.replace("*", "L") |
1096 | | - else: # means it only cut once, generate 2 fragments |
1097 | | - if ind == 0: |
1098 | | - frag_smi = frag.replace("*", "R") |
1099 | | - else: |
1100 | | - frag_smi = frag.replace("*", "L") |
1101 | | - frag_list.append(frag_smi) |
1102 | | - break |
1103 | | - else: |
1104 | | - # turn to next matched_atom_map |
| 1122 | + frag1_smi = frag1.replace("*", "R") |
| 1123 | + frag2_smi = frag2.replace("*", "L") |
| 1124 | + |
| 1125 | + frag_list = [frag1_smi, frag2_smi] |
| 1126 | + |
| 1127 | + elif len(mol_set) > 2: # means it cut into 3 fragments |
| 1128 | + frag_list = [] |
| 1129 | + for ind, rdmol in enumerate(mol_set): |
| 1130 | + frag = Chem.MolToSmiles(rdmol) |
| 1131 | + if frag.count("*") > 1: |
| 1132 | + frag_smi = frag.replace("*", "R") |
| 1133 | + else: |
| 1134 | + frag_smi = frag.replace("*", "L") |
| 1135 | + frag_list.append(frag_smi) |
| 1136 | + break |
| 1137 | + else: |
| 1138 | + # turn to next matched_atom_map |
| 1139 | + continue |
| 1140 | + except: |
1105 | 1141 | continue |
1106 | 1142 | else: |
1107 | 1143 | # no match for this pattern |
|
0 commit comments