@@ -22,160 +22,140 @@ def is_unique(arr):
2222
2323HUMAN_AMINO_ACIDS = dict (
2424 A = dict (
25- smile = 'CC(C(=O)O)N' ,
26- # template_smile = 'NC(C=O)C',
25+ smile = 'NC(C=O)C' ,
2726 first_atom_idx = 5 ,
2827 last_atom_idx = 2 ,
2928 distogram_atom_idx = 1 ,
3029 token_center_atom_idx = 1 ,
3130 ),
3231 R = dict (
33- smile = 'C(CC(C(=O)O)N)CN=C(N)N' ,
34- # template_smile = 'NC(C=O)CCCNC(N)=N',
32+ smile = 'NC(C=O)CCCNC(N)=N' ,
3533 first_atom_idx = 6 ,
3634 last_atom_idx = 3 ,
3735 distogram_atom_idx = 2 ,
3836 token_center_atom_idx = 2 ,
3937 ),
4038 N = dict (
41- smile = 'C(C(C(=O)O)N)C(=O)N' ,
42- # template_smile = 'NC(C=O)CC(=O)N',
39+ smile = 'NC(C=O)CC(=O)N' ,
4340 first_atom_idx = 5 ,
4441 last_atom_idx = 2 ,
4542 distogram_atom_idx = 1 ,
4643 token_center_atom_idx = 1 ,
4744 ),
4845 D = dict (
49- smile = 'C(C(C(=O)O)N)C(=O)O' ,
50- # template_smile = 'NC(C=O)CC(=O)O',
46+ smile = 'NC(C=O)CC(=O)O' ,
5147 first_atom_idx = 5 ,
5248 last_atom_idx = 2 ,
5349 distogram_atom_idx = 1 ,
5450 token_center_atom_idx = 1 ,
5551 ),
5652 C = dict (
57- smile = 'C(C(C(=O)O)N)S' ,
58- # template_smile = 'NC(C=O)CS',
53+ smile = 'NC(C=O)CS' ,
5954 first_atom_idx = 5 ,
6055 last_atom_idx = 2 ,
6156 distogram_atom_idx = 1 ,
6257 token_center_atom_idx = 1 ,
6358 ),
6459 Q = dict (
65- smile = 'C(CC(=O)N)C(C(=O)O)N' ,
66- # template_smile = 'NC(C=O)CCC(=O)N',
60+ smile = 'NC(C=O)CCC(=O)N' ,
6761 first_atom_idx = 9 ,
6862 last_atom_idx = 6 ,
6963 distogram_atom_idx = 5 ,
7064 token_center_atom_idx = 5 ,
7165 ),
7266 E = dict (
73- smile = 'C(CC(=O)O)C(C(=O)O)N' ,
74- # template_smile = 'NC(C=O)CCC(=O)O',
67+ smile = 'NC(C=O)CCC(=O)O' ,
7568 first_atom_idx = 9 ,
7669 last_atom_idx = 6 ,
7770 distogram_atom_idx = 5 ,
7871 token_center_atom_idx = 5 ,
7972 ),
8073 G = dict (
81- smile = 'C(C(=O)O)N' ,
82- # template_smile = 'NCC=O',
74+ smile = 'NCC=O' ,
8375 first_atom_idx = 4 ,
8476 last_atom_idx = 1 ,
8577 distogram_atom_idx = 0 ,
8678 token_center_atom_idx = 0 ,
8779 ),
8880 H = dict (
89- smile = 'C1=C(NC=N1)CC(C(=O)O)N' ,
90- # template_smile = 'NC(C=O)CC1=CNC=N1',
81+ smile = 'NC(C=O)CC1=CNC=N1' ,
9182 first_atom_idx = 10 ,
9283 last_atom_idx = 7 ,
9384 distogram_atom_idx = 0 ,
9485 token_center_atom_idx = 0 ,
9586 ),
9687 I = dict (
97- smile = 'CCC(C)C(C(=O)O)N' ,
98- # template_smile = 'NC(C=O)C(CC)C',
88+ smile = 'NC(C=O)C(CC)C' ,
9989 first_atom_idx = 8 ,
10090 last_atom_idx = 5 ,
10191 distogram_atom_idx = 0 ,
10292 token_center_atom_idx = 0 ,
10393 ),
10494 L = dict (
105- smile = 'CC(C)CC(C(=O)O)N' ,
106- # template_smile = 'NC(C=O)CC(C)C',
95+ smile = 'NC(C=O)CC(C)C' ,
10796 first_atom_idx = 8 ,
10897 last_atom_idx = 5 ,
10998 distogram_atom_idx = 4 ,
11099 token_center_atom_idx = 4 ,
111100 ),
112101 K = dict (
113- smile = 'C(CCN)CC(C(=O)O)N' ,
114- # template_smile = 'NC(C=O)CCCCN',
102+ smile = 'NC(C=O)CCCCN' ,
115103 first_atom_idx = 9 ,
116104 last_atom_idx = 6 ,
117105 distogram_atom_idx = 5 ,
118106 token_center_atom_idx = 5 ,
119107 ),
120108 M = dict (
121- smile = 'CSCCC(C(=O)O)N' ,
122- # template_smile = 'NC(C=O)CCSC',
109+ smile = 'NC(C=O)CCSC' ,
123110 first_atom_idx = 8 ,
124111 last_atom_idx = 5 ,
125112 distogram_atom_idx = 4 ,
126113 token_center_atom_idx = 4 ,
127114 ),
128115 F = dict (
129- smile = 'C1=CC=C(C=C1)CC(C(=O)O)N' ,
130- # template_smile = 'NC(C=O)CC1=CC=CC=C1',
116+ smile = 'NC(C=O)CC1=CC=CC=C1' ,
131117 first_atom_idx = 11 ,
132118 last_atom_idx = 8 ,
133119 distogram_atom_idx = 7 ,
134120 token_center_atom_idx = 7 ,
135121 ),
136122 P = dict (
137- smile = 'C1CC(NC1)C(=O)O' ,
138- # template_smile = 'N1C(C=O)CCC1',
123+ smile = 'N1C(C=O)CCC1' ,
139124 first_atom_idx = 3 ,
140125 last_atom_idx = 5 ,
141126 distogram_atom_idx = 2 ,
142127 token_center_atom_idx = 2 ,
143128 ),
144129 S = dict (
145- smile = 'C(C(C(=O)O)N)O' ,
146- # template_smile = 'NC(C=O)CO',
130+ smile = 'NC(C=O)CO' ,
147131 first_atom_idx = 5 ,
148132 last_atom_idx = 2 ,
149133 distogram_atom_idx = 1 ,
150134 token_center_atom_idx = 1 ,
151135 ),
152136 T = dict (
153- smile = 'CC(C(C(=O)O)N)O' ,
154- # template_smile = 'NC(C=O)C(O)C',
137+ smile = 'NC(C=O)C(O)C' ,
155138 first_atom_idx = 6 ,
156139 last_atom_idx = 3 ,
157140 distogram_atom_idx = 2 ,
158141 token_center_atom_idx = 2 ,
159142 ),
160143 W = dict (
161- smile = 'C1=CC=C2C(=C1)C(=CN2)CC(C(=O)O)N' ,
162- # template_smile = 'NC(C=O)CC1=CNC2=C1C=CC=C2',
144+ smile = 'NC(C=O)CC1=CNC2=C1C=CC=C2' ,
163145 first_atom_idx = 14 ,
164146 last_atom_idx = 11 ,
165147 distogram_atom_idx = 10 ,
166148 token_center_atom_idx = 10 ,
167149 ),
168150 Y = dict (
169- smile = 'C1=CC(=CC=C1CC(C(=O)O)N)O' ,
170- # template_smile = 'NC(C=O)CC1=CC=C(O)C=C1',
151+ smile = 'NC(C=O)CC1=CC=C(O)C=C1' ,
171152 first_atom_idx = 11 ,
172153 last_atom_idx = 8 ,
173154 distogram_atom_idx = 7 ,
174155 token_center_atom_idx = 7 ,
175156 ),
176157 V = dict (
177- smile = 'CC(C)C(C(=O)O)N' ,
178- # template_smile = 'NC(C=O)C(C)C',
158+ smile = 'NC(C=O)C(C)C' ,
179159 first_atom_idx = 7 ,
180160 last_atom_idx = 4 ,
181161 distogram_atom_idx = 3 ,
@@ -188,35 +168,31 @@ def is_unique(arr):
188168
189169DNA_NUCLEOTIDES = dict (
190170 A = dict (
191- smile = 'C1C(C(OC1N2C=NC3=C(N=CN=C32)N)COP(=O)(O)O)O' ,
192- # template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)CC1O',
171+ smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)CC1O' ,
193172 complement = 'T' ,
194173 first_atom_idx = 20 ,
195174 last_atom_idx = 1 ,
196175 distogram_atom_idx = 4 ,
197176 token_center_atom_idx = 4 ,
198177 ),
199178 C = dict (
200- smile = 'C1C(C(OC1N2C=CC(=NC2=O)N)COP(=O)(O)O)O' ,
201- # template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)CC1O',
179+ smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)CC1O' ,
202180 complement = 'G' ,
203181 first_atom_idx = 17 ,
204182 last_atom_idx = 1 ,
205183 distogram_atom_idx = 4 ,
206184 token_center_atom_idx = 4 ,
207185 ),
208186 G = dict (
209- smile = 'C1C(C(OC1N2C=NC3=C2N=C(NC3=O)N)COP(=O)(O)O)O' ,
210- # template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)CC1O',
187+ smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)CC1O' ,
211188 complement = 'C' ,
212189 first_atom_idx = 21 ,
213190 last_atom_idx = 1 ,
214191 distogram_atom_idx = 4 ,
215192 token_center_atom_idx = 4 ,
216193 ),
217194 T = dict (
218- smile = 'CC1=CN(C(=O)NC1=O)C2CC(C(O2)COP(=O)(O)O)O' ,
219- # template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C(C)=C2)CC1O',
195+ smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C(C)=C2)CC1O' ,
220196 complement = 'A' ,
221197 first_atom_idx = 19 ,
222198 last_atom_idx = 11 ,
@@ -227,35 +203,31 @@ def is_unique(arr):
227203
228204RNA_NUCLEOTIDES = dict (
229205 A = dict (
230- smile = 'C1=NC(=C2C(=N1)N(C=N2)C3C(C(C(O3)COP(=O)(O)O)O)O)N' ,
231- # template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)C(O)C1O',
206+ smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)C(O)C1O' ,
232207 complement = 'U' ,
233208 first_atom_idx = 19 ,
234209 last_atom_idx = 11 ,
235210 distogram_atom_idx = 9 ,
236211 token_center_atom_idx = 9 ,
237212 ),
238213 C = dict (
239- smile = 'C1=CN(C(=O)N=C1N)C2C(C(C(O2)COP(=O)([O-])[O-])O)O' ,
240- # template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)C(O)C1O',
214+ smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)C(O)C1O' ,
241215 complement = 'G' ,
242216 first_atom_idx = 17 ,
243217 last_atom_idx = 10 ,
244218 distogram_atom_idx = 8 ,
245219 token_center_atom_idx = 8 ,
246220 ),
247221 G = dict (
248- smile = 'C1=NC2=C(N1C3C(C(C(O3)COP(=O)(O)O)O)O)N=C(NC2=O)N' ,
249- # template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)C(O)C1O',
222+ smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)C(O)C1O' ,
250223 complement = 'C' ,
251224 first_atom_idx = 14 ,
252225 last_atom_idx = 7 ,
253226 distogram_atom_idx = 5 ,
254227 token_center_atom_idx = 5 ,
255228 ),
256229 U = dict (
257- smile = 'C1=CN(C(=O)NC1=O)C2C(C(C(O2)COP(=O)(O)O)O)O' ,
258- # template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C=C2)C(O)C1O',
230+ smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C=C2)C(O)C1O' ,
259231 complement = 'A' ,
260232 first_atom_idx = 18 ,
261233 last_atom_idx = 10 ,
@@ -517,4 +489,5 @@ def mol_from_template_mmcif_file(
517489 assert 0 <= entry ['distogram_atom_idx' ] < num_atoms
518490 assert 0 <= entry ['first_atom_idx' ] < num_atoms
519491 assert 0 <= entry ['last_atom_idx' ] < num_atoms
492+ assert entry ['first_atom_idx' ] != entry ['last_atom_idx' ]
520493 assert 0 <= entry ['token_center_atom_idx' ] < num_atoms
0 commit comments