Skip to content

Commit 7f6308a

Browse files
committed
Switch over to the template SMILES that align with mmCIF, as pull-requested earlier by @amorehead
1 parent 1670065 commit 7f6308a

File tree

3 files changed

+33
-60
lines changed

3 files changed

+33
-60
lines changed

alphafold3_pytorch/life.py

Lines changed: 29 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -22,160 +22,140 @@ def is_unique(arr):
2222

2323
HUMAN_AMINO_ACIDS = dict(
2424
A = dict(
25-
smile = 'CC(C(=O)O)N',
26-
# template_smile = 'NC(C=O)C',
25+
smile = 'NC(C=O)C',
2726
first_atom_idx = 5,
2827
last_atom_idx = 2,
2928
distogram_atom_idx = 1,
3029
token_center_atom_idx = 1,
3130
),
3231
R = dict(
33-
smile = 'C(CC(C(=O)O)N)CN=C(N)N',
34-
# template_smile = 'NC(C=O)CCCNC(N)=N',
32+
smile = 'NC(C=O)CCCNC(N)=N',
3533
first_atom_idx = 6,
3634
last_atom_idx = 3,
3735
distogram_atom_idx = 2,
3836
token_center_atom_idx = 2,
3937
),
4038
N = dict(
41-
smile = 'C(C(C(=O)O)N)C(=O)N',
42-
# template_smile = 'NC(C=O)CC(=O)N',
39+
smile = 'NC(C=O)CC(=O)N',
4340
first_atom_idx = 5,
4441
last_atom_idx = 2,
4542
distogram_atom_idx = 1,
4643
token_center_atom_idx = 1,
4744
),
4845
D = dict(
49-
smile = 'C(C(C(=O)O)N)C(=O)O',
50-
# template_smile = 'NC(C=O)CC(=O)O',
46+
smile = 'NC(C=O)CC(=O)O',
5147
first_atom_idx = 5,
5248
last_atom_idx = 2,
5349
distogram_atom_idx = 1,
5450
token_center_atom_idx = 1,
5551
),
5652
C = dict(
57-
smile = 'C(C(C(=O)O)N)S',
58-
# template_smile = 'NC(C=O)CS',
53+
smile = 'NC(C=O)CS',
5954
first_atom_idx = 5,
6055
last_atom_idx = 2,
6156
distogram_atom_idx = 1,
6257
token_center_atom_idx = 1,
6358
),
6459
Q = dict(
65-
smile = 'C(CC(=O)N)C(C(=O)O)N',
66-
# template_smile = 'NC(C=O)CCC(=O)N',
60+
smile = 'NC(C=O)CCC(=O)N',
6761
first_atom_idx = 9,
6862
last_atom_idx = 6,
6963
distogram_atom_idx = 5,
7064
token_center_atom_idx = 5,
7165
),
7266
E = dict(
73-
smile = 'C(CC(=O)O)C(C(=O)O)N',
74-
# template_smile = 'NC(C=O)CCC(=O)O',
67+
smile = 'NC(C=O)CCC(=O)O',
7568
first_atom_idx = 9,
7669
last_atom_idx = 6,
7770
distogram_atom_idx = 5,
7871
token_center_atom_idx = 5,
7972
),
8073
G = dict(
81-
smile = 'C(C(=O)O)N',
82-
# template_smile = 'NCC=O',
74+
smile = 'NCC=O',
8375
first_atom_idx = 4,
8476
last_atom_idx = 1,
8577
distogram_atom_idx = 0,
8678
token_center_atom_idx = 0,
8779
),
8880
H = dict(
89-
smile = 'C1=C(NC=N1)CC(C(=O)O)N',
90-
# template_smile = 'NC(C=O)CC1=CNC=N1',
81+
smile = 'NC(C=O)CC1=CNC=N1',
9182
first_atom_idx = 10,
9283
last_atom_idx = 7,
9384
distogram_atom_idx = 0,
9485
token_center_atom_idx = 0,
9586
),
9687
I = dict(
97-
smile = 'CCC(C)C(C(=O)O)N',
98-
# template_smile = 'NC(C=O)C(CC)C',
88+
smile = 'NC(C=O)C(CC)C',
9989
first_atom_idx = 8,
10090
last_atom_idx = 5,
10191
distogram_atom_idx = 0,
10292
token_center_atom_idx = 0,
10393
),
10494
L = dict(
105-
smile = 'CC(C)CC(C(=O)O)N',
106-
# template_smile = 'NC(C=O)CC(C)C',
95+
smile = 'NC(C=O)CC(C)C',
10796
first_atom_idx = 8,
10897
last_atom_idx = 5,
10998
distogram_atom_idx = 4,
11099
token_center_atom_idx = 4,
111100
),
112101
K = dict(
113-
smile = 'C(CCN)CC(C(=O)O)N',
114-
# template_smile = 'NC(C=O)CCCCN',
102+
smile = 'NC(C=O)CCCCN',
115103
first_atom_idx = 9,
116104
last_atom_idx = 6,
117105
distogram_atom_idx = 5,
118106
token_center_atom_idx = 5,
119107
),
120108
M = dict(
121-
smile = 'CSCCC(C(=O)O)N',
122-
# template_smile = 'NC(C=O)CCSC',
109+
smile = 'NC(C=O)CCSC',
123110
first_atom_idx = 8,
124111
last_atom_idx = 5,
125112
distogram_atom_idx = 4,
126113
token_center_atom_idx = 4,
127114
),
128115
F = dict(
129-
smile = 'C1=CC=C(C=C1)CC(C(=O)O)N',
130-
# template_smile = 'NC(C=O)CC1=CC=CC=C1',
116+
smile = 'NC(C=O)CC1=CC=CC=C1',
131117
first_atom_idx = 11,
132118
last_atom_idx = 8,
133119
distogram_atom_idx = 7,
134120
token_center_atom_idx = 7,
135121
),
136122
P = dict(
137-
smile = 'C1CC(NC1)C(=O)O',
138-
# template_smile = 'N1C(C=O)CCC1',
123+
smile = 'N1C(C=O)CCC1',
139124
first_atom_idx = 3,
140125
last_atom_idx = 5,
141126
distogram_atom_idx = 2,
142127
token_center_atom_idx = 2,
143128
),
144129
S = dict(
145-
smile = 'C(C(C(=O)O)N)O',
146-
# template_smile = 'NC(C=O)CO',
130+
smile = 'NC(C=O)CO',
147131
first_atom_idx = 5,
148132
last_atom_idx = 2,
149133
distogram_atom_idx = 1,
150134
token_center_atom_idx = 1,
151135
),
152136
T = dict(
153-
smile = 'CC(C(C(=O)O)N)O',
154-
# template_smile = 'NC(C=O)C(O)C',
137+
smile = 'NC(C=O)C(O)C',
155138
first_atom_idx = 6,
156139
last_atom_idx = 3,
157140
distogram_atom_idx = 2,
158141
token_center_atom_idx = 2,
159142
),
160143
W = dict(
161-
smile = 'C1=CC=C2C(=C1)C(=CN2)CC(C(=O)O)N',
162-
# template_smile = 'NC(C=O)CC1=CNC2=C1C=CC=C2',
144+
smile = 'NC(C=O)CC1=CNC2=C1C=CC=C2',
163145
first_atom_idx = 14,
164146
last_atom_idx = 11,
165147
distogram_atom_idx = 10,
166148
token_center_atom_idx = 10,
167149
),
168150
Y = dict(
169-
smile = 'C1=CC(=CC=C1CC(C(=O)O)N)O',
170-
# template_smile = 'NC(C=O)CC1=CC=C(O)C=C1',
151+
smile = 'NC(C=O)CC1=CC=C(O)C=C1',
171152
first_atom_idx = 11,
172153
last_atom_idx = 8,
173154
distogram_atom_idx = 7,
174155
token_center_atom_idx = 7,
175156
),
176157
V = dict(
177-
smile = 'CC(C)C(C(=O)O)N',
178-
# template_smile = 'NC(C=O)C(C)C',
158+
smile = 'NC(C=O)C(C)C',
179159
first_atom_idx = 7,
180160
last_atom_idx = 4,
181161
distogram_atom_idx = 3,
@@ -188,35 +168,31 @@ def is_unique(arr):
188168

189169
DNA_NUCLEOTIDES = dict(
190170
A = dict(
191-
smile = 'C1C(C(OC1N2C=NC3=C(N=CN=C32)N)COP(=O)(O)O)O',
192-
# template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)CC1O',
171+
smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)CC1O',
193172
complement = 'T',
194173
first_atom_idx = 20,
195174
last_atom_idx = 1,
196175
distogram_atom_idx = 4,
197176
token_center_atom_idx = 4,
198177
),
199178
C = dict(
200-
smile = 'C1C(C(OC1N2C=CC(=NC2=O)N)COP(=O)(O)O)O',
201-
# template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)CC1O',
179+
smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)CC1O',
202180
complement = 'G',
203181
first_atom_idx = 17,
204182
last_atom_idx = 1,
205183
distogram_atom_idx = 4,
206184
token_center_atom_idx = 4,
207185
),
208186
G = dict(
209-
smile = 'C1C(C(OC1N2C=NC3=C2N=C(NC3=O)N)COP(=O)(O)O)O',
210-
# template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)CC1O',
187+
smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)CC1O',
211188
complement = 'C',
212189
first_atom_idx = 21,
213190
last_atom_idx = 1,
214191
distogram_atom_idx = 4,
215192
token_center_atom_idx = 4,
216193
),
217194
T = dict(
218-
smile = 'CC1=CN(C(=O)NC1=O)C2CC(C(O2)COP(=O)(O)O)O',
219-
# template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C(C)=C2)CC1O',
195+
smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C(C)=C2)CC1O',
220196
complement = 'A',
221197
first_atom_idx = 19,
222198
last_atom_idx = 11,
@@ -227,35 +203,31 @@ def is_unique(arr):
227203

228204
RNA_NUCLEOTIDES = dict(
229205
A = dict(
230-
smile = 'C1=NC(=C2C(=N1)N(C=N2)C3C(C(C(O3)COP(=O)(O)O)O)O)N',
231-
# template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)C(O)C1O',
206+
smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=CN=C3N)C(O)C1O',
232207
complement = 'U',
233208
first_atom_idx = 19,
234209
last_atom_idx = 11,
235210
distogram_atom_idx = 9,
236211
token_center_atom_idx = 9,
237212
),
238213
C = dict(
239-
smile = 'C1=CN(C(=O)N=C1N)C2C(C(C(O2)COP(=O)([O-])[O-])O)O',
240-
# template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)C(O)C1O',
214+
smile = 'OP(=O)(O)OCC1OC(N2C(=O)N=C(N)C=C2)C(O)C1O',
241215
complement = 'G',
242216
first_atom_idx = 17,
243217
last_atom_idx = 10,
244218
distogram_atom_idx = 8,
245219
token_center_atom_idx = 8,
246220
),
247221
G = dict(
248-
smile = 'C1=NC2=C(N1C3C(C(C(O3)COP(=O)(O)O)O)O)N=C(NC2=O)N',
249-
# template_smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)C(O)C1O',
222+
smile = 'OP(=O)(O)OCC1OC(N2C=NC3=C2N=C(N)NC3=O)C(O)C1O',
250223
complement = 'C',
251224
first_atom_idx = 14,
252225
last_atom_idx = 7,
253226
distogram_atom_idx = 5,
254227
token_center_atom_idx = 5,
255228
),
256229
U = dict(
257-
smile = 'C1=CN(C(=O)NC1=O)C2C(C(C(O2)COP(=O)(O)O)O)O',
258-
# template_smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C=C2)C(O)C1O',
230+
smile = 'OP(=O)(O)OCC1OC(N2C(=O)NC(=O)C=C2)C(O)C1O',
259231
complement = 'A',
260232
first_atom_idx = 18,
261233
last_atom_idx = 10,
@@ -517,4 +489,5 @@ def mol_from_template_mmcif_file(
517489
assert 0 <= entry['distogram_atom_idx'] < num_atoms
518490
assert 0 <= entry['first_atom_idx'] < num_atoms
519491
assert 0 <= entry['last_atom_idx'] < num_atoms
492+
assert entry['first_atom_idx'] != entry['last_atom_idx']
520493
assert 0 <= entry['token_center_atom_idx'] < num_atoms

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "alphafold3-pytorch"
3-
version = "0.1.123"
3+
version = "0.1.124"
44
description = "Alphafold 3 - Pytorch"
55
authors = [
66
{ name = "Phil Wang", email = "[email protected]" }

tests/test_input.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ def test_atompos_input():
8686
contrived_protein = 'AG'
8787

8888
mock_atompos = [
89-
torch.randn(6, 3), # alanine has 6 non-hydrogen atoms
90-
torch.randn(5, 3) # glycine has 5 non-hydrogen atoms
89+
torch.randn(5, 3), # alanine has 5 non-hydrogen atoms
90+
torch.randn(4, 3) # glycine has 4 non-hydrogen atoms
9191
]
9292

9393
train_alphafold3_input = Alphafold3Input(
@@ -138,7 +138,7 @@ def test_atompos_input():
138138
alphafold3.eval()
139139
sampled_atom_pos = alphafold3(**batched_eval_atom_input.dict())
140140

141-
assert sampled_atom_pos.shape == (1, (6 + 5), 3)
141+
assert sampled_atom_pos.shape == (1, (5 + 4), 3)
142142

143143
def test_pdbinput_input():
144144
pytest.skip("This unit test is currently disabled while the PDB featurization pipeline is under development.")

0 commit comments

Comments
 (0)