|
| 1 | +import rdkit |
| 2 | +from rdkit import Chem |
| 3 | + |
| 4 | +# human amino acids |
1 | 5 |
|
# Table of the 20 standard amino acids, keyed by one-letter residue code.
# Each value is a small mutable record whose 'smile' entry is the SMILES
# string of the free amino acid. Records are plain dicts on purpose: code
# further down the module attaches extra per-residue fields to them.
HUMAN_AMINO_ACIDS = dict(
    A=dict(smile='CC(C(=O)O)N'),                       # alanine
    R=dict(smile='C(CC(C(=O)O)N)CN=C(N)N'),            # arginine
    N=dict(smile='C(C(C(=O)O)N)C(=O)N'),               # asparagine
    D=dict(smile='C(C(C(=O)O)N)C(=O)O'),               # aspartic acid
    C=dict(smile='C(C(C(=O)O)N)S'),                    # cysteine
    Q=dict(smile='C(CC(=O)N)C(C(=O)O)N'),              # glutamine
    E=dict(smile='C(CC(=O)O)C(C(=O)O)N'),              # glutamic acid
    G=dict(smile='C(C(=O)O)N'),                        # glycine
    H=dict(smile='C1=C(NC=N1)CC(C(=O)O)N'),            # histidine
    I=dict(smile='CCC(C)C(C(=O)O)N'),                  # isoleucine
    L=dict(smile='CC(C)CC(C(=O)O)N'),                  # leucine
    K=dict(smile='C(CCN)CC(C(=O)O)N'),                 # lysine
    M=dict(smile='CSCCC(C(=O)O)N'),                    # methionine
    F=dict(smile='C1=CC=C(C=C1)CC(C(=O)O)N'),          # phenylalanine
    P=dict(smile='C1CC(NC1)C(=O)O'),                   # proline
    S=dict(smile='C(C(C(=O)O)N)O'),                    # serine
    T=dict(smile='CC(C(C(=O)O)N)O'),                   # threonine
    W=dict(smile='C1=CC=C2C(=C1)C(=CN2)CC(C(=O)O)N'),  # tryptophan
    Y=dict(smile='C1=CC(=CC=C1CC(C(=O)O)N)O'),         # tyrosine
    V=dict(smile='CC(C)C(C(=O)O)N'),                   # valine
)
| 68 | + |
| 69 | +# nucleotides |
| 70 | + |
# Table keyed by one-letter nucleotide code. Each value is a mutable record
# whose 'smile' entry is the SMILES string of the bare nucleobase (no sugar
# or phosphate). Every entry is kept at the same level of detail: 'T' is
# thymine itself (5-methyluracil), not the nucleoside thymidine.
NUCLEOTIDES = dict(
    A=dict(smile='C1=NC2=NC=NC(=C2N1)N'),       # adenine
    G=dict(smile='C1=NC2=C(N1)C(=O)NC(=N2)N'),  # guanine
    C=dict(smile='C1=C(NC(=O)N=C1)N'),          # cytosine
    T=dict(smile='CC1=CNC(=O)NC1=O'),           # thymine
    U=dict(smile='C1=CNC(=O)NC1=O'),            # uracil
)
| 88 | + |
# Parse every SMILES string once at import time and cache the resulting
# RDKit molecule on each record under the 'rdkit_chem' key.

def _mol_from_smiles(smiles):
    """Return the RDKit molecule parsed from *smiles*.

    ``Chem.MolFromSmiles`` signals a parse failure by returning ``None``
    rather than raising, which would otherwise be cached silently and only
    surface later as an unrelated ``AttributeError``. Fail loudly instead.

    Raises:
        ValueError: if RDKit cannot parse *smiles*.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError(f'RDKit could not parse SMILES string: {smiles!r}')
    return mol


for aa_dict in HUMAN_AMINO_ACIDS.values():
    aa_dict['rdkit_chem'] = _mol_from_smiles(aa_dict['smile'])

for nuc_dict in NUCLEOTIDES.values():
    nuc_dict['rdkit_chem'] = _mol_from_smiles(nuc_dict['smile'])
0 commit comments