|
1 | 1 | steps: |
2 | 2 |
|
3 | | - - extract_pdbbind_refined: |
4 | | - in: |
5 | | - # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html |
6 | | - # "The query() method uses a slightly modified Python syntax by default. |
7 | | - # For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or. |
8 | | - # This is syntactically valid Python, however the semantics are different." |
9 | | - query: !ii '(Kd_Ki == "Kd") and (value < 0.001)' |
10 | | - max_row: !ii 1 #25 # Use 1 for CI |
11 | | - convert_Kd_dG: !ii True |
12 | | - out: |
13 | | - - output_pdb_paths: !& pdbbind_pdbs |
14 | | - - output_sdf_paths: !& pdbbind_sdfs |
15 | | - - experimental_dGs: !& exp_dGs |
| 3 | + - id: extract_pdbbind_refined |
| 4 | + in: |
| 5 | + # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html |
| 6 | + # "The query() method uses a slightly modified Python syntax by default. |
| 7 | + # For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or. |
| 8 | + # This is syntactically valid Python, however the semantics are different." |
| 9 | + query: !ii '(Kd_Ki == "Kd") and (value < 0.001)' |
| 10 | + max_row: !ii 1 #25 # Use 1 for CI |
| 11 | + convert_Kd_dG: !ii True |
| 12 | + out: |
| 13 | + - output_pdb_paths: !& pdbbind_pdbs |
| 14 | + - output_sdf_paths: !& pdbbind_sdfs |
| 15 | + - experimental_dGs: !& exp_dGs |
16 | 16 |
|
17 | | - - fix_side_chain: |
18 | | - scatter: [input_pdb_path] |
19 | | - in: |
20 | | - input_pdb_path: !* pdbbind_pdbs |
21 | | - out: |
22 | | - - output_pdb_path: !& pdbbind_pdbs.pdb |
| 17 | + - id: fix_side_chain |
| 18 | + scatter: [input_pdb_path] |
| 19 | + in: |
| 20 | + input_pdb_path: !* pdbbind_pdbs |
| 21 | + out: |
| 22 | + - output_pdb_path: !& pdbbind_pdbs.pdb |
23 | 23 |
|
24 | | - - sanitize_ligand: |
25 | | - scatter: [input_small_mol_ligand] |
26 | | - in: |
27 | | - input_small_mol_ligand: !* pdbbind_sdfs |
28 | | - out: |
29 | | - - output_ligand: !& sanitized_sdfs |
30 | | - - valid_ligand: !& valid_ligands |
| 24 | + - id: sanitize_ligand |
| 25 | + scatter: [input_small_mol_ligand] |
| 26 | + in: |
| 27 | + input_small_mol_ligand: !* pdbbind_sdfs |
| 28 | + out: |
| 29 | + - output_ligand: !& sanitized_sdfs |
| 30 | + - valid_ligand: !& valid_ligands |
31 | 31 |
|
32 | | - - filter_array: # remove invalid ligands from sanitized_ligand, avoid using null |
33 | | - in: |
34 | | - input_array: !* sanitized_sdfs |
35 | | - input_bool_array: !* valid_ligands |
36 | | - out: |
37 | | - - output_array: !& final_sanitized_sdfs |
| 32 | + - id: filter_array # remove invalid ligands from sanitized_ligand, avoid using null |
| 33 | + in: |
| 34 | + input_array: !* sanitized_sdfs |
| 35 | + input_bool_array: !* valid_ligands |
| 36 | + out: |
| 37 | + - output_array: !& final_sanitized_sdfs |
38 | 38 |
|
39 | | - - filter_array: # remove proteins corresponding to invalid ligands from sanitized_ligand |
40 | | - in: |
41 | | - input_array: !* pdbbind_pdbs.pdb |
42 | | - input_bool_array: !* valid_ligands |
43 | | - out: |
44 | | - - output_array: !& final_pdbbind_pdbs.pdb |
| 39 | + - id: filter_array # remove proteins corresponding to invalid ligands from sanitized_ligand |
| 40 | + in: |
| 41 | + input_array: !* pdbbind_pdbs.pdb |
| 42 | + input_bool_array: !* valid_ligands |
| 43 | + out: |
| 44 | + - output_array: !& final_pdbbind_pdbs.pdb |
45 | 45 |
|
46 | | - - filter_array: # remove dGs corresponding to invalid ligands from sanitized_ligand |
47 | | - in: |
48 | | - input_array: !* exp_dGs |
49 | | - input_bool_array: !* valid_ligands |
50 | | - out: |
51 | | - - output_array: !& final_exp_dGs |
| 46 | + - id: filter_array # remove dGs corresponding to invalid ligands from sanitized_ligand |
| 47 | + in: |
| 48 | + input_array: !* exp_dGs |
| 49 | + input_bool_array: !* valid_ligands |
| 50 | + out: |
| 51 | + - output_array: !& final_exp_dGs |
52 | 52 |
|
53 | | - - diffdock: |
54 | | - scatter: [protein_path, ligand_path] |
55 | | - scatterMethod: dotproduct |
56 | | - in: |
57 | | - protein_path: !* final_pdbbind_pdbs.pdb |
58 | | - ligand_path: !* final_sanitized_sdfs |
59 | | - samples_per_complex: !ii 20 # figure 3 left in DiffDock paper |
60 | | - inference_steps: !ii 20 # figure S11 in DiffDock paper |
61 | | - out: |
62 | | - - output_files: !& diffdock_poses |
| 53 | + - id: diffdock |
| 54 | + scatter: [protein_path, ligand_path] |
| 55 | + scatterMethod: dotproduct |
| 56 | + in: |
| 57 | + protein_path: !* final_pdbbind_pdbs.pdb |
| 58 | + ligand_path: !* final_sanitized_sdfs |
| 59 | + samples_per_complex: !ii 20 # figure 3 left in DiffDock paper |
| 60 | + inference_steps: !ii 20 # figure S11 in DiffDock paper |
| 61 | + out: |
| 62 | + - output_files: !& diffdock_poses |
63 | 63 |
|
64 | | - - rank_diffdock_poses: |
65 | | - scatter: [diffdock_poses] |
66 | | - in: |
67 | | - top_n_confident: !ii 1000 # to rely only on top_percent_confidence, set top_n_confident to a trivially high number |
68 | | - # to rely only on top_n_confident, set top_percent_confidence to 100 |
69 | | - top_percent_confidence: !ii 33 # take top third of most confident poses, see figure 3 right in DiffDock paper |
70 | | - diffdock_poses: !* diffdock_poses |
71 | | - out: |
72 | | - - output_poses: !& output_poses |
| 64 | + - id: rank_diffdock_poses |
| 65 | + scatter: [diffdock_poses] |
| 66 | + in: |
| 67 | + top_n_confident: !ii 1000 # to rely only on top_percent_confidence, set top_n_confident to a trivially high number |
| 68 | + # to rely only on top_n_confident, set top_percent_confidence to 100 |
| 69 | + top_percent_confidence: !ii 33 # take top third of most confident poses, see figure 3 right in DiffDock paper |
| 70 | + diffdock_poses: !* diffdock_poses |
| 71 | + out: |
| 72 | + - output_poses: !& output_poses |
73 | 73 |
|
74 | | - - pose_cluster_filter: |
75 | | - scatter: [predicted_poses] |
76 | | - in: |
77 | | - predicted_poses: !* output_poses |
78 | | - centroid_cutoff: !ii 5 # if the centroids of all poses are within the cutoff, then only keep the most confident pose; requires visual inspection |
| 74 | + - id: pose_cluster_filter |
| 75 | + scatter: [predicted_poses] |
| 76 | + in: |
| 77 | + predicted_poses: !* output_poses |
| 78 | + centroid_cutoff: !ii 5 # if the centroids of all poses are within the cutoff, then only keep the most confident pose; requires visual inspection |
79 | 79 |
|
80 | 80 | wic: |
81 | 81 | graphviz: |
|
0 commit comments