Skip to content

Commit 1faea30

Browse files
committed
use canonical normal form(s)
1 parent da7db9e commit 1faea30

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

61 files changed

+672
-671
lines changed

examples/diffdock/run_diffdock.wic

Lines changed: 68 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,81 +1,81 @@
11
steps:
22

3-
- extract_pdbbind_refined:
4-
in:
5-
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
6-
# "The query() method uses a slightly modified Python syntax by default.
7-
# For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or.
8-
# This is syntactically valid Python, however the semantics are different."
9-
query: !ii '(Kd_Ki == "Kd") and (value < 0.001)'
10-
max_row: !ii 1 #25 # Use 1 for CI
11-
convert_Kd_dG: !ii True
12-
out:
13-
- output_pdb_paths: !& pdbbind_pdbs
14-
- output_sdf_paths: !& pdbbind_sdfs
15-
- experimental_dGs: !& exp_dGs
3+
- id: extract_pdbbind_refined
4+
in:
5+
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
6+
# "The query() method uses a slightly modified Python syntax by default.
7+
# For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or.
8+
# This is syntactically valid Python, however the semantics are different."
9+
query: !ii '(Kd_Ki == "Kd") and (value < 0.001)'
10+
max_row: !ii 1 #25 # Use 1 for CI
11+
convert_Kd_dG: !ii True
12+
out:
13+
- output_pdb_paths: !& pdbbind_pdbs
14+
- output_sdf_paths: !& pdbbind_sdfs
15+
- experimental_dGs: !& exp_dGs
1616

17-
- fix_side_chain:
18-
scatter: [input_pdb_path]
19-
in:
20-
input_pdb_path: !* pdbbind_pdbs
21-
out:
22-
- output_pdb_path: !& pdbbind_pdbs.pdb
17+
- id: fix_side_chain
18+
scatter: [input_pdb_path]
19+
in:
20+
input_pdb_path: !* pdbbind_pdbs
21+
out:
22+
- output_pdb_path: !& pdbbind_pdbs.pdb
2323

24-
- sanitize_ligand:
25-
scatter: [input_small_mol_ligand]
26-
in:
27-
input_small_mol_ligand: !* pdbbind_sdfs
28-
out:
29-
- output_ligand: !& sanitized_sdfs
30-
- valid_ligand: !& valid_ligands
24+
- id: sanitize_ligand
25+
scatter: [input_small_mol_ligand]
26+
in:
27+
input_small_mol_ligand: !* pdbbind_sdfs
28+
out:
29+
- output_ligand: !& sanitized_sdfs
30+
- valid_ligand: !& valid_ligands
3131

32-
- filter_array: # remove invalid ligands from sanitized_ligand, avoid using null
33-
in:
34-
input_array: !* sanitized_sdfs
35-
input_bool_array: !* valid_ligands
36-
out:
37-
- output_array: !& final_sanitized_sdfs
32+
- id: filter_array # remove invalid ligands from sanitized_ligand, avoid using null
33+
in:
34+
input_array: !* sanitized_sdfs
35+
input_bool_array: !* valid_ligands
36+
out:
37+
- output_array: !& final_sanitized_sdfs
3838

39-
- filter_array: # remove proteins corresponding to invalid ligands from sanitized_ligand
40-
in:
41-
input_array: !* pdbbind_pdbs.pdb
42-
input_bool_array: !* valid_ligands
43-
out:
44-
- output_array: !& final_pdbbind_pdbs.pdb
39+
- id: filter_array # remove proteins corresponding to invalid ligands from sanitized_ligand
40+
in:
41+
input_array: !* pdbbind_pdbs.pdb
42+
input_bool_array: !* valid_ligands
43+
out:
44+
- output_array: !& final_pdbbind_pdbs.pdb
4545

46-
- filter_array: # remove dGs corresponding to invalid ligands from sanitized_ligand
47-
in:
48-
input_array: !* exp_dGs
49-
input_bool_array: !* valid_ligands
50-
out:
51-
- output_array: !& final_exp_dGs
46+
- id: filter_array # remove dGs corresponding to invalid ligands from sanitized_ligand
47+
in:
48+
input_array: !* exp_dGs
49+
input_bool_array: !* valid_ligands
50+
out:
51+
- output_array: !& final_exp_dGs
5252

53-
- diffdock:
54-
scatter: [protein_path, ligand_path]
55-
scatterMethod: dotproduct
56-
in:
57-
protein_path: !* final_pdbbind_pdbs.pdb
58-
ligand_path: !* final_sanitized_sdfs
59-
samples_per_complex: !ii 20 # figure 3 left in DiffDock paper
60-
inference_steps: !ii 20 # figure S11 in DiffDock paper
61-
out:
62-
- output_files: !& diffdock_poses
53+
- id: diffdock
54+
scatter: [protein_path, ligand_path]
55+
scatterMethod: dotproduct
56+
in:
57+
protein_path: !* final_pdbbind_pdbs.pdb
58+
ligand_path: !* final_sanitized_sdfs
59+
samples_per_complex: !ii 20 # figure 3 left in DiffDock paper
60+
inference_steps: !ii 20 # figure S11 in DiffDock paper
61+
out:
62+
- output_files: !& diffdock_poses
6363

64-
- rank_diffdock_poses:
65-
scatter: [diffdock_poses]
66-
in:
67-
top_n_confident: !ii 1000 # if only using top_percent_confidence, then set top_n_confident to trivially high number
68-
# if only want to use top_n_confident, then set top_percent_confidence to 100
69-
top_percent_confidence: !ii 33 # take top third of most confident poses, see figure 3 right in DiffDock paper
70-
diffdock_poses: !* diffdock_poses
71-
out:
72-
- output_poses: !& output_poses
64+
- id: rank_diffdock_poses
65+
scatter: [diffdock_poses]
66+
in:
67+
top_n_confident: !ii 1000 # if only using top_percent_confidence, then set top_n_confident to trivially high number
68+
# if only want to use top_n_confident, then set top_percent_confidence to 100
69+
top_percent_confidence: !ii 33 # take top third of most confident poses, see figure 3 right in DiffDock paper
70+
diffdock_poses: !* diffdock_poses
71+
out:
72+
- output_poses: !& output_poses
7373

74-
- pose_cluster_filter:
75-
scatter: [predicted_poses]
76-
in:
77-
predicted_poses: !* output_poses
78-
centroid_cutoff: !ii 5 # if centroid of all poses are within cutoff then only keep most confident pose, requires visual inspection
74+
- id: pose_cluster_filter
75+
scatter: [predicted_poses]
76+
in:
77+
predicted_poses: !* output_poses
78+
centroid_cutoff: !ii 5 # if centroid of all poses are within cutoff then only keep most confident pose, requires visual inspection
7979

8080
wic:
8181
graphviz:

examples/diffdock/run_diffdock_weekly.wic

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
steps:
2-
- run_diffdock.wic:
2+
run_diffdock.wic:
33

44
wic:
55
steps:

examples/docking/align_protein_CA_pymol.wic

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
steps:
2-
- pymol_align_protein_ca:
2+
pymol_align_protein_ca:
33
in:
44
input_1_path: !* receptor.xyz
55
input_2_path: !* pose_ligand_1.pdb
@@ -9,7 +9,7 @@ steps:
99
# pymol only supports saving to pdb format which does not support storing the time of each frame.
1010
out:
1111
- output_file_path: !& prod_align_protein_ca.pdb
12-
- gmx_rms_nofit:
12+
gmx_rms_nofit:
1313
in:
1414
input_structure_path: !* npt.gro # prod.tpr ?
1515
input_traj_path: !* prod_align_protein_ca.pdb
@@ -19,7 +19,7 @@ steps:
1919
output_xvg_path: !ii rmsd_equil_ligand_notime.txt # Use .txt to avoid plotting
2020
out:
2121
- output_xvg_path: !& rmsd_equil_ligand_notime
22-
- bash_xvg:
22+
bash_xvg:
2323
in:
2424
script: !ii /replace_first_column.sh # NOTE: Initial / required
2525
# But we can recover the time column by copying it from another xvg file.

examples/docking/assign_partial_charges.wic

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@ inputs:
66
- edam:format_3814
77

88
steps:
9-
- convert_mol2:
9+
convert_mol2:
1010
in:
1111
input_path: input_path #!* ligand_min.sdf
1212
# out:
1313
# - output_mol2_path: !& ligand_min.mol2
1414
# NOTE: If we directly convert from sdf to pdbqt, openbabel will NOT add
1515
# partial charges! We have to factor through mol2 format to trigger the
1616
# partial charge heuristics code path. This does affect the docking scores!
17-
- convert_pdbqt:
17+
convert_pdbqt:
1818
# in:
1919
# input_path: !* ligand_min.mol2 # conformer.mol2
2020
# out:

examples/docking/assign_partial_charges_batch.wic

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
steps:
2-
- convert_mol2:
2+
convert_mol2:
33
# in:
44
# input_path: !* ligand_min.sdf
55
# out:
66
# - output_mol2_path: !& ligand_min.mol2
77
# NOTE: If we directly convert from sdf to pdbqt, openbabel will NOT add
88
# partial charges! We have to factor through mol2 format to trigger the
99
# partial charge heuristics code path. This does affect the docking scores!
10-
- convert_pdbqt:
10+
convert_pdbqt:
1111
# in:
1212
# input_path: !* ligand_min.mol2 # conformer.mol2
1313
# out:

examples/docking/autodock_vina_rescore.wic

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,55 +7,55 @@ inputs:
77
format: edam:format_1476
88

99
steps:
10-
- python_script:
11-
in:
12-
script: !ii ../scripts/atomselect.py
13-
dockerPull: !ii jakefennick/atomselect
14-
selection_string: !ii protein # Extract the protein from the last timestep
15-
input_pdb_path: input_pdb_path # !* prod.pdb
10+
- id: python_script
11+
in:
12+
script: !ii ../scripts/atomselect.py
13+
dockerPull: !ii jakefennick/atomselect
14+
selection_string: !ii protein # Extract the protein from the last timestep
15+
input_pdb_path: input_pdb_path # !* prod.pdb
1616
# Assign partial charges (protein)
1717
# NOTE: Although we only start off with one protein, the final coordinates
1818
# after doing MD with different ligands will be different; hence scatter
19-
- convert_mol2:
20-
- convert_pdbqt:
21-
in:
22-
arg1: !ii -xr # Receptor needs to be rigid
23-
out:
24-
- output_pdb_path: !& protein_prod.pdbqt
25-
- python_script:
26-
in:
27-
script: !ii ../scripts/atomselect.py
28-
dockerPull: !ii jakefennick/atomselect
29-
selection_string: !ii resname MOL # Extract the ligand from the last timestep
30-
input_pdb_path: input_pdb_path # !* prod.pdb
31-
# out:
32-
# - output_pdb_path: !& ligand_temp.pdbqt
19+
- id: convert_mol2
20+
- id: convert_pdbqt
21+
in:
22+
arg1: !ii -xr # Receptor needs to be rigid
23+
out:
24+
- output_pdb_path: !& protein_prod.pdbqt
25+
- id: python_script
26+
in:
27+
script: !ii ../scripts/atomselect.py
28+
dockerPull: !ii jakefennick/atomselect
29+
selection_string: !ii resname MOL # Extract the ligand from the last timestep
30+
input_pdb_path: input_pdb_path # !* prod.pdb
31+
# out:
32+
# - output_pdb_path: !& ligand_temp.pdbqt
3333

3434
# It utilizes a helper PDB file to overwrite the atom element
35-
# types (last column) of the input PDB file
35+
# types (last column) of the input PDB file
3636
# Input pdb file
3737
# ATOM 4653 C26 MOL A 286 44.880 26.370 42.560 1.00 0.00 C
3838
# ATOM 4654 BR MOL A 286 48.190 27.540 40.260 1.00 0.00 B
3939
# Input helper pdb file
4040
# ATOM 52 C26 MOL Z 1 -18.069 -34.542 22.368 1.00 0.00 C
4141
# ATOM 53 BR MOL Z 1 -14.786 -32.819 20.524 1.00 0.00 BR
42-
- fix_pdb_atom_column:
43-
in:
44-
input_helper_structure_path: input_ligand_pdb_path
42+
- id: fix_pdb_atom_column
43+
in:
44+
input_helper_structure_path: input_ligand_pdb_path
4545
# Assign partial charges (ligand)
46-
- convert_mol2:
47-
- convert_pdbqt:
48-
out:
49-
- output_pdb_path: !& ligand_prod.pdbqt
50-
- autodock_vina_rescore:
51-
in:
52-
input_ligand_pdbqt_path: !* ligand_prod.pdbqt
53-
input_receptor_pdbqt_path: !* protein_prod.pdbqt
54-
score_only: !ii True
55-
#local_only: !ii True
56-
out:
57-
- output_log_path: !& vina_rescore.log
58-
- docking_score: !& docking_rescores
46+
- id: convert_mol2
47+
- id: convert_pdbqt
48+
out:
49+
- output_pdb_path: !& ligand_prod.pdbqt
50+
- id: autodock_vina_rescore
51+
in:
52+
input_ligand_pdbqt_path: !* ligand_prod.pdbqt
53+
input_receptor_pdbqt_path: !* protein_prod.pdbqt
54+
score_only: !ii True
55+
#local_only: !ii True
56+
out:
57+
- output_log_path: !& vina_rescore.log
58+
- docking_score: !& docking_rescores
5959

6060
wic:
6161
graphviz:
@@ -81,7 +81,7 @@ wic:
8181
wic:
8282
graphviz:
8383
label: Correct atom\nelement types
84-
(6, convert_mol2):
84+
(6, convert_mol2):
8585
wic:
8686
graphviz:
8787
label: Assign Partial\nCharges
Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,33 @@
11
steps:
2-
- python3_mol2_to_pdbqt:
3-
in:
4-
script: !ii /mol2_to_pdbqt.py # NOTE: Initial / required
5-
input_mol2_path: !* conformer.mol2
6-
# out:
7-
# - output_pdb_path: !& ligand_periods.pdbqt
8-
- bash_pdb:
9-
in:
10-
# MDAnalysis adds a . to the final atomtype column, which causes autodock to crash.
11-
script: !ii /pdbqt_remove_trailing_period.sh # NOTE: Initial / required
12-
# input_pdb_path: !* ligand_periods.pdbqt
13-
# out:
14-
# - output_pdb_path: !& ligand_keywords.pdbqt
15-
- bash_pdb:
16-
in:
17-
# Remove the MODEL and ENDMDL keywords.
18-
# Even if there is actually only one model, autodock simply looks for MODEL and ENDMDL.
19-
# Unfortunately, all of the errors crash with the same cryptic message:
20-
# Parse error on line ... in file ".../ligand.pdbqt": Unknown or inappropriate tag
21-
script: !ii /pdbqt_remove_keywords.sh # NOTE: Initial / required
22-
# input_pdb_path: !* ligand_keywords.pdbqt
23-
# out:
24-
# - output_pdb_path: !& ligand_temp.pdbqt
25-
- bash_pdb:
26-
in:
27-
# Add ROOT, ENDROOT, TORSDOF keywords (if necessary).
28-
# Unfortunately, all of the errors crash with the same cryptic message:
29-
# Parse error on line ... in file ".../ligand.pdbqt": Unknown or inappropriate tag
30-
script: !ii /pdbqt_remove_flex.sh # NOTE: Initial / required
2+
- id: python3_mol2_to_pdbqt
3+
in:
4+
script: !ii /mol2_to_pdbqt.py # NOTE: Initial / required
5+
input_mol2_path: !* conformer.mol2
6+
# out:
7+
# - output_pdb_path: !& ligand_periods.pdbqt
8+
- id: bash_pdb
9+
in:
10+
# MDAnalysis adds a . to the final atomtype column, which causes autodock to crash.
11+
script: !ii /pdbqt_remove_trailing_period.sh # NOTE: Initial / required
12+
# input_pdb_path: !* ligand_periods.pdbqt
13+
# out:
14+
# - output_pdb_path: !& ligand_keywords.pdbqt
15+
- id: bash_pdb
16+
in:
17+
# Remove the MODEL and ENDMDL keywords.
18+
# Even if there is actually only one model, autodock simply looks for MODEL and ENDMDL.
19+
# Unfortunately, all of the errors crash with the same cryptic message:
20+
# Parse error on line ... in file ".../ligand.pdbqt": Unknown or inappropriate tag
21+
script: !ii /pdbqt_remove_keywords.sh # NOTE: Initial / required
22+
# input_pdb_path: !* ligand_keywords.pdbqt
23+
# out:
24+
# - output_pdb_path: !& ligand_temp.pdbqt
25+
- id: bash_pdb
26+
in:
27+
# Add ROOT, ENDROOT, TORSDOF keywords (if necessary).
28+
# Unfortunately, all of the errors crash with the same cryptic message:
29+
# Parse error on line ... in file ".../ligand.pdbqt": Unknown or inappropriate tag
30+
script: !ii /pdbqt_remove_flex.sh # NOTE: Initial / required
3131
# input_pdb_path: !* ligand_temp.pdbqt
32-
out:
33-
- output_pdb_path: !& ligand_rigid.pdbqt
32+
out:
33+
- output_pdb_path: !& ligand_rigid.pdbqt

0 commit comments

Comments
 (0)