Merge pull request #52 from ndonyapour/glob_order_genconformers

jfennick · web-flow · commit 7f673a3bf215 · 2024-01-08T11:38:18.000-10:00
fix glob order for gen_conformers
diff --git a/.github/workflows/docker_build.yml b/.github/workflows/docker_build.yml
@@ -26,7 +26,7 @@ jobs:
                      rename_residues_mol, combine_structure,
                      remove_terminal_residue_name_prefixes, molgan,
                      pdbbind_refined, onionnet-sfct, smina, pdbfixer,
-                     fix_pdb_atom_column, extract_protein]  # No username for pdbind_refined
+                     fix_pdb_atom_column, extract_protein, generate_conformers]  # No username for pdbind_refined
         # skip data/ and cwl_adapters/file_format_conversions/biosimspace/
     runs-on: [ubuntu-latest]
 
diff --git a/cwl_adapters/generate_conformers.cwl b/cwl_adapters/generate_conformers.cwl
@@ -0,0 +1,205 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+
+class: CommandLineTool
+
+label: Query a spreadsheet of ligands and generate conformers
+
+doc: |-
+  Query a spreadsheet of ligands and generate conformers
+
+baseCommand: ['python3', '/generate_conformers.py']
+
+hints:
+  DockerRequirement:
+    dockerPull: ndonyapour/generate_conformers
+
+requirements:
+  InlineJavascriptRequirement: {}
+
+inputs:
+  input_excel_path:
+    label: Path to the input xlsx file
+    type: File
+    format:
+    - edam:format_3620
+    inputBinding:
+      prefix: --input_excel_path
+
+  query:
+    label: query str to search the dataset
+    doc: |-
+      query str to search the dataset
+      Type: string
+      File type: input
+      Accepted formats: txt
+    type: string
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --query
+
+  output_txt_path:
+    label: Path to the text dataset file
+    doc: |-
+      Path to the text dataset file
+      Type: string
+      File type: output
+      Accepted formats: txt
+    type: string
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --output_txt_path
+    default: system.log
+
+  output_sdf_path:
+    label: Path to the input file
+    doc: |-
+      Path to the input file
+      Type: string
+      File type: input
+      Accepted formats: sdf
+    type: string
+    format:
+    - edam:format_3814 # sdf
+
+  min_row:
+    label: The row min index
+    doc: |-
+      The row min index
+      Type: int
+    type: int?
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --min_row
+
+  max_row:
+    label: The row max index
+    doc: |-
+      The row max index
+      Type: int
+    type: int?
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --max_row
+
+  smiles_column:
+    label: The name of the smiles column
+    doc: |-
+      The name of the smiles column
+      Type: string
+      File type: output
+      Accepted formats: txt
+    type: string
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --smiles_column
+
+  binding_data_column:
+    label: The name of the binding data column
+    doc: |-
+      The name of the binding data column
+      Type: string
+      File type: output
+      Accepted formats: txt
+    type: string
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --binding_data_column
+
+  convert_Kd_dG:
+    type: boolean
+    label: If this is set to true, dG will be calculated
+    doc: If this is set to true, dG will be calculated
+    format:
+    - edam:format_2330
+    inputBinding:
+      prefix: --convert_Kd_dG  # boolean input: CWL emits the prefix only when the value is true
+    default: false
+
+  experimental_dGs:
+    label: Experimental Free Energies of Binding
+    doc: |-
+      Experimental Free Energies of Binding
+    type: string?
+    format:
+    - edam:format_2330
+
+outputs:
+  output_txt_path:
+    label: Path to the txt file
+    doc: |-
+      Path to the txt file
+    type: File
+    outputBinding:
+      glob: $(inputs.output_txt_path)
+    format: edam:format_2330
+
+  output_sdf_path:
+    label: Generated ligand sdf files, one per row of the text dataset
+    doc: |-
+      Generated ligand sdf files (ligand_<row index>.sdf), listed in the
+      same order as the rows of the text dataset file.
+      Type: File[]
+      Accepted formats: sdf
+    type: File[]
+    outputBinding:
+      # NOTE: Do NOT just use glob: ./*.sdf !!! This will return an array sorted by filenames.
+      # We want the order of output_sdf_paths to match the order of experimental_dGs, etc
+      # Because we need to compare experimental ΔGs with predicted values.
+      glob: $(inputs.output_txt_path)
+      loadContents: true
+      outputEval: |
+        ${
+          // One sdf per non-blank row; idx stays the line number so the file names are unchanged.
+          var lines = self[0].contents.split("\n");
+          var sdfs = [];
+          for (var idx = 0; idx < lines.length; idx++) {
+            // Skip blank lines (e.g. after a trailing newline): they have no ligand file.
+            if (lines[idx].trim() === "") { continue; }
+            sdfs.push({"class": "File", "path": "ligand_" + idx + ".sdf"});
+          }
+          return sdfs;
+        }
+    format: edam:format_3814
+
+  experimental_dGs:
+    label: Experimental Free Energies of Binding
+    doc: |-
+      Experimental Free Energies of Binding
+    type: float[]
+    outputBinding:
+      # NOTE: The values are read from the rows of the text dataset file, in file order.
+      # We want the order of experimental_dGs to match the order of output_sdf_path
+      # because we need to compare experimental ΔGs with predicted values.
+      glob: $(inputs.output_txt_path)
+      loadContents: true
+      outputEval: |
+        ${
+          // The third space-separated field of each row is parsed as the dG value;
+          // rows with fewer than three fields are ignored.
+          var dGs = [];
+          self[0].contents.split("\n").forEach(function (row) {
+            var fields = row.split(" ");
+            if (fields.length > 2) {
+              dGs.push(parseFloat(fields[2]));
+            }
+          });
+
+          if (dGs.length != 0) {
+            return dGs;
+          }
+          // Nothing could be parsed: fail loudly instead of returning an empty array.
+          throw new Error("Error! Experimental dGs are empty!");
+        }
+
+$namespaces:
+  edam: https://edamontology.org/
+
+$schemas:
+- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
diff --git a/docker/dockerBuild.sh b/docker/dockerBuild.sh
@@ -23,7 +23,6 @@ sudo docker build --no-cache --pull -f Dockerfile_autodock_vina -t jakefennick/a
 sudo docker build --no-cache --pull -f Dockerfile_autodock_vina_filter -t jakefennick/autodock_vina_filter .
 sudo docker build --no-cache --pull -f Dockerfile_bash_scripts -t jakefennick/bash_scripts .
 sudo docker build --no-cache --pull -f Dockerfile_calculate_net_charge -t jakefennick/calculate_net_charge .
-sudo docker build --no-cache --pull -f Dockerfile_generate_conformers -t jakefennick/generate_conformers .
 sudo docker build --no-cache --pull -f Dockerfile_mol2_to_pdbqt -t jakefennick/mol2_to_pdbqt .
 sudo docker build --no-cache --pull -f Dockerfile_nmr4md -t jakefennick/nmr4md .
 sudo docker build --no-cache --pull -f Dockerfile_openbabel -t jakefennick/openbabel .
@@ -35,7 +34,8 @@ sudo docker build --no-cache --pull -f Dockerfile_onionnet-sfct -t cyangnyu/onio
 sudo docker build --no-cache --pull -f Dockerfile_smina -t cyangnyu/smina .
 sudo docker build --no-cache --pull -f Dockerfile_pdb_fixer -t ndonyapour/pdbfixer .
 sudo docker build --no-cache --pull -f Dockerfile_extract_protein -t ndonyapour/extract_protein .
-sudo docker build --no-cache --pull -f Dockerfile_extract_protein -t ndonyapour/fix_pdb_atom_column .
+sudo docker build --no-cache --pull -f Dockerfile_fix_pdb_atom_column -t ndonyapour/fix_pdb_atom_column .
+sudo docker build --no-cache --pull -f Dockerfile_generate_conformers -t ndonyapour/generate_conformers .
 
 sudo docker build --no-cache --pull -f Dockerfile_pdbbind_refined -t pdbbind_refined_v2020 .  # NOTE: no username
 cd ../..
@@ -45,4 +45,4 @@ sudo docker build --no-cache --pull -f Dockerfile_diffdock_cpu -t mrbrandonwalke
 sudo docker build --no-cache --pull -f Dockerfile_diffdock_gpu -t mrbrandonwalker/diffdock_gpu .
 sudo docker build --no-cache --pull -f Dockerfile_rmsd_pose_cluster -t mrbrandonwalker/rmsd_pose_cluster .
 sudo docker build --no-cache --pull -f Dockerfile_rank_diffdock_poses -t mrbrandonwalker/rank_diffdock_poses .
-cd ../..
+cd ../..
diff --git a/dockerPull.sh b/dockerPull.sh
@@ -28,4 +28,5 @@ docker pull mrbrandonwalker/diffdock_gpu
 docker pull mrbrandonwalker/diffdock_cpu
 docker pull ndonyapour/pdbfixer
 docker pull ndonyapour/extract_protein
-docker pull ndonyapour/fix_pdb_atom_column
+docker pull ndonyapour/fix_pdb_atom_column
+docker pull ndonyapour/generate_conformers
diff --git a/examples/docking/download_smiles_ligand_db.yml b/examples/docking/download_smiles_ligand_db.yml
@@ -10,24 +10,22 @@ inputs:
   binding_data_column:
     type: string
   convert_Kd_dG:
-    type: string
+    type: boolean
   output_txt_path:
     type: string
 
 outputs:
   output_txt_path:
     type: File
     format: edam:format_2330
-    outputSource: download_smiles_ligand_db__step__2__python_script/output_txt_path
+    outputSource: download_smiles_ligand_db__step__2__generate_conformers/output_txt_path
 
 steps:
 - wget_xlsx:
     in:
       url: ~path
-- python_script:
+- generate_conformers:
     in:
-      script: ../scripts/generate_conformers.py
-      dockerPull: jakefennick/generate_conformers
       #input_excel_path: # inferred
       # query syntax: `column name` 'column value'
       query: ~query #"`Standard Type` == 'Kd' and `duplicate-type-classifier` == 'unique'"
@@ -47,7 +45,7 @@ wic:
       wic:
         graphviz:
           label: Download Excel File
-    (2, python_script):
+    (2, generate_conformers):
       wic:
         graphviz:
           label: Query Spreadsheet\nGenerate Conformers
diff --git a/examples/docking/vs_demo_2.yml b/examples/docking/vs_demo_2.yml
@@ -24,7 +24,7 @@ steps:
       max_row: 1 #25 # Use 1 for CI
       smiles_column: SMILES
       binding_data_column: Standard Value
-      convert_Kd_dG: 'True'
+      convert_Kd_dG: True
       output_txt_path: '&binding_data.txt'
 
 - assign_partial_charges.yml:
diff --git a/examples/docking/vs_demo_3.yml b/examples/docking/vs_demo_3.yml
@@ -27,7 +27,7 @@ steps:
       max_row: 1 #25 # Use 1 for CI
       smiles_column: SMILES
       binding_data_column: Standard Value
-      convert_Kd_dG: 'True'
+      convert_Kd_dG: True
       output_txt_path: '&binding_data.txt'
 
 - ligand_modeling_docking.yml:
diff --git a/examples/scripts/Dockerfile_generate_conformers b/examples/scripts/Dockerfile_generate_conformers
@@ -4,7 +4,4 @@ FROM condaforge/mambaforge
 RUN mamba install -c conda-forge pandas rdkit openpyxl xorg-libxrender
 
 ADD generate_conformers.py .
-ADD python_cwl_driver.py .
-ADD workflow_types.py .
-
 ADD Dockerfile_generate_conformers .
diff --git a/examples/scripts/generate_conformers.py b/examples/scripts/generate_conformers.py