diff --git a/faqs/galaxy/workflows_run.md b/faqs/galaxy/workflows_run.md index b71f5936bfde56..cb3708622466ff 100644 --- a/faqs/galaxy/workflows_run.md +++ b/faqs/galaxy/workflows_run.md @@ -3,18 +3,19 @@ title: Running a workflow area: workflows box_type: tip layout: faq -contributors: [shiltemann,hexylena,wm75, Marie59] +contributors: [shiltemann,hexylena,wm75, Marie59,katherine-d21] --- -1. Click on **Workflows** on the vertical panel on the left of the **Galaxy action list** -2. At the top of the resulting page you will have the option to switch between the *My workflows*, *Workflows shared with me* and *Public workflows* tabs. Select the tab {% if include.tab %}`{{ include.tab }}`{% else %} you want to see all workflows in that category.{% endif %}. -3. Search for {% if include.name %}`{{ include.name }}`{% else %} your desired workflow {% endif %}. +1. Click on **Workflows** on the vertical panel on the left of the **Galaxy action list**. +2. At the top of the resulting page you will have the option to switch between the *My workflows*, *Workflows shared with me* and *Public workflows* tabs. +3. {% if include.tab %}Select the tab `{{ include.tab }}`{% else %}Select the tab you want to see all workflows in that category{% endif %}. +4. {% if include.name %}Search for `{{ include.name }}`{% else %}Search for your desired workflow{% endif %}. ![Select workflow]({% link topics/climate/images/bgc_calib/bgc_workflow.png %}){:width="15%"} -4. Click on the workflow name: a pop-up window opens with a preview of the workflow -5. To run it directly: click **Run** (top-right) +5. Click on the workflow name: a pop-up window opens with a preview of the workflow. +6. To run it directly: click **Run** (top-right). -6. **Recommended**: click **Import** (left of Run) to make your own local copy under *Workflows / My Workflows* +7. **Recommended**: click **Import** (left of Run) to make your own local copy under *Workflows / My Workflows*. 
You may have to refresh your history to see the queued jobs diff --git a/topics/proteomics/images/neoantigen/pvac.PNG b/topics/proteomics/images/neoantigen/pvac.PNG new file mode 100644 index 00000000000000..08b041a830f726 Binary files /dev/null and b/topics/proteomics/images/neoantigen/pvac.PNG differ diff --git a/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/tutorial.md b/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/tutorial.md index 690b62ae9fb8b9..df08ff28c178af 100644 --- a/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/tutorial.md +++ b/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/tutorial.md @@ -319,10 +319,8 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > Running the Workflow > -> 1. **Import the workflow** into Galaxy: -> -> {% snippet faqs/galaxy/workflows_run_trs.md path="topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/main_workflow.ga" title="Fragpipe Discovery" %} -> +> 1. **Import the workflow** into Galaxy: +> - [Neoantigen Database Search](https://tinyurl.com/ipepgen-dbsearch-wf) > > 2. Run **Workflow** {% icon workflow %} using the following parameters: > - *"Send results to a new history"*: `No` @@ -333,6 +331,13 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > > {% snippet faqs/galaxy/workflows_run.md %} > +> DISCLAIMER +> +> - If any step in this workflow fails, please ensure that the input files have been correctly generated and formatted by the preceding tools. Workflow failures often result from improperly called or incomplete input data rather than errors in the workflow itself. Users are responsible for verifying their input before troubleshooting workflow issues. +> +> {: .comment} +> +> {: .hands_on} # Are you feeling adventurous? 
✨ diff --git a/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/index.md b/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/index.md index 878fc5bc6c1049..e8903a89037d0e 100644 --- a/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/index.md +++ b/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/index.md @@ -3,4 +3,5 @@ layout: workflow-list redirect_from: - /topics/proteomics/tutorials/neoantigen-3-fragpipe-discovery/workflows/ + - /topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/main_workflow.html --- diff --git a/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/main_workflow.ga b/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/main_workflow.ga deleted file mode 100644 index 45beab8b8e78b7..00000000000000 --- a/topics/proteomics/tutorials/neoantigen-fragpipe-discovery/workflows/main_workflow.ga +++ /dev/null @@ -1,571 +0,0 @@ -{ - "a_galaxy_workflow": "true", - "annotation": "Merging Fusion and non-normal databases + Discovery peptidomics using FragPipe", - "comments": [], - "creator": [ - { - "class": "Organization", - "name": "GalaxyP" - } - ], - "format-version": "0.1", - "license": "CC-BY-4.0", - "name": "GigaScience_Database_merge_FragPipe_STS26T_demonstration", - "report": { - "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" - }, - "steps": { - "0": { - "annotation": "Non-Normal databases i.e CustomProDB and StringTie", - "content_id": null, - "errors": null, - "id": 0, - "input_connections": {}, - "inputs": [ - { - "description": "Non-Normal databases i.e CustomProDB and StringTie", - "name": "Human_cRAP_Non_normal_transcripts_dB" - } - ], - "label": "Human_cRAP_Non_normal_transcripts_dB", - "name": "Input dataset", - "outputs": [], - "position": { - "left": 
5.44921875, - "top": 165.39453125 - }, - "tool_id": null, - "tool_state": "{\"optional\": false, \"tag\": null}", - "tool_version": null, - "type": "data_input", - "uuid": "af3ddcc5-3642-4efb-8b46-4e7b1dfc9796", - "when": null, - "workflow_outputs": [] - }, - "1": { - "annotation": "Arriba Fusion db", - "content_id": null, - "errors": null, - "id": 1, - "input_connections": {}, - "inputs": [ - { - "description": "Arriba Fusion db", - "name": "Fusion-database" - } - ], - "label": "Fusion-database", - "name": "Input dataset", - "outputs": [], - "position": { - "left": 0, - "top": 321.82421875 - }, - "tool_id": null, - "tool_state": "{\"optional\": false, \"tag\": null}", - "tool_version": null, - "type": "data_input", - "uuid": "0d7a5ea0-199d-4a9b-bf79-6e014a70300b", - "when": null, - "workflow_outputs": [] - }, - "2": { - "annotation": "Input RAW file", - "content_id": null, - "errors": null, - "id": 2, - "input_connections": {}, - "inputs": [ - { - "description": "Input RAW file", - "name": "Input-RAW-FILE" - } - ], - "label": "Input-RAW-FILE", - "name": "Input dataset", - "outputs": [], - "position": { - "left": 560.0390625, - "top": 0 - }, - "tool_id": null, - "tool_state": "{\"optional\": false, \"tag\": null}", - "tool_version": null, - "type": "data_input", - "uuid": "4d0767bf-8a04-4882-b167-6d1087865845", - "when": null, - "workflow_outputs": [] - }, - "3": { - "annotation": "Design file", - "content_id": null, - "errors": null, - "id": 3, - "input_connections": {}, - "inputs": [ - { - "description": "Design file", - "name": "FragPipe_Experimental_design_tabular" - } - ], - "label": "FragPipe_Experimental_design_tabular", - "name": "Input dataset", - "outputs": [], - "position": { - "left": 560.0390625, - "top": 123 - }, - "tool_id": null, - "tool_state": "{\"optional\": false, \"tag\": null}", - "tool_version": null, - "type": "data_input", - "uuid": "0cfde86b-feba-4329-935f-56e0c3c9940a", - "when": null, - "workflow_outputs": [] - }, - "4": { - 
"annotation": "combine Fusion and Non Normal databases", - "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0", - "errors": null, - "id": 4, - "input_connections": { - "batchmode|input_fastas_0|input_fasta": { - "id": 0, - "output_name": "output" - }, - "batchmode|input_fastas_1|input_fasta": { - "id": 1, - "output_name": "output" - } - }, - "inputs": [], - "label": "Merge_non-normal_fusion_FASTA-files", - "name": "FASTA Merge Files and Filter Unique Sequences", - "outputs": [ - { - "name": "output", - "type": "fasta" - } - ], - "position": { - "left": 280.0390625, - "top": 178 - }, - "post_job_actions": { - "RenameDatasetActionoutput": { - "action_arguments": { - "newname": "Non_normal_and_Fusion_dB" - }, - "action_type": "RenameDatasetAction", - "output_name": "output" - } - }, - "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0", - "tool_shed_repository": { - "changeset_revision": "f546e7278f04", - "name": "fasta_merge_files_and_filter_unique_sequences", - "owner": "galaxyp", - "tool_shed": "toolshed.g2.bx.psu.edu" - }, - "tool_state": "{\"accession_parser\": \"^>([^ ]+).*$\", \"batchmode\": {\"processmode\": \"individual\", \"__current_case__\": 0, \"input_fastas\": [{\"__index__\": 0, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}]}, \"uniqueness_criterion\": \"sequence\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "1.2.0", - "type": "tool", - "uuid": "1eaac96c-1249-4701-8dfc-923406422777", - "when": null, - "workflow_outputs": [ - { - "label": "Non_normal_and_Fusion_dB", - "output_name": "output", - "uuid": "19df6de4-6440-4693-b893-139213d4e07e" - } - ] - }, - "5": { - "annotation": "Take the Passed fasta file", - "content_id": 
"toolshed.g2.bx.psu.edu/repos/galaxyp/validate_fasta_database/validate_fasta_database/0.1.5", - "errors": null, - "id": 5, - "input_connections": { - "inFasta": { - "id": 4, - "output_name": "output" - } - }, - "inputs": [], - "label": "Validation_of_Fasta_file", - "name": "Validate FASTA Database", - "outputs": [ - { - "name": "goodFastaOut", - "type": "fasta" - }, - { - "name": "badFastaOut", - "type": "fasta" - } - ], - "position": { - "left": 560.0390625, - "top": 246 - }, - "post_job_actions": { - "RenameDatasetActiongoodFastaOut": { - "action_arguments": { - "newname": "Validated-fasta" - }, - "action_type": "RenameDatasetAction", - "output_name": "goodFastaOut" - } - }, - "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/validate_fasta_database/validate_fasta_database/0.1.5", - "tool_shed_repository": { - "changeset_revision": "9c246c2e24ad", - "name": "validate_fasta_database", - "owner": "galaxyp", - "tool_shed": "toolshed.g2.bx.psu.edu" - }, - "tool_state": "{\"__input_ext\": \"fasta\", \"checkHasAccession\": false, \"checkIsProtein\": true, \"chromInfo\": \"/data/db/reference_genomes/hg38/len/hg38.len\", \"crashIfInvalid\": false, \"customLetters\": \"\", \"inFasta\": {\"__class__\": \"ConnectedValue\"}, \"minimumLength\": \"0\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "0.1.5", - "type": "tool", - "uuid": "aea6d45e-5e6e-470e-89f4-02e0dc8900bc", - "when": null, - "workflow_outputs": [ - { - "label": "Validated-fasta", - "output_name": "goodFastaOut", - "uuid": "efefe5f3-7bb9-42fe-a39b-b220fcaf1add" - } - ] - }, - "6": { - "annotation": "Non tryptic fragpipe", - "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fragpipe/fragpipe/20.0+galaxy2", - "errors": null, - "id": 6, - "input_connections": { - "database_name": { - "id": 5, - "output_name": "goodFastaOut" - }, - "inputs": { - "id": 2, - "output_name": "output" - }, - "manifest": { - "id": 3, - "output_name": "output" - } - }, - "inputs": [], - "label": "Fragpipe", - 
"name": "FragPipe - Academic Research and Education User License (Non-Commercial)", - "outputs": [ - { - "name": "output_psm", - "type": "input" - }, - { - "name": "output_ion", - "type": "input" - }, - { - "name": "output_peptide", - "type": "input" - }, - { - "name": "output_protein", - "type": "input" - }, - { - "name": "combined_outputs", - "type": "input" - }, - { - "name": "output_workflow", - "type": "txt" - }, - { - "name": "log", - "type": "txt" - } - ], - "position": { - "left": 840.03125, - "top": 91.25 - }, - "post_job_actions": { - "ChangeDatatypeActionoutput_peptide": { - "action_arguments": { - "newtype": "tabular" - }, - "action_type": "ChangeDatatypeAction", - "output_name": "output_peptide" - }, - "DeleteIntermediatesActionoutput_psm": { - "action_arguments": {}, - "action_type": "DeleteIntermediatesAction", - "output_name": "output_psm" - }, - "RenameDatasetActionoutput_peptide": { - "action_arguments": { - "newname": "Fragpipe_output_peptide" - }, - "action_type": "RenameDatasetAction", - "output_name": "output_peptide" - }, - "RenameDatasetActionoutput_protein": { - "action_arguments": { - "newname": "Fragpipe_output_protein" - }, - "action_type": "RenameDatasetAction", - "output_name": "output_protein" - }, - "RenameDatasetActionoutput_psm": { - "action_arguments": { - "newname": "Fragpipe_output_psm" - }, - "action_type": "RenameDatasetAction", - "output_name": "output_psm" - } - }, - "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fragpipe/fragpipe/20.0+galaxy2", - "tool_shed_repository": { - "changeset_revision": "ef46866326ef", - "name": "fragpipe", - "owner": "galaxyp", - "tool_shed": "toolshed.g2.bx.psu.edu" - }, - "tool_state": "{\"database_name\": {\"__class__\": \"ConnectedValue\"}, \"database_options\": \"decoy_and_contam\", \"input_prefix\": \"\", \"inputs\": {\"__class__\": \"ConnectedValue\"}, \"license_agreements\": true, \"manifest\": {\"__class__\": \"ConnectedValue\"}, \"num_slices\": \"200\", \"output_options\": 
[\"workflow\", \"log\", \"combined_outputs\"], \"wf\": {\"workflow_name\": \"Nonspecific-HLA\", \"__current_case__\": 2, \"msfragger\": {\"search_tolerances\": {\"precursor\": {\"precursor_mass_units\": \"default\", \"__current_case__\": 0}, \"precursor_true\": {\"precursor_true_units\": \"default\", \"__current_case__\": 0}, \"fragment\": {\"fragment_mass_units\": \"ppm\", \"__current_case__\": 1, \"fragment_mass_tolerance\": \"20\"}, \"isotope_error\": null, \"calibrate_mass\": \"2\"}, \"digestion\": {\"digest\": {\"search_enzyme\": \"nonspecific\", \"__current_case__\": 11, \"search_enzyme_name\": \"nonspecific\", \"search_enzyme_cut\": \"-\", \"search_enzyme_nocut\": \"\"}, \"allowed_missed_cleavage\": \"2\", \"search_enzyme_sense\": \"C\", \"enzyme2\": {\"digest2\": \"no\", \"__current_case__\": 0}, \"num_enzyme_termini\": \"0\", \"digest_min_length\": \"7\", \"digest_max_length\": \"20\", \"digest_mass_range_min\": \"500.0\", \"digest_mass_range_max\": \"5000.0\"}, \"variable_modification\": {\"max_variable_mods_per_peptide\": \"3\", \"variable_mods_select\": [\"15.99491461956 M 3\"], \"variable_mods\": \"\", \"use_all_mods_in_first_search\": null, \"clip_nTerm_M\": null, \"allow_multiple_variable_mods_on_residue\": null, \"max_variable_mods_combinations\": \"5000\"}, \"static_modification\": {\"add_Cterm_peptide\": null, \"add_Nterm_peptide\": null, \"add_Cterm_protein\": null, \"add_Nterm_protein\": null, \"add_A_alanine\": null, \"add_R_arginine\": null, \"add_N_asparagine\": null, \"add_D_aspartic_acid\": null, \"add_C_cysteine\": null, \"add_E_glutamic_acid\": null, \"add_Q_glutamine\": null, \"add_G_glycine\": null, \"add_H_histidine\": null, \"add_I_isoleucine\": null, \"add_L_leucine\": null, \"add_K_lysine\": null, \"add_M_methionine\": null, \"add_F_phenylalanine\": null, \"add_P_proline\": null, \"add_S_serine\": null, \"add_T_threonine\": null, \"add_W_tryptophan\": null, \"add_Y_tyrosine\": null, \"add_V_valine\": null, \"add_B_user_amino_acid\": 
null, \"add_J_user_amino_acid\": null, \"add_O_user_amino_acid\": null, \"add_U_user_amino_acid\": null, \"add_X_user_amino_acid\": null, \"add_Z_user_amino_acid\": null}, \"glyco_labile\": {\"labile_search_mode\": \"off\", \"diagnostic_intensity_filter\": \"0.0\", \"min_sequence_matches\": \"2\", \"Y_type_masses\": \"\", \"diagnostic_fragments\": \"\", \"remainder_fragment_masses\": null}, \"mass_offsets\": {\"mass_offsets\": \"0\", \"restrict_deltamass_to\": \"all\"}, \"spectrum_processing\": {\"precursor_mass_mode\": \"corrected\", \"minimum_peaks\": null, \"use_topN_peaks\": null, \"minimum_ratio\": null, \"precursor\": {\"override_charge\": \"default\", \"__current_case__\": 0}, \"clear\": {\"clear_mz_range_min\": \"0.0\", \"clear_mz_range_max\": \"0.0\"}, \"remove_precursor_peak\": \"1\", \"remove_precursor_range_min\": \"-1.5\", \"remove_precursor_range_max\": \"1.5\", \"intensity_transform\": \"1\", \"check_spectral_files\": true, \"require_precursor\": true, \"reuse_dia_fragment_peaks\": false, \"activation_types\": \"all\"}, \"open_search\": {\"mass_diff_to_variable_mod\": \"0\", \"track_zero_topN\": null, \"zero_bin_accept_expect\": null, \"zero_bin_mult_expect\": null, \"add_topN_complementary\": null, \"delta_mass_exclude_ranges_min\": \"-1.5\", \"delta_mass_exclude_ranges_max\": \"3.5\", \"localize_delta_mass\": false}, \"modeling_output\": {\"min_fragments_modelling\": null, \"min_matched_fragments\": null, \"max_fragment_charge\": null, \"deisotope\": null, \"deneutralloss\": true, \"fragment_ion_series\": null}, \"advanced_options\": {\"output_format\": \"pepXML_pin\", \"output_report_topN\": \"1\", \"report_alternative_proteins\": true, \"output_max_expect\": \"50.0\", \"write_calibrated_mzml\": false, \"write_uncalibrated_mgf\": false, \"group_variable\": null}}, \"validation\": {\"validation_tab\": {\"run_validation\": \"true\", \"__current_case__\": 0, \"psm_validation\": {\"run_psm_validation\": \"percolator\", \"__current_case__\": 1, 
\"percolator\": {\"keep_tsv_files\": false, \"min_prob\": \"0.5\"}}, \"protein_prophet\": {\"run_protein_prophet\": \"true\", \"__current_case__\": 0, \"protein_prophet_opts\": {\"iprophet\": false, \"maxppmdiff\": \"2000000\", \"minprob\": \"0.05\", \"nonsp\": false, \"subgroups\": false, \"unmapped\": false}}, \"phi_report\": {\"run_phi_report\": \"true\", \"__current_case__\": 0, \"phi_report_opts\": {\"dont_use_prot_prophet_file\": false, \"print_decoys\": false}}}}, \"quant_ms1\": {\"label_free_quantification\": {\"label_free_quantification_run\": \"default\", \"__current_case__\": 0}}, \"ptms\": {\"ptm_shepherd\": {\"run_ptm_shepherd\": \"no\", \"__current_case__\": 1}}, \"quant_iso\": {\"isobaric_quantification\": {\"isobaric_quantification_step\": \"no\", \"__current_case__\": 1}}}, \"workflow_only\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "20.0+galaxy2", - "type": "tool", - "uuid": "5e35fd95-bb98-47a5-883d-f9ac43bf8dcc", - "when": null, - "workflow_outputs": [ - { - "label": "Fragpipe_output_peptide", - "output_name": "output_peptide", - "uuid": "80182cb4-4a70-4a50-a42f-0de196f6e95b" - }, - { - "label": "Fragpipe_output_protein", - "output_name": "output_protein", - "uuid": "6e4e993f-8d69-4008-9528-91ea2699ab02" - }, - { - "label": "Fragpipe_output_psm", - "output_name": "output_psm", - "uuid": "b1ac88a6-70fb-4a0c-bc52-aa4f37019c8d" - } - ] - }, - "7": { - "annotation": "collapse peptide report", - "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/5.1.0", - "errors": null, - "id": 7, - "input_connections": { - "input_list": { - "id": 6, - "output_name": "output_peptide" - } - }, - "inputs": [], - "label": "Extract_Peptides_from_Fragpipe", - "name": "Collapse Collection", - "outputs": [ - { - "name": "output", - "type": "input" - } - ], - "position": { - "left": 1120.0390625, - "top": 203 - }, - "post_job_actions": { - "RenameDatasetActionoutput": { - "action_arguments": { - 
"newname": "Fragpipe-Peptide-Report" - }, - "action_type": "RenameDatasetAction", - "output_name": "output" - } - }, - "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/5.1.0", - "tool_shed_repository": { - "changeset_revision": "90981f86000f", - "name": "collapse_collections", - "owner": "nml", - "tool_shed": "toolshed.g2.bx.psu.edu" - }, - "tool_state": "{\"filename\": {\"add_name\": false, \"__current_case__\": 1}, \"input_list\": {\"__class__\": \"ConnectedValue\"}, \"one_header\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", - "tool_version": "5.1.0", - "type": "tool", - "uuid": "33583c0d-a247-4a7a-a8c3-467f8343ad0a", - "when": null, - "workflow_outputs": [ - { - "label": "Fragpipe-Peptide-Report", - "output_name": "output", - "uuid": "a3548299-ee6a-4649-a741-0e934557daf3" - } - ] - }, - "8": { - "annotation": "removing anything that matches _HUMAN", - "content_id": "Grep1", - "errors": null, - "id": 8, - "input_connections": { - "input": { - "id": 7, - "output_name": "output" - } - }, - "inputs": [], - "label": "Removing_known_human_peptides", - "name": "Select", - "outputs": [ - { - "name": "out_file1", - "type": "input" - } - ], - "position": { - "left": 1399.8862609863281, - "top": 203.352294921875 - }, - "post_job_actions": { - "RenameDatasetActionout_file1": { - "action_arguments": { - "newname": "Removing_known_human_peptides" - }, - "action_type": "RenameDatasetAction", - "output_name": "out_file1" - } - }, - "tool_id": "Grep1", - "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"invert\": \"-v\", \"keep_header\": true, \"pattern\": \"(HUMAN)|(contam_)|(con_)\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}", - "tool_version": "1.0.4", - "type": "tool", - "uuid": "7724eab3-8d29-4235-a7a0-0a0f56443fef", - "when": null, - "workflow_outputs": [ - { - "label": "Removing_known_human_peptides", - "output_name": "out_file1", - "uuid": "275b4fb5-e4c2-48ce-a8d8-92c91b1831fc" - } - ] - }, - 
"9": { - "annotation": "", - "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", - "errors": null, - "id": 9, - "input_connections": { - "add_to_database|withdb": { - "id": 8, - "output_name": "out_file1" - } - }, - "inputs": [ - { - "description": "runtime parameter for tool Query Tabular", - "name": "add_to_database" - } - ], - "label": "Extracting_Peptide-Candidates", - "name": "Query Tabular", - "outputs": [ - { - "name": "output", - "type": "tabular" - } - ], - "position": { - "left": 1658.7463684082031, - "top": 199.19036865234375 - }, - "post_job_actions": { - "RenameDatasetActionoutput": { - "action_arguments": { - "newname": "Extracting_Peptide-Candidates" - }, - "action_type": "RenameDatasetAction", - "output_name": "output" - } - }, - "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", - "tool_shed_repository": { - "changeset_revision": "cf4397560712", - "name": "query_tabular", - "owner": "iuc", - "tool_shed": "toolshed.g2.bx.psu.edu" - }, - "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"ConnectedValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"Select distinct c1 from t1\", \"tables\": [], \"workdb\": \"workdb.sqlite\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}", - "tool_version": "3.3.2", - "type": "tool", - "uuid": "6213dc3b-f9f9-4221-9746-fb0ba11b7c1d", - "when": null, - "workflow_outputs": [ - { - "label": "Extracting_Peptide-Candidates", - "output_name": "output", - "uuid": "443d70da-5732-449a-89fb-c8fc094dbaee" - } - ] - }, - "10": { - "annotation": "", - "content_id": "Remove beginning1", - "errors": null, - "id": 10, - "input_connections": { - "input": { - "id": 9, - "output_name": "output" - } - }, - "inputs": [ - { - "description": "runtime parameter for tool Remove beginning", - "name": "input" - } - ], - "label": 
"Neoantigen_Peptide_Candidates_for_PepQuery", - "name": "Remove beginning", - "outputs": [ - { - "name": "out_file1", - "type": "input" - } - ], - "position": { - "left": 1934.2684020996094, - "top": 220.08160400390625 - }, - "post_job_actions": { - "RenameDatasetActionout_file1": { - "action_arguments": { - "newname": "Neoantigen_Peptide_Candidates_for_PepQuery" - }, - "action_type": "RenameDatasetAction", - "output_name": "out_file1" - } - }, - "tool_id": "Remove beginning1", - "tool_state": "{\"input\": {\"__class__\": \"RuntimeValue\"}, \"num_lines\": \"1\", \"__page__\": 0, \"__rerun_remap_job_id__\": null}", - "tool_version": "1.0.0", - "type": "tool", - "uuid": "f13f29b2-b78f-475c-99fd-3c5d9eb08686", - "when": null, - "workflow_outputs": [] - } - }, - "tags": [ - "name:neoantigen" - ], - "uuid": "07c81888-cfbc-44f0-ad9b-7eb83a0903a0", - "version": 5 -} diff --git a/topics/proteomics/tutorials/neoantigen-fusion-database-generation/tutorial.md b/topics/proteomics/tutorials/neoantigen-fusion-database-generation/tutorial.md index 7c5b0aeec788a1..02196e7118bc9b 100644 --- a/topics/proteomics/tutorials/neoantigen-fusion-database-generation/tutorial.md +++ b/topics/proteomics/tutorials/neoantigen-fusion-database-generation/tutorial.md @@ -346,6 +346,7 @@ Using regex (regular expressions) for find and replace is a powerful technique f > {% snippet faqs/galaxy/analysis_regular_expressions.md %} > > 2. Rename the output FASTA as `Arriba-Fusion-Database.fasta` +> {: .hands_on} @@ -360,8 +361,7 @@ To rerun this entire analysis at once, you can use our workflow. Below, we show > Running the Workflow > > 1. **Import the workflow** into Galaxy: -> -> {% snippet faqs/galaxy/workflows_run_trs.md path="topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/main_workflow.ga" title="Neoantigen Fusion Database Generation" %} +> - [Neoantigen Fusion Database Generation](https://tinyurl.com/ipepgen-gene-fusion-wf) > > > 2. 
Run **Workflow** {% icon workflow %} using the following parameters: @@ -373,6 +373,12 @@ To rerun this entire analysis at once, you can use our workflow. Below, we show > > {% snippet faqs/galaxy/workflows_run.md %} > +> DISCLAIMER +> +> - If any step in this workflow fails, please ensure that the input files have been correctly generated and formatted by the preceding tools. Workflow failures often result from improperly called or incomplete input data rather than errors in the workflow itself. Users are responsible for verifying their input before troubleshooting workflow issues. +> +> {: .comment} +> {: .hands_on} # Are you feeling adventurous? ✨ diff --git a/topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/index.md b/topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/index.md index 8d114bebd28122..2b4958c2178515 100644 --- a/topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/index.md +++ b/topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/index.md @@ -3,4 +3,5 @@ layout: workflow-list redirect_from: - /topics/proteomics/tutorials/neoantigen-1-fusion-database-generation/workflows/ + - /topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/main_workflow.html --- diff --git a/topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/main_workflow.ga b/topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/main_workflow.ga deleted file mode 100644 index ccbdd6e53c0cf5..00000000000000 --- a/topics/proteomics/tutorials/neoantigen-fusion-database-generation/workflows/main_workflow.ga +++ /dev/null @@ -1 +0,0 @@ -{"a_galaxy_workflow": "true", "annotation": "Create a protein Fusion database through the Arriba workflow", "comments": [], "creator": [{"class": "Organization", "name": "GalaxyP"}], "format-version": "0.1", "license": "GPL-3.0-or-later", "name": 
"Gigascience_Fusions_demonstration_STS26T-Gent_Workflow", "report": {"markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"}, "steps": {"0": {"annotation": "Forward strand", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [{"description": "Forward strand", "name": "RNA-Seq_Reads_1"}], "label": "RNA-Seq_Reads_1", "name": "Input dataset", "outputs": [], "position": {"left": 7.078125, "top": 87.01698250584444}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "681f56f4-4c68-4e1c-9ebe-4de06a596bf6", "when": null, "workflow_outputs": []}, "1": {"annotation": "Reverse strand", "content_id": null, "errors": null, "id": 1, "input_connections": {}, "inputs": [{"description": "Reverse strand", "name": "RNA-Seq_Reads_2"}], "label": "RNA-Seq_Reads_2", "name": "Input dataset", "outputs": [], "position": {"left": 0, "top": 269.45057625584445}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "4f7c21a7-e113-4b5a-96fa-3b4187f58094", "when": null, "workflow_outputs": []}, "2": {"annotation": "human GTF", "content_id": null, "errors": null, "id": 2, "input_connections": {}, "inputs": [{"description": "human GTF", "name": "human_reference_genome_annotation.gtf"}], "label": "human_reference_genome_annotation.gtf", "name": "Input dataset", "outputs": [], "position": {"left": 287.078125, "top": 494.01698250584445}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "c21d5411-5b06-4cfc-8cf7-649cb2fa7e83", "when": null, "workflow_outputs": []}, "3": {"annotation": "Reference HUMAN FASTA", "content_id": null, "errors": null, "id": 3, "input_connections": {}, "inputs": 
[{"description": "Reference HUMAN FASTA", "name": "human_reference_genome.fasta"}], "label": "human_reference_genome.fasta", "name": "Input dataset", "outputs": [], "position": {"left": 577.078125, "top": 422.01698250584445}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "dc93e22c-adab-441d-b10e-17c7f78dfaaa", "when": null, "workflow_outputs": []}, "4": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/arriba_get_filters/arriba_get_filters/2.4.0+galaxy1", "errors": null, "id": 4, "input_connections": {}, "inputs": [], "label": "Arriba-Get-Filters", "name": "Arriba Get Filters", "outputs": [{"name": "blacklist", "type": "tabular.gz"}, {"name": "known_fusions", "type": "tabular.gz"}, {"name": "protein_domains", "type": "gff3"}, {"name": "cytobands", "type": "tabular"}], "position": {"left": 577.078125, "top": 566.0169825058445}, "post_job_actions": {"RenameDatasetActionblacklist": {"action_arguments": {"newname": "Arriba-Get-Filters_blacklist"}, "action_type": "RenameDatasetAction", "output_name": "blacklist"}, "RenameDatasetActioncytobands": {"action_arguments": {"newname": "Arriba-Get-Filters_cytobands"}, "action_type": "RenameDatasetAction", "output_name": "cytobands"}, "RenameDatasetActionknown_fusions": {"action_arguments": {"newname": "Arriba-Get-Filters_known_fusions"}, "action_type": "RenameDatasetAction", "output_name": "known_fusions"}, "RenameDatasetActionprotein_domains": {"action_arguments": {"newname": "Arriba-Get-Filters_protein_domains"}, "action_type": "RenameDatasetAction", "output_name": "protein_domains"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/arriba_get_filters/arriba_get_filters/2.4.0+galaxy1", "tool_shed_repository": {"changeset_revision": "703a2cb3fb38", "name": "arriba_get_filters", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": 
\"456e7fe2f37911ee99c9001e67d2ec02\", \"arriba_reference_name\": \"GRCh38\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.4.0+galaxy1", "type": "tool", "uuid": "7e2bc58f-6422-4e6b-b882-b1f71d6ef236", "when": null, "workflow_outputs": [{"label": "Arriba-Get-Filters_protein_domains", "output_name": "protein_domains", "uuid": "bdb7b8fe-ad19-44b0-8824-c930809543eb"}, {"label": "Arriba-Get-Filters_cytobands", "output_name": "cytobands", "uuid": "d19553ea-c199-47b1-ad18-f7197de66fc7"}, {"label": "Arriba-Get-Filters_blacklist", "output_name": "blacklist", "uuid": "02a9718d-0153-4bf8-b7b9-17826109ede7"}, {"label": "Arriba-Get-Filters_known_fusions", "output_name": "known_fusions", "uuid": "75ae6bd6-5b30-4af1-bc91-2f2c798a44ef"}]}, "5": {"annotation": "Uncompressed RNA-Seq forward reads", "content_id": "CONVERTER_gz_to_uncompressed", "errors": null, "id": 5, "input_connections": {"input1": {"id": 0, "output_name": "output"}}, "inputs": [], "label": "Uncompressed-RNA-Seq-forward-reads", "name": "Convert compressed file to uncompressed.", "outputs": [{"name": "output1", "type": "auto"}], "position": {"left": 287.078125, "top": 41.26698250584444}, "post_job_actions": {"ChangeDatatypeActionoutput1": {"action_arguments": {"newtype": "fastqsanger"}, "action_type": "ChangeDatatypeAction", "output_name": "output1"}, "RenameDatasetActionoutput1": {"action_arguments": {"newname": "Uncompressed-RNA-Seq-forward-reads"}, "action_type": "RenameDatasetAction", "output_name": "output1"}}, "tool_id": "CONVERTER_gz_to_uncompressed", "tool_state": "{\"__input_ext\": \"input\", \"__target_datatype__\": \"fastqsanger\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.0", "type": "tool", "uuid": "aaf9e0e8-1799-427e-ab29-fa6fa325490d", "when": null, 
"workflow_outputs": [{"label": "Uncompressed-RNA-Seq-forward-reads", "output_name": "output1", "uuid": "ad7de653-861f-4fe5-961b-0ec2bd2485dc"}]}, "6": {"annotation": "Uncompressed RNA-Seq reverse reads", "content_id": "CONVERTER_gz_to_uncompressed", "errors": null, "id": 6, "input_connections": {"input1": {"id": 1, "output_name": "output"}}, "inputs": [], "label": "Uncompressed-RNA-Seq-reverse-reads", "name": "Convert compressed file to uncompressed.", "outputs": [{"name": "output1", "type": "auto"}], "position": {"left": 263.3203125, "top": 236.32167000584445}, "post_job_actions": {"ChangeDatatypeActionoutput1": {"action_arguments": {"newtype": "fastqsanger"}, "action_type": "ChangeDatatypeAction", "output_name": "output1"}, "RenameDatasetActionoutput1": {"action_arguments": {"newname": "Uncompressed-RNA-Seq-reverse-reads"}, "action_type": "RenameDatasetAction", "output_name": "output1"}}, "tool_id": "CONVERTER_gz_to_uncompressed", "tool_state": "{\"__input_ext\": \"input\", \"__target_datatype__\": \"fastqsanger\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.0", "type": "tool", "uuid": "96158d24-cffb-4ecb-b01c-76b77c6ae97b", "when": null, "workflow_outputs": [{"label": "Uncompressed-RNA-Seq-reverse-reads", "output_name": "output1", "uuid": "30fe35ba-5af2-4f36-ab8a-7f0099253882"}]}, "7": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.10b+galaxy4", "errors": null, "id": 7, "input_connections": {"refGenomeSource|GTFconditional|sjdbGTFfile": {"id": 2, "output_name": "output"}, "singlePaired|input1": {"id": 5, "output_name": "output1"}, "singlePaired|input2": {"id": 6, "output_name": "output1"}}, "inputs": [{"description": "runtime parameter for tool RNA STAR", "name": "singlePaired"}, {"description": "runtime parameter for tool RNA STAR", "name": "singlePaired"}], "label": 
"RNA_STAR", "name": "RNA STAR", "outputs": [{"name": "output_log", "type": "txt"}, {"name": "splice_junctions", "type": "interval"}, {"name": "mapped_reads", "type": "bam"}], "position": {"left": 593.9446115032721, "top": 0}, "post_job_actions": {"RenameDatasetActionmapped_reads": {"action_arguments": {"newname": "RNA_STAR_mapped_reads"}, "action_type": "RenameDatasetAction", "output_name": "mapped_reads"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.10b+galaxy4", "tool_shed_repository": {"changeset_revision": "79de45b5069b", "name": "rgrnastar", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"456e7fe2f37911ee99c9001e67d2ec02\", \"algo\": {\"params\": {\"settingsType\": \"star_fusion\", \"__current_case__\": 1}}, \"chimOutType\": \"WithinBAM SoftClip\", \"chromInfo\": \"/data/db/reference_genomes/hg38/len/hg38.len\", \"filter\": {\"basic_filters\": null, \"output_params2\": {\"output_select2\": \"no\", \"__current_case__\": 1}}, \"oformat\": {\"outSAMattributes\": [\"NH\", \"HI\", \"AS\", \"nM\", \"ch\"], \"HI_offset\": \"1\", \"outSAMprimaryFlag\": \"OneBestScore\", \"outSAMmapqUnique\": \"60\"}, \"outWig\": {\"outWigType\": \"None\", \"__current_case__\": 0, \"outWigStrand\": \"false\"}, \"perf\": {\"outBAMsortingBinsN\": \"50\", \"winAnchorMultimapNmax\": \"50\"}, \"refGenomeSource\": {\"geneSource\": \"indexed\", \"__current_case__\": 0, \"GTFconditional\": {\"GTFselect\": \"without-gtf-with-gtf\", \"__current_case__\": 1, \"genomeDir\": \"hg38\", \"sjdbGTFfile\": {\"__class__\": \"ConnectedValue\"}, \"sjdbGTFfeatureExon\": \"exon\", \"sjdbOverhang\": \"100\", \"quantmode_output\": {\"quantMode\": \"-\", \"__current_case__\": 0}}}, \"singlePaired\": {\"sPaired\": \"paired\", \"__current_case__\": 1, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"input2\": {\"__class__\": \"ConnectedValue\"}}, \"twopass\": {\"twopassMode\": \"Basic\", 
\"__current_case__\": 1, \"twopass_read_subset\": \"\", \"sj_precalculated\": \"\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.7.10b+galaxy4", "type": "tool", "uuid": "e4053668-7c87-4709-b541-9bc1b7fdbd61", "when": null, "workflow_outputs": [{"label": "RNA_STAR_mapped_reads", "output_name": "mapped_reads", "uuid": "8002eb14-9ce0-4767-8e64-ad5d8af17f25"}]}, "8": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/arriba/arriba/2.4.0+galaxy1", "errors": null, "id": 8, "input_connections": {"blacklist": {"id": 4, "output_name": "blacklist"}, "genome_gtf|annotation": {"id": 2, "output_name": "output"}, "genome|assembly": {"id": 3, "output_name": "output"}, "input": {"id": 7, "output_name": "mapped_reads"}, "known_fusions": {"id": 4, "output_name": "known_fusions"}, "protein_domains": {"id": 4, "output_name": "protein_domains"}, "visualization|cytobands": {"id": 4, "output_name": "cytobands"}}, "inputs": [{"description": "runtime parameter for tool Arriba", "name": "genome"}, {"description": "runtime parameter for tool Arriba", "name": "genome_gtf"}, {"description": "runtime parameter for tool Arriba", "name": "visualization"}], "label": "Arriba", "name": "Arriba", "outputs": [{"name": "fusions_tsv", "type": "tabular"}, {"name": "discarded_fusions_tsv", "type": "tabular"}, {"name": "fusions_vcf", "type": "vcf"}, {"name": "fusions_pdf", "type": "pdf"}], "position": {"left": 854.2890625, "top": 230.89979500584445}, "post_job_actions": {"RenameDatasetActionfusions_tsv": {"action_arguments": {"newname": "Arriba-Fusions-tsv"}, "action_type": "RenameDatasetAction", "output_name": "fusions_tsv"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/arriba/arriba/2.4.0+galaxy1", "tool_shed_repository": {"changeset_revision": "01ccd366690d", "name": "arriba", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"456e7fe2f37911ee99c9001e67d2ec02\", 
\"blacklist\": {\"__class__\": \"ConnectedValue\"}, \"chimeric\": null, \"chromInfo\": \"/data/db/reference_genomes/hg38/len/hg38.len\", \"genome\": {\"genome_source\": \"history\", \"__current_case__\": 0, \"assembly\": {\"__class__\": \"ConnectedValue\"}}, \"genome_gtf\": {\"gtf_source\": \"history\", \"__current_case__\": 0, \"annotation\": {\"__class__\": \"ConnectedValue\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"known_fusions\": {\"__class__\": \"ConnectedValue\"}, \"options\": {\"gtf_features\": \"\", \"strandedness\": null, \"genome_contigs\": \"\", \"viral_contigs\": \"\", \"filters\": null, \"max_evalue\": null, \"min_supporting_reads\": null, \"max_mismappers\": null, \"max_homolog_identity\": null, \"homopolymer_length\": null, \"read_through_distance\": null, \"min_anchor_length\": null, \"many_spliced_events\": null, \"max_kmer_content\": null, \"max_mismatch_pvalue\": null, \"fragment_length\": null, \"max_reads\": null, \"quantile\": null, \"exonic_fraction\": null, \"top_n\": null, \"covered_fraction\": null, \"max_itd_length\": null, \"min_itd_allele_fraction\": null, \"min_itd_supporting_reads\": null, \"duplicate_marking\": false, \"fill_discarded_columns\": false, \"fill_the_gaps\": false}, \"output_fusion_bams\": false, \"output_fusions_discarded\": true, \"output_fusions_vcf\": true, \"protein_domains\": {\"__class__\": \"ConnectedValue\"}, \"tags\": null, \"visualization\": {\"do_viz\": \"yes\", \"__current_case__\": 0, \"cytobands\": {\"__class__\": \"ConnectedValue\"}, \"options\": {\"sampleName\": \"\", \"transcriptSelection\": null, \"minConfidenceForCircosPlot\": null, \"squishIntrons\": null, \"showIntergenicVicinity\": \"\", \"mergeDomainsOverlappingBy\": null, \"printExonLabels\": null, \"render3dEffect\": null, \"optimizeDomainColors\": null, \"color1\": \"\", \"color2\": \"\", \"pdfWidth\": null, \"pdfHeight\": null, \"fontSize\": null, \"fontFamily\": \"\", \"fixedScale\": null, \"coverageRange\": \"\"}}, \"wgs_cond\": 
{\"use_wgs\": \"no\", \"__current_case__\": 1}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.4.0+galaxy1", "type": "tool", "uuid": "0751844a-837b-4ee1-80b3-a8493e5ebe1c", "when": null, "workflow_outputs": [{"label": "Arriba-Fusions-tsv", "output_name": "fusions_tsv", "uuid": "4cb6fdb6-b4c5-47b4-9dca-3a05a7a26c74"}]}, "9": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/1.1.2", "errors": null, "id": 9, "input_connections": {"infile": {"id": 8, "output_name": "fusions_tsv"}}, "inputs": [], "label": "Reformating Fusion data", "name": "Text reformatting", "outputs": [{"name": "outfile", "type": "input"}], "position": {"left": 1127.5155912109362, "top": 108.15760750584444}, "post_job_actions": {"RenameDatasetActionoutfile": {"action_arguments": {"newname": "Reformated_Fusion_data"}, "action_type": "RenameDatasetAction", "output_name": "outfile"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/1.1.2", "tool_shed_repository": {"changeset_revision": "ddf54b12c295", "name": "text_processing", "owner": "bgruening", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"tabular\", \"__workflow_invocation_uuid__\": \"456e7fe2f37911ee99c9001e67d2ec02\", \"chromInfo\": \"/data/db/reference_genomes/hg38/len/hg38.len\", \"code\": \"(NR==1){\\n for (i=1;i<=NF;i++) {\\n if ($i ~ \\\"gene1\\\") { \\n gene1 = i;\\n }\\n if ($i == \\\"gene2\\\") { \\n gene2 = i;\\n }\\n if ($i == \\\"breakpoint1\\\") { \\n breakpoint1 = i;\\n }\\n if ($i == \\\"breakpoint2\\\") { \\n breakpoint2 = i;\\n }\\n if ($i == \\\"reading_frame\\\") { \\n reading_frame = i;\\n }\\n if ($i == \\\"peptide_sequence\\\") { \\n pscol = i;\\n }\\n }\\n}\\n(NR>1){\\n pseq = $pscol\\n if (pseq != \\\".\\\") {\\n bp = index(pseq,\\\"|\\\");\\n pos = bp - 8; \\n n=split(pseq,array,\\\"|\\\");\\n pep = toupper(array[1] array[2])\\n sub(\\\"[*]\\\",\\\"\\\",pep)\\n g1 = $gene1;\\n g2 = 
$gene2;\\n sub(\\\"[(,].*\\\",\\\"\\\",g1);\\n sub(\\\"[(,].*\\\",\\\"\\\",g2);\\n id = g1 \\\"_\\\" g2\\n brkpnts = $breakpoint1 \\\"_\\\" $breakpoint2 \\n neopep = substr(pep,pos)\\n if ($reading_frame == \\\"in-frame\\\") {\\n neopep = substr(pep,pos,16)\\n }\\n print(id \\\"\\\\t\\\" (NR-1) \\\"\\\\t\\\" brkpnts \\\"\\\\t\\\" neopep); \\n }\\n} \", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.2", "type": "tool", "uuid": "2c931d79-0814-4c68-a40b-21e7dc8bc96f", "when": null, "workflow_outputs": [{"label": "Reformated_Fusion_data", "output_name": "outfile", "uuid": "a8817c07-800d-4588-8c87-18ab611f85a8"}]}, "10": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.1", "errors": null, "id": 10, "input_connections": {"tables_0|table": {"id": 9, "output_name": "outfile"}}, "inputs": [], "label": "Table_generation_from_Fusion_data", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1213.6874662109362, "top": 271.80213875584445}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Table_generation_from_Fusion_data"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.1", "tool_shed_repository": {"changeset_revision": "3a6b78c39dca", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"tabular\", \"__workflow_invocation_uuid__\": \"456e7fe2f37911ee99c9001e67d2ec02\", \"add_to_database\": {\"withdb\": null}, \"addqueries\": {\"queries\": []}, \"chromInfo\": \"/data/db/reference_genomes/hg38/len/hg38.len\", \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT t1.c1 || '__' || t1.c2 || '__' || t1.c3, t1.c4\\nFROM t1 \", 
\"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"c1,c2,c3,c4\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.1", "type": "tool", "uuid": "8bacfaac-e689-4fb5-8b9b-0905591403d5", "when": null, "workflow_outputs": [{"label": "Table_generation_from_Fusion_data", "output_name": "output", "uuid": "ab3ad7b0-6f43-4f0f-9e0f-758c721a03f0"}]}, "11": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "errors": null, "id": 11, "input_connections": {"input": {"id": 10, "output_name": "output"}}, "inputs": [], "label": "Converting_Tabular_to_Fasta", "name": "Tabular-to-FASTA", "outputs": [{"name": "output", "type": "fasta"}], "position": {"left": 1275.8241849609362, "top": 490.32167000584445}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Converting_Tabular_to_Fasta"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "tool_shed_repository": {"changeset_revision": "0a7799698fe5", "name": "tabular_to_fasta", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"__workflow_invocation_uuid__\": \"456e7fe2f37911ee99c9001e67d2ec02\", \"chromInfo\": \"/data/db/reference_genomes/hg38/len/hg38.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"seq_col\": \"2\", \"title_col\": [\"1\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "a1607179-d186-466a-a83f-494519fb2778", "when": null, "workflow_outputs": [{"label": "Converting_Tabular_to_Fasta", "output_name": "output", "uuid": 
"e21178f2-6a4a-4174-9660-1af15ff7dbb4"}]}, "12": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.3", "errors": null, "id": 12, "input_connections": {"input": {"id": 11, "output_name": "output"}}, "inputs": [], "label": "Arriba-Fusion-Database", "name": "Regex Find And Replace", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1331.8319974609362, "top": 669.5280403183431}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Arriba-Fusion-Database"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regex1/1.0.3", "tool_shed_repository": {"changeset_revision": "503bcd6ebe4b", "name": "regex_find_replace", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"checks\": [{\"__index__\": 0, \"pattern\": \">(\\\\b\\\\w+\\\\S+)(.*$)\", \"replacement\": \">generic|fusion_\\\\1|\\\\2\"}], \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.3", "type": "tool", "uuid": "b4b60488-65c8-4182-8c3d-7858ec97836a", "when": null, "workflow_outputs": [{"label": "Arriba-Fusion-Database", "output_name": "out_file1", "uuid": "be2983fd-4ddf-4593-8b8d-933393eac469"}]}}, "tags": ["name:neoantigen"], "uuid": "ff5df560-54f4-4229-bb32-59bd55477e00", "version": 0} \ No newline at end of file diff --git a/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/tutorial.md b/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/tutorial.md index b4f00079fc5db9..5d5deb2c217399 100644 --- a/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/tutorial.md +++ b/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/tutorial.md @@ -177,6 +177,37 @@ In this step, the Filter tool is used to refine the results from the previous IE > {: .question} +## Peptide 
Annotation Using FASTA-to-Tabular +The **FASTA-to-Tabular** converter transforms peptide FASTA sequences into a tabular format that is easier to filter, join, and annotate. This step is commonly used before submitting peptides to annotation tools, for example for database lookups. + + +> Peptide annotation +> +> 1. {% tool [FASTA-to-Tabular](toolshed.g2.bx.psu.edu/repos/iuc/fasta_slider/fasta_to_tabular/1.1.1) %} with the following parameters: +> - {% icon param-file %} *"Convert these sequences"* → the peptide FASTA file +> - *"How many columns to divide title string into?"*: `1` +> - *"How many title characters to keep?"*: `0` +> +> This tool converts the peptide FASTA into a structured tabular format, preserving full header information in a single column. The resulting table provides clean sequence identifiers that can be linked to genomic, variant, and immunological metadata in later steps. +> +{: .hands_on} + +> +> +> 1. Why do we keep the entire FASTA title string as a single column? +> 2. How does converting FASTA to tabular format support downstream annotation workflows (e.g., HLA binding, variant mapping, epitope analysis)? +> +> > +> > +> > 1. A single-column title ensures that peptide identifiers remain intact and can be reliably matched across tools that expect exact header strings—especially important for workflows combining genomics and proteomics metadata. +> > 2. Many annotation tools require tabular inputs. Converting FASTA to tabular enables joins, filtering, SQL-based queries, and integration with peptide-centric metadata (variant origin, binding scores, validation status, etc.), allowing seamless downstream analysis.
+> > +> {: .solution} +> +{: .question} + + + ## Refining Results Using Table Operations ### Pivoting the table to aggregate affinity scores @@ -207,85 +238,127 @@ Specifically: > {: .hands_on} +## Identifying Weak Neopeptides (SQLite Query) +The **Query Tabular (using sqlite sql)** tool extracts identifiers for weak-binding or otherwise filtered neopeptides from a tabular/SQLite-backed dataset. This step can be used to pull peptide IDs for downstream HLA-binding review, reporting, or separate IEDB-based workflows. -> Strong-Table Compute +> Weak peptides and HLA binding > -> 1. {% tool [Table Compute](toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0) %} with the following parameters: -> - *"Input Single or Multiple Tables"*: `Single Table` -> - {% icon param-file %} *"Table"*: `out_file1` (output of **Filter** {% icon tool %}) -> - *"Type of table operation"*: `Perform a full table operation` -> - *"Operation"*: `Pivot` -> - *"Index"*: `icore` -> - *"Column"*: `allele` -> - *"Values"*: `percentile_rank` -> - *"Aggregator Function"*: `Maximum` -> -> 2. Rename file as "IEDB Binding Affinity - Strong Peptides". +> 1. {% tool [Query Tabular: sqlite sql](toolshed.g2.bx.psu.edu/repos/galaxyp/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - {% icon param-file %} *"Tabular Dataset for Table"* (table1): `Peptide Annotation` (output of **FASTA-to-Tabular**) +> - {% icon param-file %} *"Tabular Dataset for Table"* (table2): `IEDB Binding Affinity - Weak Peptides` (output of **Table Compute - Weak peptides**) +> - *"Save the sqlite database in your history"*: `Yes` +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT t2.* +> FROM t1 +> JOIN t2 +> ON t1.c2 = t2.icore +> ``` > +> 2. Run the query to keep only the rows of the weak-binding table (table 2, all columns) whose `icore` value matches column 2 (`c2`) of the peptide annotation table, for downstream review or submission to IEDB/HLA pipelines. +> {: .hands_on} +> Weak-binding neopeptides +> +> 1.
{% tool [Query Tabular: sqlite sql](toolshed.g2.bx.psu.edu/repos/galaxyp/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - {% icon param-file %} *"Tabular Dataset for Table"* (table): `peptide_table_for_sql` (output of **Query Tabular - Weak peptides**) +> - *"Save the sqlite database in your history"*: `Yes` +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT t1.icore +> FROM t1 +> ORDER BY t1.icore +> ``` +> +> 2. Run the query to obtain a deduplicated, ordered list of weak-binding peptide identifiers for downstream review or submission to IEDB/HLA pipelines. +> +{: .hands_on} > > -> 1. Why is the Pivot operation used in this step? -> 2. Why do we use the 'Maximum' aggregator function? +> 1. What is the purpose of extracting `icore` in this query? +> 2. How can the output of this step be used in an IEDB workflow? > > > > > -> > 1. The Pivot operation is used to reorganize the data so that each allele is associated with the highest percentile rank (maximum value) for the peptides. This helps in focusing on the top-ranked peptides for each allele, ensuring that the analysis highlights the most promising candidates for immunotherapy or vaccine development. -> > 2. The 'Maximum' function ensures that, for each allele, we keep the peptide with the highest binding affinity (best percentile rank). This step prioritizes the most relevant peptides for further analysis, eliminating lower-affinity binders that are less likely to have therapeutic potential. +> > 1. `icore` typically represents a canonical peptide identifier used to join HLA-binding results with peptide metadata; extracting it enables targeted follow-up. +> > 2. The list can be used to submit only weak-binding or candidate peptides to separate IEDB prediction workflows or for manual curation and reporting. > > > {: .solution} > {: .question} -## Extract Peptide column from the tabular -> Weak Peptide extraction -> -> 1. 
{% tool [Cut](Cut1) %} with the following parameters: -> - *"Cut columns"*: `c1` -> - {% icon param-file %} *"From"*: `out_file1` (output of **Table Compute (Weak Peptides)** {% icon tool %}) -> -> -> -{: .hands_on} -> Strong Peptide extraction +> Strong-Table Compute > -> 1. {% tool [Cut](Cut1) %} with the following parameters: -> - *"Cut columns"*: `c1` -> - {% icon param-file %} *"From"*: `out_file1` (output of **Table Compute (Strong Peptides)** {% icon tool %}) +> 1. {% tool [Table Compute](toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0) %} with the following parameters: +> - *"Input Single or Multiple Tables"*: `Single Table` +> - {% icon param-file %} *"Table"*: `out_file1` (output of **Filter** {% icon tool %}) +> - *"Type of table operation"*: `Perform a full table operation` +> - *"Operation"*: `Pivot` +> - *"Index"*: `icore` +> - *"Column"*: `allele` +> - *"Values"*: `percentile_rank` +> - *"Aggregator Function"*: `Maximum` > +> 2. Rename file as "IEDB Binding Affinity - Strong Peptides". > {: .hands_on} -### Removing headers - -In this sub-step, the Remove Beginning tool is used to clean the data by removing unnecessary rows or headers from the start of the table. After performing the Pivot operation in the previous step, the dataset may include extra header rows or metadata that aren't needed for analysis. This tool helps streamline the data by removing these initial rows, ensuring that only relevant information remains for further processing. +## Identifying Strong Neopeptides (SQLite Query) +The **Query Tabular (using sqlite sql)** tool extracts identifiers for strong-binding or otherwise filtered neopeptides from a tabular/SQLite-backed dataset. This step can be used to pull peptide IDs for downstream HLA-binding review, reporting, or separate IEDB-based workflows. 
-By applying the Remove Beginning tool, the user ensures that any unwanted starting rows—such as those containing column names, labels, or metadata that might have been carried over from previous operations—are removed, leaving the dataset clean and ready for the next analysis step. - -> Weak- Remove beginning +> Strong peptides and HLA binding +> +> 1. {% tool [Query Tabular: sqlite sql](toolshed.g2.bx.psu.edu/repos/galaxyp/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - {% icon param-file %} *"Tabular Dataset for Table"* (table1): `Peptide Annotation` (output of **FASTA-to-Tabular**) +> - {% icon param-file %} *"Tabular Dataset for Table"* (table2): `IEDB Binding Affinity - Strong Peptides` (output of **Table Compute - Strong peptides**) +> - *"Save the sqlite database in your history"*: `Yes` +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT t2.* +> FROM t1 +> JOIN t2 +> ON t1.c2 = t2.icore +> ``` > -> 1. {% tool [Remove beginning](Remove beginning1) %} with the following parameters: -> - {% icon param-file %} *"from"*: `table` (output of **Cut (Weak peptides)** {% icon tool %}) +> 2. Run the query to keep only the rows of the strong-binding table (table 2, all columns) whose `icore` value matches column 2 (`c2`) of the peptide annotation table, for downstream review or submission to IEDB/HLA pipelines. > -> 2. Rename file as "IEDB Predicted Weak Binding Peptides". +{: .hands_on} + +> Strong-binding neopeptides > +> 1. {% tool [Query Tabular: sqlite sql](toolshed.g2.bx.psu.edu/repos/galaxyp/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - {% icon param-file %} *"Tabular Dataset for Table"* (table): `peptide_table_for_sql` (output of **Query Tabular - Strong peptides**) +> - *"Save the sqlite database in your history"*: `Yes` +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT t1.icore +> FROM t1 +> ORDER BY t1.icore +> ``` > +> 2.
Run the query to obtain a deduplicated, ordered list of strong-binding peptide identifiers for downstream review or submission to IEDB/HLA pipelines. +> {: .hands_on} -> Strong- Remove beginning +> > -> 1. {% tool [Remove beginning](Remove beginning1) %} with the following parameters: -> - {% icon param-file %} *"from"*: `table` (output of **Cut (Strong peptides)** {% icon tool %}) +> 1. Why is the Pivot operation used in this step? +> 2. Why do we use the 'Maximum' aggregator function? > -> 2. Rename file as "IEDB Predicted Strong Binding Peptides". +> > +> > +> > 1. The Pivot operation is used to reorganize the data so that each allele is associated with the highest percentile rank (maximum value) for the peptides. This helps in focusing on the top-ranked peptides for each allele, ensuring that the analysis highlights the most promising candidates for immunotherapy or vaccine development. +> > 2. The 'Maximum' function ensures that, for each allele, we keep the peptide with the highest binding affinity (best percentile rank). This step prioritizes the most relevant peptides for further analysis, eliminating lower-affinity binders that are less likely to have therapeutic potential. +> > +> {: .solution} > -> -{: .hands_on} +{: .question} + ## Annotation of Strong and Weak Binder Peptides @@ -321,18 +394,21 @@ This workflow is particularly relevant in the neoantigen discovery process, as i Given the increasing demand for personalized cancer treatments, this workflow represents a vital approach for accelerating the identification of clinically relevant neoantigens, thus advancing the field of cancer immunotherapy and personalized medicine. +The outputs from the iPepGen workflow, specifically the HLA allele genotypes and verified neoantigen candidate peptide sequences, serve as input to the pVACbind software suite, which we deployed as a Galaxy tool. pVACbind is part of the personalized Variant Antigens in Cancer tool suite (pVACtools). 
The pVACbind software bundles numerous complementary tools for predicting binding of input peptide sequences to HLA protein complexes (both class I and II) coded by specific allele genotypes, as well as two deep learning algorithms, DeepImmuno and BigMHC, for predicting immunogenicity of HLA alleles and bound peptides. The Galaxy tool outputs a tabular file with aggregated results across all of the algorithms selected by the user, as well as a filtered output showing only those peptide and allele combination scores indicating high potential for immunogenicity. The [pVACbind](https://pvactools.readthedocs.io/en/latest/pvacbind.html) software is well documented and provides users with ample information about the bundled tools and how to interpret results for assisting in prioritizing peptides for further experimental testing as immunotherapy agents. + +![pvac-process]({% link topics/proteomics/images/neoantigen/pvac.PNG %}) # Rerunning on your own data -To rerun this entire analysis at once, you can use our workflow. Below we show how to do this: +To rerun this entire analysis at once, you can use our workflow. If a tool or the workflow fails, we recommend checking that the appropriate inputs are selected before re-running. + +Below we show how to do this: > Running the Workflow > > 1. **Import the workflow** into Galaxy: -> -> {% snippet faqs/galaxy/workflows_run_trs.md path="topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/main_workflow.ga" title="HLA Binding for Novel Peptides" %} -> +> - [Neoantigen IEDB Binding HLA Alleles](https://tinyurl.com/ipepgen-iedb-pep-wf) > > 2. Run **Workflow** {% icon workflow %} using the following parameters: > - *"Send results to a new history"*: `No` @@ -341,6 +417,14 @@ To rerun this entire analysis at once, you can use our workflow.
Below we show h > > {% snippet faqs/galaxy/workflows_run.md %} > +> DISCLAIMER +> +> - If any step in this workflow fails, please ensure that the input files have been correctly generated and formatted by the preceding tools. Workflow failures often result from improperly called or incomplete input data rather than errors in the workflow itself. Users are responsible for verifying their input before troubleshooting workflow issues. +> +> {: .comment} +> +> +> {: .hands_on} # Are you feeling adventurous? ✨ @@ -350,5 +434,4 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h This new [One-Click Neoantigen Workflow](https://usegalaxy.eu/u/galaxyp/w/ipepgen-one-click-workflow) brings together all key modules of the neoantigen discovery process into a single, streamlined analysis within Galaxy. Instead of launching each tutorial separately, users can now execute the entire end-to-end pipeline—from database creation to HLA binding prediction—with just one click —without ever leaving Galaxy. # Disclaimer - Please note that all the software tools used in this workflow are subject to version updates and changes. As a result, the parameters, functionalities, and outcomes may differ with each new version. Additionally, if the protein sequences are downloaded at different times, the number of sequences may also vary due to updates in the reference databases or tool modifications. We recommend the users to verify the specific versions of software tools used to ensure the reproducibility and accuracy of results. 
diff --git a/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/index.md b/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/index.md index 3b205c20b64400..93fc1f10890b87 100644 --- a/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/index.md +++ b/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/index.md @@ -3,4 +3,5 @@ layout: workflow-list redirect_from: - /topics/proteomics/tutorials/neoantigen-7-hla-binding-novel-peptides/workflows/ + - /topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/main_workflow.html --- diff --git a/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/main_workflow.ga b/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/main_workflow.ga deleted file mode 100644 index 7c6cb0167af708..00000000000000 --- a/topics/proteomics/tutorials/neoantigen-hla-binding-novel-peptides/workflows/main_workflow.ga +++ /dev/null @@ -1 +0,0 @@ -{"a_galaxy_workflow": "true", "annotation": "Predict binding using IEDB and check novelty peptides with PepQuery", "comments": [], "creator": [{"class": "Organization", "name": "GalaxyP", "url": "https://galaxyp.org/"}], "format-version": "0.1", "license": "GPL-3.0-or-later", "name": "GigaScience-IEDB-PepQuery-Neoantigen", "report": {"markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"}, "steps": {"0": {"annotation": "A single column tabular file with an HLA per line IEDB format: e.g. HLA-A*01:01\n", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [{"description": "A single column tabular file with an HLA per line IEDB format: e.g. 
HLA-A*01:01\n", "name": "IEDB-optityle-seq2hla-alleles"}], "label": "IEDB-optityle-seq2hla-alleles", "name": "Input dataset", "outputs": [], "position": {"left": 0, "top": 319.4960920164948}, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": \"\"}", "tool_version": null, "type": "data_input", "uuid": "151ad927-d27e-4565-9fde-e8fe24297b43", "when": null, "workflow_outputs": []}, "1": {"annotation": "A fasta file of peptides to test for HLA binding, can be from Arriba\n", "content_id": null, "errors": null, "id": 1, "input_connections": {}, "inputs": [{"description": "A fasta file of peptides to test for HLA binding, can be from Arriba\n", "name": "FASTA-for-IEDB"}], "label": "FASTA-for-IEDB", "name": "Input dataset", "outputs": [], "position": {"left": 1.22265625, "top": 439.3763720787159}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": \"\"}", "tool_version": null, "type": "data_input", "uuid": "e9431cfe-4617-4d93-b149-c695c92e2761", "when": null, "workflow_outputs": []}, "2": {"annotation": "MSMS spectrum", "content_id": null, "errors": null, "id": 2, "input_connections": {}, "inputs": [{"description": "MSMS spectrum", "name": "MSMS-Spectrum"}], "label": "MSMS-Spectrum", "name": "Input dataset", "outputs": [], "position": {"left": 1139.828125, "top": 331.01332092285156}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "52751f4b-710f-4057-85b0-9264a7da61b5", "when": null, "workflow_outputs": []}, "3": {"annotation": "IEDB binding prediction", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/iedb_api/iedb_api/2.15.2", "errors": null, "id": 3, "input_connections": {"prediction|alleles|allele_file": {"id": 0, "output_name": "output"}, "sequence|seq_fasta": {"id": 1, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool IEDB", "name": "sequence"}], "label": "IEDB", "name": "IEDB", "outputs": [{"name": "output", 
"type": "tabular"}], "position": {"left": 268.0664367675781, "top": 347.67715332871603}, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/iedb_api/iedb_api/2.15.2", "tool_shed_repository": {"changeset_revision": "7ed6dda64a81", "name": "iedb_api", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"prediction\": {\"tool\": \"mhci\", \"__current_case__\": 0, \"method\": \"netmhcpan_el\", \"alleles\": {\"allelesrc\": \"history\", \"__current_case__\": 0, \"allele_file\": {\"__class__\": \"ConnectedValue\"}}, \"lengths\": [\"8\", \"9\", \"10\", \"11\", \"12\"]}, \"sequence\": {\"seqsrc\": \"fasta\", \"__current_case__\": 0, \"seq_fasta\": {\"__class__\": \"ConnectedValue\"}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.15.2", "type": "tool", "uuid": "47d6a4af-d71e-425d-bcea-5f15fd53717d", "when": null, "workflow_outputs": [{"label": "iedb_binding_predictions", "output_name": "output", "uuid": "a7fdb54c-93ed-4235-b76b-caa7c15d0dfc"}]}, "4": {"annotation": "anything less than 2 and more than 0.5 percentile rank", "content_id": "Filter1", "errors": null, "id": 4, "input_connections": {"input": {"id": 3, "output_name": "output"}}, "inputs": [], "label": "Filter-weak-binder", "name": "Filter", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 510.7421875, "top": 113.28515451649483}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Weak-Binders"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}, "TagDatasetActionout_file1": {"action_arguments": {"tags": "name:weak"}, "action_type": "TagDatasetAction", "output_name": "out_file1"}}, "tool_id": "Filter1", "tool_state": "{\"cond\": \"c11>0.5 and c11<=2\", \"header_lines\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "3a9a608b-3ad5-4826-b7ff-253aa1bdb142", "when": 
null, "workflow_outputs": [{"label": "Weak-Binders", "output_name": "out_file1", "uuid": "da34763e-d406-4366-af85-cb805e168105"}]}, "5": {"annotation": "anything less than 0.5 percentile rank", "content_id": "Filter1", "errors": null, "id": 5, "input_connections": {"input": {"id": 3, "output_name": "output"}}, "inputs": [], "label": "Filter-strong-binders", "name": "Filter", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 508.9921875, "top": 621.1171857664948}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Strong-Binders"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}, "TagDatasetActionout_file1": {"action_arguments": {"tags": "name:strong"}, "action_type": "TagDatasetAction", "output_name": "out_file1"}}, "tool_id": "Filter1", "tool_state": "{\"cond\": \"c11<=0.5\", \"header_lines\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "8416fa58-4e0f-40a7-bb30-c3b5af628d45", "when": null, "workflow_outputs": [{"label": "Strong-Binders", "output_name": "out_file1", "uuid": "0f023395-964b-42c2-9e4f-b05a929bf064"}]}, "6": {"annotation": "extracting allele results using pivot", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", "errors": null, "id": 6, "input_connections": {"singtabop|input": {"id": 4, "output_name": "out_file1"}}, "inputs": [{"description": "runtime parameter for tool Table Compute", "name": "singtabop"}], "label": "pivot_table_iedb_results-weak", "name": "Table Compute", "outputs": [{"name": "table", "type": "tabular"}], "position": {"left": 786.08984375, "top": 76.16015451649483}, "post_job_actions": {"RenameDatasetActiontable": {"action_arguments": {"newname": "pivot_table_iedb_results_weak"}, "action_type": "RenameDatasetAction", "output_name": "table"}}, "tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", "tool_shed_repository": {"changeset_revision": "3bf5661c0280", "name": "table_compute", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"out_opts\": [\"ignore_nas\", \"output_headers_col\", \"output_headers_row\"], \"precision\": \"6\", \"singtabop\": {\"use_type\": \"single\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}, \"col_row_names\": [\"has_col_names\", \"has_row_names\"], \"adv\": {\"header\": null, \"nrows\": null, \"skipfooter\": null, \"skip_blank_lines\": true}, \"user\": {\"mode\": \"fulltable\", \"__current_case__\": 4, \"general\": {\"use\": \"pivot\", \"__current_case__\": 2, \"index\": \"icore\", \"column\": \"allele\", \"values\": \"percentile_rank\", \"aggfunc\": \"max\", \"colfun_map\": []}}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.2.4+galaxy0", "type": "tool", "uuid": "385a138a-f7f8-4813-8860-d700329b4135", "when": null, "workflow_outputs": [{"label": "iedb_results_pivot_table_weak", "output_name": "table", "uuid": "ec56600f-a26b-4931-8431-d73da746ca4c"}]}, "7": {"annotation": "extracting allele results using pivot", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", "errors": null, "id": 7, "input_connections": {"singtabop|input": {"id": 5, "output_name": "out_file1"}}, "inputs": [{"description": "runtime parameter for tool Table Compute", "name": "singtabop"}], "label": "pivot_table_iedb_results_strong", "name": "Table Compute", "outputs": [{"name": "table", "type": "tabular"}], "position": {"left": 772.375, "top": 614.6093732664948}, "post_job_actions": {"RenameDatasetActiontable": {"action_arguments": {"newname": "pivot_table_iedb_results_strong"}, "action_type": "RenameDatasetAction", "output_name": "table"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", "tool_shed_repository": 
{"changeset_revision": "3bf5661c0280", "name": "table_compute", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"out_opts\": [\"ignore_nas\", \"output_headers_col\", \"output_headers_row\"], \"precision\": \"6\", \"singtabop\": {\"use_type\": \"single\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}, \"col_row_names\": [\"has_col_names\", \"has_row_names\"], \"adv\": {\"header\": null, \"nrows\": null, \"skipfooter\": null, \"skip_blank_lines\": true}, \"user\": {\"mode\": \"fulltable\", \"__current_case__\": 4, \"general\": {\"use\": \"pivot\", \"__current_case__\": 2, \"index\": \"icore\", \"column\": \"allele\", \"values\": \"percentile_rank\", \"aggfunc\": \"max\", \"colfun_map\": []}}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.2.4+galaxy0", "type": "tool", "uuid": "9fdd35ca-6b67-4df6-a558-36353304000e", "when": null, "workflow_outputs": [{"label": "iedb_results_pivot_table_strong", "output_name": "table", "uuid": "332fcd7f-7ace-4d23-9ea1-3a53a993db2b"}]}, "8": {"annotation": "this is to remove header", "content_id": "Remove beginning1", "errors": null, "id": 8, "input_connections": {"input": {"id": 6, "output_name": "table"}}, "inputs": [], "label": "Removing_first_line-weak", "name": "Remove beginning", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1047.859375, "top": 28.6328125}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "header_removed_for_filter-weak"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Remove beginning1", "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"num_lines\": \"1\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.0", "type": "tool", "uuid": "99756617-eb99-4d06-9476-b0b8844fef97", "when": null, "workflow_outputs": [{"label": "header_removed_for_filter-weak", "output_name": "out_file1", "uuid": 
"991c6d8e-f192-4706-bd2f-a5a247d43361"}]}, "9": {"annotation": "removing header", "content_id": "Remove beginning1", "errors": null, "id": 9, "input_connections": {"input": {"id": 7, "output_name": "table"}}, "inputs": [], "label": "Removing_first_line-strong", "name": "Remove beginning", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1044.99609375, "top": 589.8593732664948}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "header removed for filter-strong"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Remove beginning1", "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"num_lines\": \"1\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.0", "type": "tool", "uuid": "961e17aa-6776-4753-a689-bbea59f812ed", "when": null, "workflow_outputs": [{"label": "header removed for filter-strong", "output_name": "out_file1", "uuid": "c8c7951d-4b89-44f7-9583-86cd69b3212b"}]}, "10": {"annotation": "cutting first column to extract peptides", "content_id": "Cut1", "errors": null, "id": 10, "input_connections": {"input": {"id": 8, "output_name": "out_file1"}}, "inputs": [], "label": "Peptide-Column-extraction-weak", "name": "Cut", "outputs": [{"name": "out_file1", "type": "tabular"}], "position": {"left": 1273.921875, "top": 54.671875}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Peptide-for-PepQuery-weak"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Cut1", "tool_state": "{\"columnList\": \"c1\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.2", "type": "tool", "uuid": "e6ff6a9e-2f3c-4b7e-acbd-4c3277a487f7", "when": null, "workflow_outputs": [{"label": "Peptide-for-PepQuery-weak", "output_name": "out_file1", "uuid": "45fc4362-d338-402f-903c-e22630895f7b"}]}, 
"11": {"annotation": "cutting first column to extract peptides", "content_id": "Cut1", "errors": null, "id": 11, "input_connections": {"input": {"id": 9, "output_name": "out_file1"}}, "inputs": [], "label": "Peptide-Column-extraction-strong", "name": "Cut", "outputs": [{"name": "out_file1", "type": "tabular"}], "position": {"left": 1301, "top": 568.0390625}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Peptide-for-PepQuery-strong"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Cut1", "tool_state": "{\"columnList\": \"c1\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.2", "type": "tool", "uuid": "0ced96f2-7f59-45de-8ffb-34b7a9e3bdba", "when": null, "workflow_outputs": [{"label": "Peptide-for-PepQuery-strong", "output_name": "out_file1", "uuid": "caa24097-6a49-4950-8271-42ce94ac37e6"}]}, "12": {"annotation": "Test if the peptides are novel, and search for peptide in Mass Spec results.", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/pepquery2/pepquery2/2.0.2+galaxy2", "errors": null, "id": 12, "input_connections": {"req_inputs|input_type|multiple|input": {"id": 10, "output_name": "out_file1"}, "req_inputs|ms_dataset|spectrum_files": {"id": 2, "output_name": "output"}}, "inputs": [], "label": "PepQuery-IEDB-Peptides-weak", "name": "PepQuery2", "outputs": [{"name": "log_txt", "type": "txt"}, {"name": "psm_rank_txt", "type": "tabular"}], "position": {"left": 1518.0108023437747, "top": 0}, "post_job_actions": {"RenameDatasetActionlog_txt": {"action_arguments": {"newname": "PepQuery-IEDB-Peptides-log-weak"}, "action_type": "RenameDatasetAction", "output_name": "log_txt"}, "RenameDatasetActionpsm_rank_txt": {"action_arguments": {"newname": "PepQuery-IEDB-Peptides-weak"}, "action_type": "RenameDatasetAction", "output_name": "psm_rank_txt"}}, "tool_id": 
"toolshed.g2.bx.psu.edu/repos/galaxyp/pepquery2/pepquery2/2.0.2+galaxy2", "tool_shed_repository": {"changeset_revision": "c32806a80862", "name": "pepquery2", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"digestion\": {\"enzyme\": null, \"max_missed_cleavages\": null}, \"fast\": false, \"modifications\": {\"fixed_mod\": null, \"var_mod\": null, \"max_mods\": null, \"unmodified\": false, \"aa\": false}, \"ms_params\": {\"tolerance_params\": {\"precursor_tolerance\": null, \"precursor_unit\": null, \"tolerance\": null}, \"search\": {\"frag_method\": null, \"scoring_method\": null, \"extra_score_validation\": false, \"min_charge\": null, \"max_charge\": null, \"min_peaks\": null, \"isotope_error\": \"\", \"min_score\": null, \"min_length\": null, \"max_length\": null, \"num_random_peptides\": null}}, \"outputs_selected\": \"psm_rank.txt\", \"parameter_set\": \"\", \"req_inputs\": {\"input_type\": {\"input_type_selector\": \"peptide\", \"__current_case__\": 0, \"multiple\": {\"peptide_input_selector\": \"multiple\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}}}, \"db_type\": {\"db_type_selector\": \"download\", \"__current_case__\": 1, \"db_id\": \"gencode:human\"}, \"ms_dataset\": {\"ms_dataset_type\": \"history\", \"__current_case__\": 0, \"spectrum_files\": {\"__class__\": \"ConnectedValue\"}}, \"indexType\": \"2\"}, \"validation\": {\"task_type\": \"novel\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.0.2+galaxy2", "type": "tool", "uuid": "706371d9-fde4-4998-badc-b5af6a68c26c", "when": null, "workflow_outputs": [{"label": "PepQuery-IEDB-Peptides-weak", "output_name": "psm_rank_txt", "uuid": "23f3a99d-81cf-4fe0-bb63-218228aec6dc"}, {"label": "PepQuery-IEDB-Peptides-log-weak", "output_name": "log_txt", "uuid": "a7765d0f-2282-46ce-a8e4-4aee6b3b01ce"}]}, "13": {"annotation": "Test if the peptides are novel, and search for peptide in Mass Spec results.", 
"content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/pepquery2/pepquery2/2.0.2+galaxy2", "errors": null, "id": 13, "input_connections": {"req_inputs|input_type|multiple|input": {"id": 11, "output_name": "out_file1"}, "req_inputs|ms_dataset|spectrum_files": {"id": 2, "output_name": "output"}}, "inputs": [], "label": "PepQuery-IEDB-Peptides-strong", "name": "PepQuery2", "outputs": [{"name": "log_txt", "type": "txt"}, {"name": "psm_rank_txt", "type": "tabular"}], "position": {"left": 1527.7344351562747, "top": 438.80860900878906}, "post_job_actions": {"RenameDatasetActionlog_txt": {"action_arguments": {"newname": "PepQuery-IEDB-Peptides-log-strong"}, "action_type": "RenameDatasetAction", "output_name": "log_txt"}, "RenameDatasetActionpsm_rank_txt": {"action_arguments": {"newname": "PepQuery-IEDB-Peptides"}, "action_type": "RenameDatasetAction", "output_name": "psm_rank_txt"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/pepquery2/pepquery2/2.0.2+galaxy2", "tool_shed_repository": {"changeset_revision": "c32806a80862", "name": "pepquery2", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"digestion\": {\"enzyme\": null, \"max_missed_cleavages\": null}, \"fast\": false, \"modifications\": {\"fixed_mod\": null, \"var_mod\": null, \"max_mods\": null, \"unmodified\": false, \"aa\": false}, \"ms_params\": {\"tolerance_params\": {\"precursor_tolerance\": null, \"precursor_unit\": null, \"tolerance\": null}, \"search\": {\"frag_method\": null, \"scoring_method\": null, \"extra_score_validation\": false, \"min_charge\": null, \"max_charge\": null, \"min_peaks\": null, \"isotope_error\": \"\", \"min_score\": null, \"min_length\": null, \"max_length\": null, \"num_random_peptides\": null}}, \"outputs_selected\": \"psm_rank.txt\", \"parameter_set\": \"\", \"req_inputs\": {\"input_type\": {\"input_type_selector\": \"peptide\", \"__current_case__\": 0, \"multiple\": {\"peptide_input_selector\": \"multiple\", \"__current_case__\": 0, \"input\": 
{\"__class__\": \"ConnectedValue\"}}}, \"db_type\": {\"db_type_selector\": \"download\", \"__current_case__\": 1, \"db_id\": \"gencode:human\"}, \"ms_dataset\": {\"ms_dataset_type\": \"history\", \"__current_case__\": 0, \"spectrum_files\": {\"__class__\": \"ConnectedValue\"}}, \"indexType\": \"2\"}, \"validation\": {\"task_type\": \"novel\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.0.2+galaxy2", "type": "tool", "uuid": "55514ad4-65bb-40ec-83e6-692078ee56c5", "when": null, "workflow_outputs": [{"label": "PepQuery-IEDB-Peptides", "output_name": "psm_rank_txt", "uuid": "854db1c7-25f2-4f0e-ba05-dd180f954f73"}, {"label": "PepQuery-IEDB-Peptides-log-strong", "output_name": "log_txt", "uuid": "72ad5a85-80e6-40a0-8a8a-e0e33427b984"}]}, "14": {"annotation": "IEDB-Validated-NeoAntigen_Peptides, filtering if the validation column is yes or no ", "content_id": "Filter1", "errors": null, "id": 14, "input_connections": {"input": {"id": 12, "output_name": "psm_rank_txt"}}, "inputs": [], "label": "Filtering-Confident-peptides-weak", "name": "Filter", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1755.8438101562747, "top": 0.6679687499999716}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "IEDB-Validated-NeoAntigen_Peptides-weak"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Filter1", "tool_state": "{\"cond\": \"c20=='Yes'\", \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "650f7ee1-0c91-4a03-8b52-127580aef87d", "when": null, "workflow_outputs": [{"label": "IEDB-Validated-NeoAntigen_Peptides-weak", "output_name": "out_file1", "uuid": "cb8fceb5-ad70-459a-af65-516db31ba6d9"}]}, "15": {"annotation": "IEDB-Validated-NeoAntigen_Peptides, filtering if the validation column is yes or no ", 
"content_id": "Filter1", "errors": null, "id": 15, "input_connections": {"input": {"id": 13, "output_name": "psm_rank_txt"}}, "inputs": [], "label": "Filtering-Confident-peptides-strong", "name": "Filter", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1786.2695914062747, "top": 474.48048400878895}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "IEDB-Validated-NeoAntigen_Peptides-strong"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Filter1", "tool_state": "{\"cond\": \"c20=='Yes'\", \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "dfd05454-fe76-4b55-8c19-ce5e252d4fd6", "when": null, "workflow_outputs": [{"label": "IEDB-Validated-NeoAntigen_Peptides-strong", "output_name": "out_file1", "uuid": "eade637d-296b-412a-bcf6-c51c72f90c6f"}]}, "16": {"annotation": "Remove reference peptides from IeDB Pivot Table", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 16, "input_connections": {"tables_0|table": {"id": 6, "output_name": "table"}, "tables_1|table": {"id": 14, "output_name": "out_file1"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Novel_NeoPeptides-weak", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}, {"name": "output1", "type": "tabular"}], "position": {"left": 2023.6406851562747, "top": 4.695327758788977}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "iedb_novel_peptide_x_hla_table-weak"}, "action_type": "RenameDatasetAction", "output_name": "output"}, "RenameDatasetActionoutput1": {"action_arguments": {"newname": "iedb_predicted_neoantigens-weak"}, "action_type": "RenameDatasetAction", "output_name": "output1"}}, "tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": [{\"__index__\": 0, \"sqlquery\": \"SELECT icore\\nFROM t1 \\nWHERE t1.icore IN (SELECT c1 FROM t2)\\nORDER BY icore\", \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}}]}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"yes\", \"__current_case__\": 0, \"header_prefix\": null}, \"save_db\": false, \"sqlquery\": \"SELECT *\\nFROM t1 \\nWHERE t1.icore IN (SELECT c1 FROM t2)\\n\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": true, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}, {\"__index__\": 1, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "7ff13727-d4e7-46da-a8af-529b4fef0cc8", "when": null, "workflow_outputs": [{"label": "iedb_novel_peptide_x_hla_table-weak", "output_name": "output", "uuid": "dd312a10-321b-46cf-8de6-3b17944676f3"}, {"label": "iedb_predicted_neoantigens-weak", "output_name": "output1", "uuid": "c5b24d01-60d3-4499-94f4-165c5985c70d"}]}, "17": {"annotation": "Remove reference peptides from IeDB Pivot Table", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 17, "input_connections": 
{"tables_0|table": {"id": 7, "output_name": "table"}, "tables_1|table": {"id": 15, "output_name": "out_file1"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Novel_NeoPeptides-strong", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}, {"name": "output1", "type": "tabular"}], "position": {"left": 2036.4141226562747, "top": 454.95314025878895}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "iedb_novel_peptide_x_hla_table-strong"}, "action_type": "RenameDatasetAction", "output_name": "output"}, "RenameDatasetActionoutput1": {"action_arguments": {"newname": "iedb_predicted_neoantigens-strong"}, "action_type": "RenameDatasetAction", "output_name": "output1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": [{\"__index__\": 0, \"sqlquery\": \"SELECT icore\\nFROM t1 \\nWHERE t1.icore IN (SELECT c1 FROM t2)\\nORDER BY icore\", \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}}]}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"yes\", \"__current_case__\": 0, \"header_prefix\": null}, \"save_db\": false, \"sqlquery\": \"SELECT *\\nFROM t1 \\nWHERE t1.icore IN (SELECT c1 FROM t2)\\n\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": true, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}, {\"__index__\": 1, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", 
\"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "18b4f109-d48a-4635-b4d9-230d53e661dd", "when": null, "workflow_outputs": [{"label": "iedb_novel_peptide_x_hla_table-strong", "output_name": "output", "uuid": "5d9e2770-3431-421c-9f70-f9cf88dd7620"}, {"label": "iedb_predicted_neoantigens-strong", "output_name": "output1", "uuid": "c3661e5e-b0cd-4975-948e-5d5ef9e28c8a"}]}}, "tags": ["IEDB", "name:neoantigen"], "uuid": "e22e9f40-ba2e-4af8-910f-8919cd3311b6", "version": 0} \ No newline at end of file diff --git a/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/tutorial.md b/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/tutorial.md index 0741c356585cb4..9ef9c1bc74c15c 100644 --- a/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/tutorial.md +++ b/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/tutorial.md @@ -847,19 +847,25 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > Running the Workflow > -> 1. **Import the workflow** into Galaxy: -> -> {% snippet faqs/galaxy/workflows_run_trs.md path="topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/main_workflow.ga" title="Neoantigen non-reference Database Generation" %} -> -> -> 2. Run **Workflow** {% icon workflow %} using the following parameters: +> 1. **Import the workflow** into Galaxy: +> - [Neoantigen Non-Reference Database Generation](https://tinyurl.com/ipepgen-nonref-wf) +> +> 2. 
Run **Workflow** {% icon workflow %} using the following parameters: > - *"Send results to a new history"*: `No` > - {% icon param-file %} *"Human Reference Genome Annotation"*: `Homo_sapiens.GRCh38_canon.106.gtf` > - {% icon param-file %} *"Human Uniprot (with isoforms) and cRAP Database"*: `HUMAN_CRAP.fasta` > - {% icon param-file %} *"RNA-Seq_Reads_1 (forward strand)"*: `RNA-Seq_Reads_1.fastqsanger.gz` > - {% icon param-file %} *"RNA-Seq_Reads_2 (reverse strand)"*: `RNA-Seq_Reads_2.fastqsanger.gz` > -> {% snippet faqs/galaxy/workflows_run.md %} +> +> {% snippet faqs/galaxy/workflows_run.md %} +> +> +> DISCLAIMER +> +> - If any step in this workflow fails, please ensure that the input files have been correctly generated and formatted by the preceding tools. Workflow failures often result from improperly called or incomplete input data rather than errors in the workflow itself. Users are responsible for verifying their input before troubleshooting workflow issues. +> +> {: .comment} > {: .hands_on} diff --git a/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/index.md b/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/index.md index 8576092a7d32cb..996dd9d6fd1af6 100644 --- a/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/index.md +++ b/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/index.md @@ -3,4 +3,5 @@ layout: workflow-list redirect_from: - /topics/proteomics/tutorials/neoantigen-2-non-reference-database-generation/workflows/ + - /topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/main_workflow.html --- diff --git a/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/main_workflow.ga b/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/main_workflow.ga deleted file mode 100644 index 0ef1aacc88b638..00000000000000 --- 
a/topics/proteomics/tutorials/neoantigen-non-reference-database-generation/workflows/main_workflow.ga +++ /dev/null @@ -1 +0,0 @@ -{"a_galaxy_workflow": "true", "annotation": "Generating non-reference protein database for FragPipe discovery", "comments": [], "creator": [{"class": "Organization", "name": "GalaxyP"}], "format-version": "0.1", "license": "CC-BY-4.0", "name": "Gigascience_Indels_SAV_non-reference_demonstration_STS26T-Gent_Workflow", "report": {"markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"}, "steps": {"0": {"annotation": "Human reference genome\n#!genome-build GRCh38.p13\n#!genome-version GRCh38\n#!genome-date 2013-12\n#!genome-build-accession GCA_000001405.28\n#!genebuild-last-updated 2021-11", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [{"description": "Human reference genome\n#!genome-build GRCh38.p13\n#!genome-version GRCh38\n#!genome-date 2013-12\n#!genome-build-accession GCA_000001405.28\n#!genebuild-last-updated 2021-11", "name": "Homo_sapiens.GRCh38_canon.106.gtf"}], "label": "Homo_sapiens.GRCh38_canon.106.gtf", "name": "Input dataset", "outputs": [], "position": {"left": 295.5937262204385, "top": 141.17708455680213}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "2dcbfce2-682b-4a07-b370-d09857e426a1", "when": null, "workflow_outputs": []}, "1": {"annotation": "Second data set of RNA-seq data", "content_id": null, "errors": null, "id": 1, "input_connections": {}, "inputs": [{"description": "Second data set of RNA-seq data", "name": "RNA-Seq_Reads_2.fastq"}], "label": "RNA-Seq_Reads_2.fastq", "name": "Input dataset", "outputs": [], "position": {"left": 0.0, "top": 643.0000389659879}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", 
"tool_version": null, "type": "data_input", "uuid": "a72ae09b-84ff-4989-a71a-ebc00efed697", "when": null, "workflow_outputs": []}, "2": {"annotation": "First data set of RNA-seq data", "content_id": null, "errors": null, "id": 2, "input_connections": {}, "inputs": [{"description": "First data set of RNA-seq data", "name": "RNA-Seq_Reads_1.fastq"}], "label": "RNA-Seq_Reads_1.fastq", "name": "Input dataset", "outputs": [], "position": {"left": 0.0, "top": 817.0000135778079}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "6818c468-bbb0-4002-b347-c35e374e25c6", "when": null, "workflow_outputs": []}, "3": {"annotation": "HUMAN Uniprot+isoforms and cRAP FASTA database", "content_id": null, "errors": null, "id": 3, "input_connections": {}, "inputs": [{"description": "HUMAN Uniprot+isoforms and cRAP FASTA database", "name": "HUMAN-Uniprot-and-isoforms_and_cRAP-FASTA-Database"}], "label": "HUMAN-Uniprot-and-isoforms_and_cRAP-FASTA-Database", "name": "Input dataset", "outputs": [], "position": {"left": 1162.8398291365352, "top": 1324.83984375}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "99296fa6-b9a9-43b2-9c0f-f51821d5ce4c", "when": null, "workflow_outputs": []}, "4": {"annotation": "Uncompressed_RNA_Seq_Reads_2", "content_id": "CONVERTER_gz_to_uncompressed", "errors": null, "id": 4, "input_connections": {"input1": {"id": 1, "output_name": "output"}}, "inputs": [], "label": "Uncompressed_RNA_Seq_Reads_2", "name": "Convert compressed file to uncompressed.", "outputs": [{"name": "output1", "type": "auto"}], "position": {"left": 279.99996837320856, "top": 617.0000266110596}, "post_job_actions": {"ChangeDatatypeActionoutput1": {"action_arguments": {"newtype": "fastqsanger"}, "action_type": "ChangeDatatypeAction", "output_name": "output1"}, "RenameDatasetActionoutput1": {"action_arguments": {"newname": 
"Uncompressed_RNA_Seq_Reads_2"}, "action_type": "RenameDatasetAction", "output_name": "output1"}}, "tool_id": "CONVERTER_gz_to_uncompressed", "tool_state": "{\"input1\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.0", "type": "tool", "uuid": "c1145635-5d55-4f7a-a0e0-8c4d3f596f92", "when": null, "workflow_outputs": [{"label": "Uncompressed_RNA_Seq_Reads_2", "output_name": "output1", "uuid": "5983317a-db25-4f0f-8a0b-290d15aafd14"}]}, "5": {"annotation": "Uncompressed_RNA_Seq_Reads_1", "content_id": "CONVERTER_gz_to_uncompressed", "errors": null, "id": 5, "input_connections": {"input1": {"id": 2, "output_name": "output"}}, "inputs": [], "label": "Uncompressed_RNA_Seq_Reads_1", "name": "Convert compressed file to uncompressed.", "outputs": [{"name": "output1", "type": "auto"}], "position": {"left": 280.00001854910533, "top": 805.7343874460303}, "post_job_actions": {"ChangeDatatypeActionoutput1": {"action_arguments": {"newtype": "fastqsanger"}, "action_type": "ChangeDatatypeAction", "output_name": "output1"}, "RenameDatasetActionoutput1": {"action_arguments": {"newname": "Uncompressed_RNA_Seq_Reads_1"}, "action_type": "RenameDatasetAction", "output_name": "output1"}}, "tool_id": "CONVERTER_gz_to_uncompressed", "tool_state": "{\"input1\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.0", "type": "tool", "uuid": "ca222f5f-7eb1-4c5c-aa91-8d34b44adf16", "when": null, "workflow_outputs": [{"label": "Uncompressed_RNA_Seq_Reads_1", "output_name": "output1", "uuid": "d6f2044c-cd3b-4fde-901a-2c6c7db5db4f"}]}, "6": {"annotation": "Convert_HUMAN_Uniprot_and_CRAP_FASTA_to_tabular", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "errors": null, "id": 6, "input_connections": {"input": {"id": 3, "output_name": "output"}}, "inputs": [], "label": "Convert_HUMAN_Uniprot_and_CRAP_FASTA_to_tabular", "name": 
"FASTA-to-Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1431.8593374399654, "top": 1368.1613827597016}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "HUMAN_Uniprot_and_CRAP.tabular"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "tool_shed_repository": {"changeset_revision": "e7ed3c310b74", "name": "fasta_to_tabular", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"descr_columns\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"keep_first\": \"0\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "e9f69ad5-d4bc-44ed-afdb-9c16e23257e4", "when": null, "workflow_outputs": [{"label": "HUMAN_Uniprot_and_CRAP.tabular", "output_name": "output", "uuid": "067e8f48-59d2-4f9c-bce9-13d1dca9d74e"}]}, "7": {"annotation": "HISAT2_Alignment_BAM", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.2.1+galaxy1", "errors": null, "id": 7, "input_connections": {"library|input_1": {"id": 5, "output_name": "output1"}, "library|input_2": {"id": 4, "output_name": "output1"}}, "inputs": [{"description": "runtime parameter for tool HISAT2", "name": "library"}, {"description": "runtime parameter for tool HISAT2", "name": "library"}], "label": "HISAT2_Alignment_BAM", "name": "HISAT2", "outputs": [{"name": "output_alignments", "type": "bam"}], "position": {"left": 559.9999869223138, "top": 639.1146182283327}, "post_job_actions": {"RenameDatasetActionoutput_alignments": {"action_arguments": {"newname": "HISAT2_Alignment_BAM"}, "action_type": "RenameDatasetAction", "output_name": "output_alignments"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/hisat2/hisat2/2.2.1+galaxy1", "tool_shed_repository": 
{"changeset_revision": "f4af63aaf57a", "name": "hisat2", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"input_options\": {\"input_options_selector\": \"defaults\", \"__current_case__\": 0}, \"alignment_options\": {\"alignment_options_selector\": \"defaults\", \"__current_case__\": 0}, \"scoring_options\": {\"scoring_options_selector\": \"defaults\", \"__current_case__\": 0}, \"spliced_options\": {\"spliced_options_selector\": \"defaults\", \"__current_case__\": 0}, \"reporting_options\": {\"reporting_options_selector\": \"defaults\", \"__current_case__\": 0}, \"output_options\": {\"output_options_selector\": \"defaults\", \"__current_case__\": 0}, \"sam_options\": {\"sam_options_selector\": \"defaults\", \"__current_case__\": 0}, \"other_options\": {\"other_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"library\": {\"type\": \"paired\", \"__current_case__\": 1, \"input_1\": {\"__class__\": \"ConnectedValue\"}, \"input_2\": {\"__class__\": \"ConnectedValue\"}, \"rna_strandness\": \"\", \"paired_options\": {\"paired_options_selector\": \"defaults\", \"__current_case__\": 0}}, \"reference_genome\": {\"source\": \"indexed\", \"__current_case__\": 0, \"index\": \"hg38\"}, \"sum\": {\"new_summary\": false, \"summary_file\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.2.1+galaxy1", "type": "tool", "uuid": "b6a3e86b-05f1-4ea5-bf92-ed6ff202e4ee", "when": null, "workflow_outputs": [{"label": "HISAT2_Alignment_BAM", "output_name": "output_alignments", "uuid": "0137dcb5-9d02-427f-82da-995723a93c85"}]}, "8": {"annotation": "Filtering_HUMAN_Uniprot_and_cRAP_Accessions_tabular", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/filter_tabular/filter_tabular/3.3.1", "errors": null, "id": 8, "input_connections": {"input": {"id": 6, "output_name": "output"}}, "inputs": [], "label": 
"Filtering_HUMAN_Uniprot_and_cRAP_Accessions_tabular", "name": "Filter Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1712.23678930761, "top": 1337.4909505244289}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Filtering_HUMAN_Uniprot_and_cRAP_Accessions_tabular"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/filter_tabular/filter_tabular/3.3.1", "tool_shed_repository": {"changeset_revision": "90f657745fea", "name": "filter_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"comment_char\": true, \"input\": {\"__class__\": \"ConnectedValue\"}, \"linefilters\": [{\"__index__\": 0, \"filter\": {\"filter_type\": \"select_columns\", \"__current_case__\": 9, \"columns\": \"1\"}}, {\"__index__\": 1, \"filter\": {\"filter_type\": \"replace\", \"__current_case__\": 11, \"column\": \"1\", \"regex_pattern\": \"^[^|]+[|]([^| ]+).*$\", \"regex_replace\": \"\\\\1\", \"add\": null}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.1", "type": "tool", "uuid": "0a3a1f8f-072f-4d0b-befa-93f2ccb5ffb8", "when": null, "workflow_outputs": [{"label": "Filtering_HUMAN_Uniprot_and_cRAP_Accessions_tabular", "output_name": "output", "uuid": "4a2dbdd0-8e12-418c-a8f9-38256a760be8"}]}, "9": {"annotation": "StringTie_Alignment_GTF", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/2.2.3+galaxy0", "errors": null, "id": 9, "input_connections": {"guide|guide_source|ref_hist": {"id": 0, "output_name": "output"}, "input_options|input_bam": {"id": 7, "output_name": "output_alignments"}}, "inputs": [{"description": "runtime parameter for tool StringTie", "name": "input_options"}], "label": "StringTie_Alignment_GTF", "name": "StringTie", "outputs": [{"name": "output_gtf", "type": "gtf"}], 
"position": {"left": 839.9999853865352, "top": 65}, "post_job_actions": {"RenameDatasetActionoutput_gtf": {"action_arguments": {"newname": "StringTie_Alignment_GTF"}, "action_type": "RenameDatasetAction", "output_name": "output_gtf"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/2.2.3+galaxy0", "tool_shed_repository": {"changeset_revision": "cbf488da3b2c", "name": "stringtie", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"abundance_estimation\": false, \"omit_sequences\": \"\", \"name_prefix\": null, \"fraction\": \"0.01\", \"min_tlen\": \"200\", \"min_anchor_len\": \"10\", \"min_anchor_cov\": \"1\", \"min_bundle_cov\": \"1\", \"bdist\": \"50\", \"bundle_fraction\": \"1.0\", \"disable_trimming\": false, \"multi_mapping\": false, \"point_features\": null}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"guide\": {\"use_guide\": \"yes\", \"__current_case__\": 1, \"guide_source\": {\"guide_gff_select\": \"history\", \"__current_case__\": 1, \"ref_hist\": {\"__class__\": \"ConnectedValue\"}}, \"input_estimation\": false, \"special_outputs\": {\"special_outputs_select\": \"no\", \"__current_case__\": 2}, \"coverage_file\": false}, \"input_options\": {\"input_mode\": \"short_reads\", \"__current_case__\": 0, \"input_bam\": {\"__class__\": \"ConnectedValue\"}}, \"rna_strandness\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.2.3+galaxy0", "type": "tool", "uuid": "c554cff4-7a4c-490c-9b8b-23cfb83ac6a3", "when": null, "workflow_outputs": [{"label": "StringTie_Alignment_GTF", "output_name": "output_gtf", "uuid": "a4ff5eba-c73a-424b-8a9e-cfe76a0fe77f"}]}, "10": {"annotation": "FreeBayes_variants_VCF", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/freebayes/freebayes/1.3.6+galaxy0", "errors": null, "id": 10, "input_connections": {"reference_source|batchmode|input_bams": {"id": 7, "output_name": "output_alignments"}}, "inputs": 
[], "label": "FreeBayes_variants_VCF", "name": "FreeBayes", "outputs": [{"name": "output_vcf", "type": "vcf"}], "position": {"left": 839.9999853865352, "top": 773}, "post_job_actions": {"RenameDatasetActionoutput_vcf": {"action_arguments": {"newname": "FreeBayes_variants_VCF"}, "action_type": "RenameDatasetAction", "output_name": "output_vcf"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/freebayes/freebayes/1.3.6+galaxy0", "tool_shed_repository": {"changeset_revision": "a5937157062f", "name": "freebayes", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"bam\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"coverage_options\": {\"coverage_options_selector\": \"do_not_set\", \"__current_case__\": 1}, \"options_type\": {\"options_type_selector\": \"simple\", \"__current_case__\": 1}, \"reference_source\": {\"reference_source_selector\": \"cached\", \"__current_case__\": 0, \"batchmode\": {\"processmode\": \"individual\", \"__current_case__\": 0, \"input_bams\": {\"__class__\": \"ConnectedValue\"}}, \"ref_file\": \"hg38\"}, \"target_limit_type\": {\"target_limit_type_selector\": \"do_not_limit\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.3.6+galaxy0", "type": "tool", "uuid": "ab0e57d8-8ec2-467d-bf3b-4b0be1260ad5", "when": null, "workflow_outputs": [{"label": "FreeBayes_variants_VCF", "output_name": "output_vcf", "uuid": "b4ebb058-327d-4284-b96e-f7491262912e"}]}, "11": {"annotation": "GffCompare_Annotated_Transcripts_GTF", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/gffcompare/gffcompare/0.12.6+galaxy0", "errors": null, "id": 11, "input_connections": {"conditional_annotation|ref_source|reference_annotation": {"id": 0, "output_name": "output"}, "gffinputs": {"id": 9, "output_name": "output_gtf"}}, "inputs": [], "label": "GffCompare_Annotated_Transcripts_GTF", "name": "GffCompare", "outputs": [{"name": "refmap_output_collection", 
"type": "input"}, {"name": "tmap_output_collection", "type": "input"}, {"name": "transcripts_annotated", "type": "gtf"}, {"name": "transcripts_combined", "type": "gtf"}, {"name": "refmap_output", "type": "tabular"}, {"name": "tmap_output", "type": "tabular"}, {"name": "transcripts_stats", "type": "txt"}, {"name": "transcripts_loci", "type": "tabular"}, {"name": "transcripts_tracking", "type": "tabular"}], "position": {"left": 1119.9999853865352, "top": 0}, "post_job_actions": {"RenameDatasetActiontranscripts_annotated": {"action_arguments": {"newname": "GffCompare_Annotated_Transcripts_GTF"}, "action_type": "RenameDatasetAction", "output_name": "transcripts_annotated"}, "RenameDatasetActiontranscripts_combined": {"action_arguments": {"newname": "GffCompare combined transcripts"}, "action_type": "RenameDatasetAction", "output_name": "transcripts_combined"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/gffcompare/gffcompare/0.12.6+galaxy0", "tool_shed_repository": {"changeset_revision": "3c5e024a18cf", "name": "gffcompare", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"adv_output\": {\"p\": \"TCONS\", \"C\": false, \"A\": false, \"X\": false, \"K\": false}, \"chr_stats\": false, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"conditional_annotation\": {\"selector\": \"yes\", \"__current_case__\": 0, \"ref_source\": {\"selector\": \"history\", \"__current_case__\": 1, \"reference_annotation\": {\"__class__\": \"ConnectedValue\"}}, \"R\": false, \"Q\": false, \"conditional_strict\": {\"selector\": \"\", \"__current_case__\": 0}, \"discard_single_exon\": \"\", \"conditional_duplication\": {\"selector\": \"\", \"__current_case__\": 0}, \"no_merge\": false}, \"gffinputs\": {\"__class__\": \"ConnectedValue\"}, \"max_dist_group\": \"100\", \"refmap_tmap\": true, \"seq_data\": {\"selector\": \"no\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": 
"0.12.6+galaxy0", "type": "tool", "uuid": "88de6da2-a823-496a-9670-b0056657014f", "when": null, "workflow_outputs": [{"label": "GffCompare_Annotated_Transcripts_GTF", "output_name": "transcripts_annotated", "uuid": "72c73a0b-6677-4701-ab52-4d296c553df9"}]}, "12": {"annotation": "CustomProDB to generate protein FASTAs from BAM and VCF files", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/custom_pro_db/custom_pro_db/1.22.0", "errors": null, "id": 12, "input_connections": {"genome_annotation|bamInput": {"id": 7, "output_name": "output_alignments"}, "genome_annotation|vcfInput": {"id": 10, "output_name": "output_vcf"}}, "inputs": [{"description": "runtime parameter for tool CustomProDB", "name": "genome_annotation"}, {"description": "runtime parameter for tool CustomProDB", "name": "genome_annotation"}], "label": "CustomProDB_protein_FASTAs_from_BAM_and_VCF", "name": "CustomProDB", "outputs": [{"name": "output_rpkm", "type": "fasta"}, {"name": "output_snv", "type": "fasta"}, {"name": "output_indel", "type": "fasta"}, {"name": "output_variant_annotation_rdata", "type": "rdata"}, {"name": "output_genomic_mapping_sqlite", "type": "sqlite"}, {"name": "output_variant_annotation_sqlite", "type": "sqlite"}], "position": {"left": 1119.9999571193289, "top": 654.9010592031063}, "post_job_actions": {"RenameDatasetActionoutput_genomic_mapping_sqlite": {"action_arguments": {"newname": "CustomProDB_Genomic_SQLlite"}, "action_type": "RenameDatasetAction", "output_name": "output_genomic_mapping_sqlite"}, "RenameDatasetActionoutput_indel": {"action_arguments": {"newname": "CustomProDB_INDEL_FASTA"}, "action_type": "RenameDatasetAction", "output_name": "output_indel"}, "RenameDatasetActionoutput_rpkm": {"action_arguments": {"newname": "CustomProDB_RPKM_FASTA"}, "action_type": "RenameDatasetAction", "output_name": "output_rpkm"}, "RenameDatasetActionoutput_snv": {"action_arguments": {"newname": "CustomProDB_SNV_FASTA"}, "action_type": "RenameDatasetAction", "output_name": 
"output_snv"}, "RenameDatasetActionoutput_variant_annotation_rdata": {"action_arguments": {"newname": "CustomProDB_VARIANT_ANNOTATION_RDATA"}, "action_type": "RenameDatasetAction", "output_name": "output_variant_annotation_rdata"}, "RenameDatasetActionoutput_variant_annotation_sqlite": {"action_arguments": {"newname": "CustomProDB_VARIANT_ANNOTATION_SQLite"}, "action_type": "RenameDatasetAction", "output_name": "output_variant_annotation_sqlite"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/custom_pro_db/custom_pro_db/1.22.0", "tool_shed_repository": {"changeset_revision": "2c7df0077d28", "name": "custom_pro_db", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"bam\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"genome_annotation\": {\"source\": \"builtin\", \"__current_case__\": 0, \"builtin\": \"hg38_GRCh38\", \"bamInput\": {\"__class__\": \"ConnectedValue\"}, \"vcfInput\": {\"__class__\": \"ConnectedValue\"}, \"dbsnpInCoding\": false, \"cosmic\": false}, \"outputIndels\": true, \"outputRData\": true, \"outputSQLite\": true, \"rpkmCutoff\": \"1.0\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.22.0", "type": "tool", "uuid": "6d92015c-72ea-4ef5-8160-7975f927555d", "when": null, "workflow_outputs": [{"label": "CustomProDB_INDEL_FASTA", "output_name": "output_indel", "uuid": "8ce883fd-1961-47f6-b7b0-8153b7d1b4be"}, {"label": "CustomProDB_VARIANT_ANNOTATION_RDATA", "output_name": "output_variant_annotation_rdata", "uuid": "b31fbd26-fec0-42d9-a079-a28ae64ec009"}, {"label": "CustomProDB_Genomic_SQLlite", "output_name": "output_genomic_mapping_sqlite", "uuid": "b7944440-cbca-4fb5-96fd-03e952854728"}, {"label": "CustomProDB_VARIANT_ANNOTATION_SQLite", "output_name": "output_variant_annotation_sqlite", "uuid": "9cee39b1-2766-43b0-b002-697e496a11d7"}, {"label": "CustomProDB_RPKM_FASTA", "output_name": "output_rpkm", "uuid": "8258d64c-c755-4b49-b417-32ea722773bd"}, 
{"label": "CustomProDB_SNV_FASTA", "output_name": "output_snv", "uuid": "9ce1b014-b001-4ed8-9144-5890bb8cdbd5"}]}, "13": {"annotation": "GffCompare_Annotated_GTF_to_BED", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/gffcompare_to_bed/gffcompare_to_bed/0.2.1", "errors": null, "id": 13, "input_connections": {"input": {"id": 11, "output_name": "transcripts_annotated"}}, "inputs": [], "label": "GffCompare_Annotated_GTF_to_BED", "name": "Convert gffCompare annotated GTF to BED", "outputs": [{"name": "output", "type": "bed"}], "position": {"left": 1404.041627321157, "top": 35.34896313992981}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "GffCompare_Annotated_GTF_to_BED"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/gffcompare_to_bed/gffcompare_to_bed/0.2.1", "tool_shed_repository": {"changeset_revision": "ba5368c19dbd", "name": "gffcompare_to_bed", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"gtf\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"class_codes\": [\"j\", \"e\", \"i\", \"p\", \"u\"], \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "0.2.1", "type": "tool", "uuid": "ffe0650f-d62a-4925-8769-d0a0a710c28f", "when": null, "workflow_outputs": [{"label": "GffCompare_Annotated_GTF_to_BED", "output_name": "output", "uuid": "751c3898-45a0-4836-bd33-44affbe9748a"}]}, "14": {"annotation": "Convert_INDEL_FASTA_to_tabular", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "errors": null, "id": 14, "input_connections": {"input": {"id": 12, "output_name": "output_indel"}}, "inputs": [], "label": "Convert_INDEL_FASTA_to_tabular", "name": "FASTA-to-Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1420.6554991618148, "top": 559.0073520043709}, 
"post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "CustomProDB_INDEL.tabular"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "tool_shed_repository": {"changeset_revision": "e7ed3c310b74", "name": "fasta_to_tabular", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"descr_columns\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"keep_first\": \"0\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "7e6ede2e-b9a7-4156-af94-f5553b7bbf4c", "when": null, "workflow_outputs": [{"label": "CustomProDB_INDEL.tabular", "output_name": "output", "uuid": "dc1bb9c3-0f77-4f30-a0ea-d164563269d8"}]}, "15": {"annotation": "Convert-SNV_FASTA_to_tabular", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "errors": null, "id": 15, "input_connections": {"input": {"id": 12, "output_name": "output_snv"}}, "inputs": [], "label": "Convert-SNV_FASTA_to_tabular", "name": "FASTA-to-Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1443.7968603865352, "top": 829.9793949491793}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "CustomProDB_SNV.tabular"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "tool_shed_repository": {"changeset_revision": "e7ed3c310b74", "name": "fasta_to_tabular", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"descr_columns\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"keep_first\": \"0\", \"__page__\": 
null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "c37b196e-1d53-492d-9ad0-bb84b9d6c547", "when": null, "workflow_outputs": [{"label": "CustomProDB_SNV.tabular", "output_name": "output", "uuid": "0d8682f5-66fa-40db-b280-d6f1bb4a5717"}]}, "16": {"annotation": "Convert-RPKM_FASTA_to_tabular", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "errors": null, "id": 16, "input_connections": {"input": {"id": 12, "output_name": "output_rpkm"}}, "inputs": [], "label": "Convert-RPKM_FASTA_to_tabular", "name": "FASTA-to-Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1420.2394757738223, "top": 982.5780611339459}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "CustomProDB_RPKM.tabular"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "tool_shed_repository": {"changeset_revision": "e7ed3c310b74", "name": "fasta_to_tabular", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"descr_columns\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"keep_first\": \"0\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "d342afe5-3715-425d-a0d2-b9052ed17cc6", "when": null, "workflow_outputs": [{"label": "CustomProDB_RPKM.tabular", "output_name": "output", "uuid": "88cfd137-3b8e-4c69-b38a-0b9ea8fea7fc"}]}, "17": {"annotation": "Converting_Genomic_SQLite_to_database_mode", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/sqlite_to_tabular/sqlite_to_tabular/2.0.0", "errors": null, "id": 17, "input_connections": {"sqlitedb": {"id": 12, "output_name": "output_genomic_mapping_sqlite"}}, "inputs": [], "label": "Converting_Genomic_SQLite_to_database_mode", "name": 
"SQLite to tabular", "outputs": [{"name": "query_results", "type": "tabular"}], "position": {"left": 1699.7468397400469, "top": 337.4479630839247}, "post_job_actions": {"RenameDatasetActionquery_results": {"action_arguments": {"newname": "Convert_Genomic_SQLite_to_tabular"}, "action_type": "RenameDatasetAction", "output_name": "query_results"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/sqlite_to_tabular/sqlite_to_tabular/2.0.0", "tool_shed_repository": {"changeset_revision": "b722161a845a", "name": "sqlite_to_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"sqlite\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"sqlitedb\": {\"__class__\": \"ConnectedValue\"}, \"sqlquery\": \"SELECT pro_name, chr_name, cds_chr_start - 1, cds_chr_end,strand,cds_start - 1, cds_end\\n FROM genomic_mapping\\n ORDER BY pro_name, cds_start, cds_end\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.0.0", "type": "tool", "uuid": "e5db24d8-712c-4334-998c-2946bb85bfad", "when": null, "workflow_outputs": [{"label": "Convert_Genomic_SQLite_to_tabular", "output_name": "query_results", "uuid": "91eaaad0-e93e-434b-b87c-2de0f9c232ef"}]}, "18": {"annotation": "Converting_CustomProDB_FASTA_to_tabular", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "errors": null, "id": 18, "input_connections": {"input": {"id": 12, "output_name": "output_rpkm"}}, "inputs": [], "label": "Converting_CustomProDB_FASTA_to_tabular", "name": "FASTA-to-Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1370.0676390744181, "top": 1145.0312528707605}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "CustomProDB_FASTA_to_tabular"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": 
"toolshed.g2.bx.psu.edu/repos/devteam/fasta_to_tabular/fasta2tab/1.1.1", "tool_shed_repository": {"changeset_revision": "e7ed3c310b74", "name": "fasta_to_tabular", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"descr_columns\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"keep_first\": \"0\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "bd46c339-1a55-4e3d-8aca-0abce96561c7", "when": null, "workflow_outputs": [{"label": "CustomProDB_FASTA_to_tabular", "output_name": "output", "uuid": "6bcf6e52-85a8-439e-83cc-50de2cc8749d"}]}, "19": {"annotation": "Converting_Variant_SQLite_to_database_mode", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/sqlite_to_tabular/sqlite_to_tabular/2.0.0", "errors": null, "id": 19, "input_connections": {"sqlitedb": {"id": 12, "output_name": "output_variant_annotation_sqlite"}}, "inputs": [], "label": "Converting_Variant_SQLite_to_database_mode", "name": "SQLite to tabular", "outputs": [{"name": "query_results", "type": "tabular"}], "position": {"left": 1955.5073237140923, "top": 494.36259587125755}, "post_job_actions": {"RenameDatasetActionquery_results": {"action_arguments": {"newname": "Convert_Variant_SQLite_to_tabular"}, "action_type": "RenameDatasetAction", "output_name": "query_results"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/sqlite_to_tabular/sqlite_to_tabular/2.0.0", "tool_shed_repository": {"changeset_revision": "b722161a845a", "name": "sqlite_to_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"sqlite\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"sqlitedb\": {\"__class__\": \"ConnectedValue\"}, \"sqlquery\": \"SELECT var_pro_name,pro_name,cigar,annotation\\nFROM variant_annotation\", 
\"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.0.0", "type": "tool", "uuid": "d37b3574-1248-437b-ad45-a9730c24c9ef", "when": null, "workflow_outputs": [{"label": "Convert_Variant_SQLite_to_tabular", "output_name": "query_results", "uuid": "7698d1f1-7979-4deb-9ac2-0f1541d905ce"}]}, "20": {"annotation": "Translate_BED_Transcripts", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/translate_bed/translate_bed/0.1.0", "errors": null, "id": 20, "input_connections": {"input": {"id": 13, "output_name": "output"}}, "inputs": [], "label": "Translate_BED_Transcripts", "name": "Translate BED transcripts", "outputs": [{"name": "translation_bed", "type": "bed"}, {"name": "translation_fasta", "type": "fasta"}], "position": {"left": 1701.9774551941957, "top": 109.15800194287092}, "post_job_actions": {"RenameDatasetActiontranslation_bed": {"action_arguments": {"newname": "Translate_BED_Transcripts"}, "action_type": "RenameDatasetAction", "output_name": "translation_bed"}, "RenameDatasetActiontranslation_fasta": {"action_arguments": {"newname": "Translation_FASTA"}, "action_type": "RenameDatasetAction", "output_name": "translation_fasta"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/translate_bed/translate_bed/0.1.0", "tool_shed_repository": {"changeset_revision": "038ecf54cbec", "name": "translate_bed", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"bed\", \"bed_filters\": {\"regions\": \"\", \"biotypes\": \"\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"fa_id\": {\"reference\": \"\", \"fa_db\": \"generic\", \"fa_sep\": \"\", \"id_prefix\": \"\"}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"ref\": {\"ref_source\": \"cached\", \"__current_case__\": 0, \"ref_loc\": \"hg38\"}, \"translations\": {\"translate\": \"cDNA_minus_CDS\", \"min_length\": \"10\", \"start_codon\": false, \"enzyme\": null}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": 
"0.1.0", "type": "tool", "uuid": "61de2184-54b1-4383-b4ee-303b51f4d760", "when": null, "workflow_outputs": [{"label": "Translation_FASTA", "output_name": "translation_fasta", "uuid": "9510c804-d13e-4cd6-b08e-1c670623e296"}, {"label": "Translate_BED_Transcripts", "output_name": "translation_bed", "uuid": "4ebcd87a-1526-4c05-914d-6051b996201d"}]}, "21": {"annotation": "Annotating-INDEL", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "errors": null, "id": 21, "input_connections": {"input": {"id": 14, "output_name": "output"}}, "inputs": [], "label": "Annotating-INDEL", "name": "Column Regex Find And Replace", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1700.000047697945, "top": 682.2414705939166}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Annotating-INDEL"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "tool_shed_repository": {"changeset_revision": "503bcd6ebe4b", "name": "regex_find_replace", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"checks\": [{\"__index__\": 0, \"pattern\": \"^(ENS[^_]+_\\\\d+:)([ACGTacgt]+)>([ACGTacgt]+)\\\\s*\", \"replacement\": \"\\\\1\\\\2_\\\\3\"}, {\"__index__\": 1, \"pattern\": \",([A-Y]\\\\d+[A-Y]?)\\\\s*\", \"replacement\": \".\\\\1\"}, {\"__index__\": 2, \"pattern\": \"^(ENS[^ |]*)\\\\s*\", \"replacement\": \"\\\\1\"}, {\"__index__\": 3, \"pattern\": \"^(.*)$\", \"replacement\": \"generic|INDEL_\\\\1\"}], \"field\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.3", "type": "tool", "uuid": "5c3bbf5d-a045-4f9a-9baf-a8837f1a9a93", "when": null, "workflow_outputs": [{"label": "Annotating-INDEL", "output_name": "out_file1", "uuid": "aff83bc0-c7af-4257-a51c-93dfe90ae6c0"}]}, "22": {"annotation": 
"Annotating-SNV", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "errors": null, "id": 22, "input_connections": {"input": {"id": 15, "output_name": "output"}}, "inputs": [], "label": "Annotating-SNV", "name": "Column Regex Find And Replace", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1699.9999853865352, "top": 836}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Annotating-SNV"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "tool_shed_repository": {"changeset_revision": "503bcd6ebe4b", "name": "regex_find_replace", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"checks\": [{\"__index__\": 0, \"pattern\": \"^(ENS[^_]+_\\\\d+:)([ACGTacgt]+)>([ACGTacgt]+)\\\\s*\", \"replacement\": \"\\\\1\\\\2_\\\\3\"}, {\"__index__\": 1, \"pattern\": \",([A-Y]\\\\d+[A-Y]?)\\\\s*\", \"replacement\": \".\\\\1\"}, {\"__index__\": 2, \"pattern\": \"^(ENS[^ |]*)\\\\s*\", \"replacement\": \"\\\\1\"}, {\"__index__\": 3, \"pattern\": \"^(.*)$\", \"replacement\": \"generic|SNV_\\\\1\"}], \"field\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.3", "type": "tool", "uuid": "9a3398ea-e8e3-4198-98a3-3fa9ac8e59ca", "when": null, "workflow_outputs": [{"label": "Annotating-SNV", "output_name": "out_file1", "uuid": "960e7c54-1a91-4b92-b75c-0da23fe1650a"}]}, "23": {"annotation": "Annotating-RPKM", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "errors": null, "id": 23, "input_connections": {"input": {"id": 16, "output_name": "output"}}, "inputs": [], "label": "Annotating-RPKM", "name": "Column Regex Find And Replace", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1699.9999853865352, "top": 
990}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Annotating-RPKM"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "tool_shed_repository": {"changeset_revision": "503bcd6ebe4b", "name": "regex_find_replace", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"checks\": [{\"__index__\": 0, \"pattern\": \"^(ENS[^_]+_\\\\d+:)([ACGTacgt]+)>([ACGTacgt]+)\\\\s*\", \"replacement\": \"\\\\1\\\\2_\\\\3\"}, {\"__index__\": 1, \"pattern\": \",([A-Y]\\\\d+[A-Y]?)\\\\s*\", \"replacement\": \".\\\\1\"}, {\"__index__\": 2, \"pattern\": \"^(ENS[^ |]*)\\\\s*\", \"replacement\": \"\\\\1\"}, {\"__index__\": 3, \"pattern\": \"^(.*)$\", \"replacement\": \"generic|RPKM_\\\\1\"}], \"field\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.3", "type": "tool", "uuid": "478fc18e-6230-48ab-b0ed-d6a37cc1b5cb", "when": null, "workflow_outputs": [{"label": "Annotating-RPKM", "output_name": "out_file1", "uuid": "ef203c53-9e5b-4120-a508-a0dcbbfaec3b"}]}, "24": {"annotation": "Annotating_Genomic_SQLite", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "errors": null, "id": 24, "input_connections": {"input": {"id": 17, "output_name": "query_results"}}, "inputs": [], "label": "Annotating_Genomic_SQLite", "name": "Column Regex Find And Replace", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 1989.9999665492253, "top": 321.99997918505875}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Annotating_Genomic_SQLite"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "tool_shed_repository": {"changeset_revision": "503bcd6ebe4b", 
"name": "regex_find_replace", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"checks\": [{\"__index__\": 0, \"pattern\": \"^(ENS[^_]+_\\\\d+:)([ACGTacgt]+)>([ACGTacgt]+)\\\\s*\", \"replacement\": \"\\\\1\\\\2_\\\\3\"}, {\"__index__\": 1, \"pattern\": \",([A-Y]\\\\d+[A-Y]?)\\\\s*\", \"replacement\": \".\\\\1\"}, {\"__index__\": 2, \"pattern\": \"^(ENS[^ |]*)\\\\s*\", \"replacement\": \"\\\\1\"}], \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"field\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.3", "type": "tool", "uuid": "767e1cd5-eb88-48b1-a611-dad410f41c57", "when": null, "workflow_outputs": [{"label": "Annotating_Genomic_SQLite", "output_name": "out_file1", "uuid": "045776fe-4fc9-4da1-b808-4ab674dcced4"}]}, "25": {"annotation": "Filtering_RPKM_accessions", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/filter_tabular/filter_tabular/3.3.1", "errors": null, "id": 25, "input_connections": {"input": {"id": 18, "output_name": "output"}}, "inputs": [], "label": "Filtering_RPKM_accessions", "name": "Filter Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1672.1032367017563, "top": 1135.2956637847162}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Filtering_RPKM_accessions"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/filter_tabular/filter_tabular/3.3.1", "tool_shed_repository": {"changeset_revision": "90f657745fea", "name": "filter_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"comment_char\": true, \"input\": {\"__class__\": \"ConnectedValue\"}, \"linefilters\": [{\"__index__\": 0, \"filter\": {\"filter_type\": \"select_columns\", 
\"__current_case__\": 9, \"columns\": \"1\"}}, {\"__index__\": 1, \"filter\": {\"filter_type\": \"replace\", \"__current_case__\": 11, \"column\": \"1\", \"regex_pattern\": \"^([^ |]+).*$\", \"regex_replace\": \"\\\\1\", \"add\": null}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.1", "type": "tool", "uuid": "52a4ebc9-1029-4c37-8c4c-53adc7da962d", "when": null, "workflow_outputs": [{"label": "Filtering_RPKM_accessions", "output_name": "output", "uuid": "d23ae9b0-374f-4668-831c-c08217161834"}]}, "26": {"annotation": "Annotating_Variant_SQLite", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "errors": null, "id": 26, "input_connections": {"input": {"id": 19, "output_name": "query_results"}}, "inputs": [], "label": "Annotating_Variant_SQLite", "name": "Column Regex Find And Replace", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 2283.7187854596787, "top": 510.3229781560865}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Annotating_Variant_SQLite"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "tool_shed_repository": {"changeset_revision": "503bcd6ebe4b", "name": "regex_find_replace", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"checks\": [{\"__index__\": 0, \"pattern\": \"^(ENS[^_]+_\\\\d+:)([ACGTacgt]+)>([ACGTacgt]+)\\\\s*\", \"replacement\": \"\\\\1\\\\2_\\\\3\"}, {\"__index__\": 1, \"pattern\": \",([A-Y]\\\\d+[A-Y]?)\\\\s*\", \"replacement\": \".\\\\1\"}, {\"__index__\": 2, \"pattern\": \"^(ENS[^ |]*)\\\\s*\", \"replacement\": \"\\\\1\"}], \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"field\": \"1\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": 
"1.0.3", "type": "tool", "uuid": "d071bcc7-951e-4ce4-a0cf-725cd418f3f9", "when": null, "workflow_outputs": [{"label": "Annotating_Variant_SQLite", "output_name": "out_file1", "uuid": "0238c405-dbcb-40a0-b9ce-6bc091cc01c5"}]}, "27": {"annotation": "Convert_Translation_BED_to_tabular_for_protein_map", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/bed_to_protein_map/bed_to_protein_map/0.2.0", "errors": null, "id": 27, "input_connections": {"input": {"id": 20, "output_name": "translation_bed"}}, "inputs": [], "label": "Convert_Translation_BED_to_tabular_for_protein_map", "name": "bed to protein map", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 2122.6804386774656, "top": 47.272708254414795}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Translation_tabular_for_protein_map"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/bed_to_protein_map/bed_to_protein_map/0.2.0", "tool_shed_repository": {"changeset_revision": "a7c58b43cbaa", "name": "bed_to_protein_map", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"bed\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "0.2.0", "type": "tool", "uuid": "f7ab95f3-8a94-49b0-b1a5-1764a2337a06", "when": null, "workflow_outputs": [{"label": "Translation_tabular_for_protein_map", "output_name": "output", "uuid": "873fc597-ac53-4dfa-8f5a-d5532af8886c"}]}, "28": {"annotation": "Converting_Annotated_Indel_to_FASTA", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "errors": null, "id": 28, "input_connections": {"input": {"id": 21, "output_name": "out_file1"}}, "inputs": [], "label": "Converting_Annotated_Indel_to_FASTA", "name": "Tabular-to-FASTA", "outputs": [{"name": "output", 
"type": "fasta"}], "position": {"left": 1989.7468693076564, "top": 682.2531543064655}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Converting_Annotated_Indel_to_FASTA"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "tool_shed_repository": {"changeset_revision": "0a7799698fe5", "name": "tabular_to_fasta", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"seq_col\": \"2\", \"title_col\": [\"1\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "c35300a9-31c3-4420-8e90-13bdd047cb3a", "when": null, "workflow_outputs": [{"label": "Converting_Annotated_Indel_to_FASTA", "output_name": "output", "uuid": "ae642819-14c2-46d9-a31f-0b142c074a8c"}]}, "29": {"annotation": "Converting_Annotated_SNV_to_FASTA", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "errors": null, "id": 29, "input_connections": {"input": {"id": 22, "output_name": "out_file1"}}, "inputs": [], "label": "Converting_Annotated_SNV_to_FASTA", "name": "Tabular-to-FASTA", "outputs": [{"name": "output", "type": "fasta"}], "position": {"left": 1989.9999853865352, "top": 836}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Converting_Annotated_SNV_to_FASTA"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "tool_shed_repository": {"changeset_revision": "0a7799698fe5", "name": "tabular_to_fasta", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"input\": 
{\"__class__\": \"ConnectedValue\"}, \"seq_col\": \"2\", \"title_col\": [\"1\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "fd1c3ef9-1a68-4879-ba47-1c7c66887790", "when": null, "workflow_outputs": [{"label": "Converting_Annotated_SNV_to_FASTA", "output_name": "output", "uuid": "49e24e6a-ae3e-463c-915b-65f895a471e4"}]}, "30": {"annotation": "Converting_Annotated_RPKM_to_FASTA", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "errors": null, "id": 30, "input_connections": {"input": {"id": 23, "output_name": "out_file1"}}, "inputs": [], "label": "Converting_Annotated_RPKM_to_FASTA", "name": "Tabular-to-FASTA", "outputs": [{"name": "output", "type": "fasta"}], "position": {"left": 1989.9999853865352, "top": 990}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Converting_Annotated_RPKM_to_FASTA"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "tool_shed_repository": {"changeset_revision": "0a7799698fe5", "name": "tabular_to_fasta", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"seq_col\": \"2\", \"title_col\": [\"1\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "d633cca6-66cc-40b4-bbd4-9426d449cd66", "when": null, "workflow_outputs": [{"label": "Converting_Annotated_RPKM_to_FASTA", "output_name": "output", "uuid": "0969ba6f-4bd8-4464-94cc-c990cf395507"}]}, "31": {"annotation": "Not needed for Fragpipe or MaxQuant", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_cat/9.3+galaxy1", "errors": null, "id": 31, "input_connections": {"inputs": [{"id": 8, "output_name": "output"}, 
{"id": 25, "output_name": "output"}]}, "inputs": [], "label": "Concatenate_HUMAN_Crap_protein-accessions", "name": "Concatenate datasets", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 2040.5311136293037, "top": 1332.1822204349364}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Concatenate_HUMAN_Crap_protein-accessions"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_cat/9.3+galaxy1", "tool_shed_repository": {"changeset_revision": "86755160afbf", "name": "text_processing", "owner": "bgruening", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"inputs\": {\"__class__\": \"ConnectedValue\"}, \"queries\": [], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "9.3+galaxy1", "type": "tool", "uuid": "fca9fcfe-43be-4665-9e63-4b7f3c5379e8", "when": null, "workflow_outputs": [{"label": "Concatenate_HUMAN_Crap_protein-accessions", "output_name": "out_file1", "uuid": "e3f2a023-888a-4d80-b16c-ea87adbc6588"}]}, "32": {"annotation": "Variant_input_for_MVP", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 32, "input_connections": {"tables_0|table": {"id": 26, "output_name": "out_file1"}}, "inputs": [], "label": "Variant_input_for_MVP", "name": "Query Tabular", "outputs": [{"name": "sqlitedb", "type": "sqlite"}], "position": {"left": 2548.7669361283706, "top": 490.01309046421}, "post_job_actions": {"RenameDatasetActionsqlitedb": {"action_arguments": {"newname": "Variant_input_for_MVP"}, "action_type": "RenameDatasetAction", "output_name": "sqlitedb"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", 
"tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"tabular\", \"add_to_database\": {\"withdb\": null}, \"addqueries\": {\"queries\": []}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"yes\", \"__current_case__\": 0, \"header_prefix\": \"35\"}, \"save_db\": true, \"sqlquery\": \"\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"variant_annotation\", \"column_names_from_first_line\": false, \"col_names\": \"name,reference,cigar,annotation\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": [{\"__index__\": 0, \"unique\": false, \"index_columns\": \"name,cigar\"}]}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "806e9bc5-65f2-4d08-a931-5f56cbb2343b", "when": null, "workflow_outputs": [{"label": "Variant_input_for_MVP", "output_name": "sqlitedb", "uuid": "ef638bf1-2cd7-4c4f-acc7-0fc5ce771756"}]}, "33": {"annotation": "Concatenate_databases_from_Genomic_SQlite_and_translation_BED_file", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_cat/9.3+galaxy1", "errors": null, "id": 33, "input_connections": {"inputs": [{"id": 24, "output_name": "out_file1"}, {"id": 27, "output_name": "output"}]}, "inputs": [], "label": "Concatenate_databases_from_Genomic_SQlite_and_translation_BED_file", "name": "Concatenate datasets", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 2280.4843848686173, "top": 239.71359026531047}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Concatenate_databases_from_Genomic_SQlite_and_translation_BED_file"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": 
"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_cat/9.3+galaxy1", "tool_shed_repository": {"changeset_revision": "86755160afbf", "name": "text_processing", "owner": "bgruening", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"inputs\": {\"__class__\": \"ConnectedValue\"}, \"queries\": [], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "9.3+galaxy1", "type": "tool", "uuid": "0c558f24-d7bc-4252-837c-103bab10cfe3", "when": null, "workflow_outputs": [{"label": "Concatenate_databases_from_Genomic_SQlite_and_translation_BED_file", "output_name": "out_file1", "uuid": "77671f49-26c7-4eeb-b38c-62f09933e99b"}]}, "34": {"annotation": "Merge_Indel_SNV_RPKM_to_make_Non_normal_CustomProDB_FASTA", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0", "errors": null, "id": 34, "input_connections": {"batchmode|input_fastas_0|input_fasta": {"id": 30, "output_name": "output"}, "batchmode|input_fastas_1|input_fasta": {"id": 29, "output_name": "output"}, "batchmode|input_fastas_2|input_fasta": {"id": 28, "output_name": "output"}}, "inputs": [], "label": "Merge_Indel_SNV_RPKM_to_make_Non_normal_CustomProDB_FASTA", "name": "FASTA Merge Files and Filter Unique Sequences", "outputs": [{"name": "output", "type": "fasta"}], "position": {"left": 2287.481324251687, "top": 823.08471448979}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "non-reference_CustomProDB_FASTA"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0", "tool_shed_repository": {"changeset_revision": "f546e7278f04", "name": "fasta_merge_files_and_filter_unique_sequences", "owner": "galaxyp", 
"tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"fasta\", \"accession_parser\": \"^>([^ ]+).*$\", \"batchmode\": {\"processmode\": \"individual\", \"__current_case__\": 0, \"input_fastas\": [{\"__index__\": 0, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 2, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}]}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"uniqueness_criterion\": \"sequence\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.2.0", "type": "tool", "uuid": "d9183593-8749-41ec-88a9-0ab0b25fa371", "when": null, "workflow_outputs": [{"label": "non-reference_CustomProDB_FASTA", "output_name": "output", "uuid": "81e3ae85-7111-4517-a9fe-64f245654ad4"}]}, "35": {"annotation": "Genomic_input_for_MVP", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 35, "input_connections": {"tables_0|table": {"id": 33, "output_name": "out_file1"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Genomic_input_for_MVP", "name": "Query Tabular", "outputs": [{"name": "sqlitedb", "type": "sqlite"}], "position": {"left": 2544.3295594665274, "top": 280.2738982833467}, "post_job_actions": {"RenameDatasetActionsqlitedb": {"action_arguments": {"newname": "Genomic_input_for_MVP"}, "action_type": "RenameDatasetAction", "output_name": "sqlitedb"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, 
\"save_db\": true, \"sqlquery\": \"\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"feature_cds_map\", \"column_names_from_first_line\": false, \"col_names\": \"name,chrom,start,end,strand,cds_start,cds_end\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": [{\"__index__\": 0, \"unique\": false, \"index_columns\": \"name,cds_start,cds_end\"}]}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "71ae5980-08b0-470b-b23a-ab4e12360e1c", "when": null, "workflow_outputs": [{"label": "Genomic_input_for_MVP", "output_name": "sqlitedb", "uuid": "d72ce75b-6ee3-4bcb-b829-f3c85b3c5a65"}]}, "36": {"annotation": "Human + crap + non-reference transcripts dB generation", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0", "errors": null, "id": 36, "input_connections": {"batchmode|input_fastas_0|input_fasta": {"id": 3, "output_name": "output"}, "batchmode|input_fastas_1|input_fasta": {"id": 34, "output_name": "output"}, "batchmode|input_fastas_2|input_fasta": {"id": 20, "output_name": "translation_fasta"}}, "inputs": [], "label": "Human_cRAP_Non_normal_transcripts_dB generation", "name": "FASTA Merge Files and Filter Unique Sequences", "outputs": [{"name": "output", "type": "fasta"}], "position": {"left": 2342.2810653459683, "top": 1220.0364245096291}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Human_cRAP_Non_normal_transcripts_dB generation"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/fasta_merge_files_and_filter_unique_sequences/fasta_merge_files_and_filter_unique_sequences/1.2.0", "tool_shed_repository": {"changeset_revision": "f546e7278f04", "name": 
"fasta_merge_files_and_filter_unique_sequences", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"__input_ext\": \"fasta\", \"accession_parser\": \"^>([^ ]+).*$\", \"batchmode\": {\"processmode\": \"individual\", \"__current_case__\": 0, \"input_fastas\": [{\"__index__\": 0, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 2, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}]}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/hg38.len\", \"uniqueness_criterion\": \"sequence\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.2.0", "type": "tool", "uuid": "343f4889-a82e-4400-a9a4-e3488589782a", "when": null, "workflow_outputs": [{"label": "Human_cRAP_Non_normal_transcripts_dB generation", "output_name": "output", "uuid": "400380de-2047-4c30-a9e4-320a19489be4"}]}}, "tags": ["name:neoantigen"], "uuid": "0c01260b-203d-4506-9616-8bbf8f4517e0", "version": 0} \ No newline at end of file diff --git a/topics/proteomics/tutorials/neoantigen-peptide-verification/tutorial.md b/topics/proteomics/tutorials/neoantigen-peptide-verification/tutorial.md index 8ea11124132b17..176421778223d0 100644 --- a/topics/proteomics/tutorials/neoantigen-peptide-verification/tutorial.md +++ b/topics/proteomics/tutorials/neoantigen-peptide-verification/tutorial.md @@ -170,10 +170,10 @@ PepQuery2 is a tool used to validate novel peptides and proteins by searching ma > - *"Precursor Unit"*: `ppm` > - In *"PSM"*: > - *"Fragmentation Method"*: `CID/HCD` -> - *"Minimum Charge"*: `2` +> - *"Minimum Charge"*: `1` > - *"Maximum Charge"*: `3` -> - *"Minimum length of peptide"*: `8` -> - *"Maximum length of peptide"*: `9` +> - *"Minimum length of peptide"*: `9` +> - *"Maximum length of peptide"*: `11` > - *"Use fast mode for searching"*: `Yes` > > @@ -241,8 +241,8 @@ Tabular-to-FASTA is a tool used to convert tabular data into the FASTA format, c 
> > 1. {% tool [Tabular-to-FASTA](toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1) %} with the following parameters: > - {% icon param-file %} *"Tab-delimited file"*: `output` (output of **Query Tabular** {% icon tool %}) -> - *"Title column(s)"*: `c['2']` -> - *"Sequence column"*: `c1` +> - *"Title column(s)"*: `2` +> - *"Sequence column"*: `1` > > {: .hands_on} @@ -378,11 +378,9 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > Running the Workflow > -> 1. **Import the workflow** into Galaxy: -> -> {% snippet faqs/galaxy/workflows_run_trs.md path="topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/main_workflow.ga" title="PepQuery2 Verification" %} -> -> +> 1. **Import the workflow** into Galaxy: +> - [Neoantigen Peptide Verification](https://tinyurl.com/ipepgen-pepquery-wf) +> > 2. Run **Workflow** {% icon workflow %} using the following parameters: > - *"Send results to a new history"*: `No` > - {% icon param-file %} *"Candidate_Neoantigens"*: `NeoAntigen-Candidates.tabular` @@ -391,6 +389,13 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > - {% icon param-file %} *"Human Taxonomy ID"*: `Human-TaxID.txt` > > {% snippet faqs/galaxy/workflows_run.md %} +> +> DISCLAIMER +> +> - If any step in this workflow fails, please ensure that the input files have been correctly generated and formatted by the preceding tools. Workflow failures often result from improperly called or incomplete input data rather than errors in the workflow itself. Users are responsible for verifying their input before troubleshooting workflow issues. 
+> +> {: .comment} +> > {: .hands_on} diff --git a/topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/index.md b/topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/index.md index efccc1deb87069..cdf8f5076a14c1 100644 --- a/topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/index.md +++ b/topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/index.md @@ -3,4 +3,5 @@ layout: workflow-list redirect_from: - /topics/proteomics/tutorials/neoantigen-4-peptide-verification/workflows/ + - /topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/main_workflow.html --- diff --git a/topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/main_workflow.ga b/topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/main_workflow.ga deleted file mode 100644 index 2a2c608e41a1d2..00000000000000 --- a/topics/proteomics/tutorials/neoantigen-peptide-verification/workflows/main_workflow.ga +++ /dev/null @@ -1 +0,0 @@ -{"a_galaxy_workflow": "true", "annotation": "Validate the NeoAntigen Candidates from FragPipe discovery through the PepQuery Novel search", "comments": [], "creator": [{"class": "Organization", "name": "GalaxyP"}], "format-version": "0.1", "license": "GPL-3.0-or-later", "name": "GigaScience_PepQuery2_demonstration_STS26T_neoantigen_candidates_workflow", "report": {"markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"}, "steps": {"0": {"annotation": "Candidate Neoantigens from Fragpipe Workflow", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [{"description": "Candidate Neoantigens from Fragpipe Workflow", "name": "Candidate_Neoantigens"}], "label": "Candidate_Neoantigens", "name": "Input dataset", "outputs": [], "position": {"left": 0, "top": 0}, "tool_id": null, 
"tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "60cb8414-561d-4004-a2e2-8b90f51bb7e4", "when": null, "workflow_outputs": []}, "1": {"annotation": "Known human and contaminant protein fasta database", "content_id": null, "errors": null, "id": 1, "input_connections": {}, "inputs": [{"description": "Known human and contaminant protein fasta database", "name": "HUMAN_Uniprot_and_CRAP.fasta"}], "label": "HUMAN_Uniprot_and_CRAP.fasta", "name": "Input dataset", "outputs": [], "position": {"left": 1.47265625, "top": 113.1015625}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "dcb42406-0659-4ed1-be73-0d3437f586ad", "when": null, "workflow_outputs": []}, "2": {"annotation": "RAW File", "content_id": null, "errors": null, "id": 2, "input_connections": {}, "inputs": [{"description": "RAW File", "name": "Input Raw-files"}], "label": "Input Raw-files", "name": "Input dataset", "outputs": [], "position": {"left": 12.6328125, "top": 330.1953125}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "290d38c4-dfbe-4009-89d8-701aaed023d2", "when": null, "workflow_outputs": []}, "3": {"annotation": "this is for blastP taxid restrictions- homo sapiens-9606", "content_id": null, "errors": null, "id": 3, "input_connections": {}, "inputs": [{"description": "this is for blastP taxid restrictions- homo sapiens-9606", "name": "Human-TAX-ID"}], "label": "Human-TAX-ID", "name": "Input dataset", "outputs": [], "position": {"left": 663.48828125, "top": 210.65625}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "e579dcc6-3daf-41ba-81e4-9b40f32e70ee", "when": null, "workflow_outputs": []}, "4": {"annotation": "Converting Raw to MGF", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/msconvert/msconvert/3.0.20287.2", 
"errors": null, "id": 4, "input_connections": {"input": {"id": 2, "output_name": "output"}}, "inputs": [], "label": "Converting_RAW_to_MGF", "name": "msconvert", "outputs": [{"name": "output", "type": "mzml"}], "position": {"left": 47.16005404527099, "top": 546.0879848438761}, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/msconvert/msconvert/3.0.20287.2", "tool_shed_repository": {"changeset_revision": "6153e8ada1ee", "name": "msconvert", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"data_processing\": {\"peak_picking\": {\"pick_peaks\": true, \"__current_case__\": 1, \"pick_peaks_ms_levels\": \"1-\", \"pick_peaks_algorithm\": \"vendor\"}, \"precursor_refinement\": {\"use_mzrefinement\": false, \"__current_case__\": 0}, \"charge_state_calculation\": {\"charge_state_calculation_method\": \"false\", \"__current_case__\": 0}, \"thresholds\": [], \"filter_mz_windows\": {\"do_mzwindow_filter\": false, \"__current_case__\": 0}, \"etd_filtering\": {\"do_etd_filtering\": false, \"__current_case__\": 0}, \"ms2denoise\": {\"denoise\": false, \"__current_case__\": 1}, \"ms2deisotope\": false, \"demultiplex\": {\"demultiplex_on\": false, \"__current_case__\": 1}}, \"filtering\": {\"activation\": \"false\", \"indices\": [], \"scan_numbers\": [], \"strip_it\": false, \"filter_ms_levels\": {\"do_ms_level_filter\": false, \"__current_case__\": 0}, \"polarity\": \"false\", \"analyzer\": \"false\"}, \"general_options\": {\"combineIonMobilitySpectra\": false, \"scan_summing\": {\"do_scan_summing\": false, \"__current_case__\": 0}, \"simAsSpectra\": false, \"srmAsSpectra\": false, \"acceptZeroLengthSpectra\": false, \"ignoreUnknownInstrumentError\": false, \"multi_run_output\": {\"do_multi_run_output\": false, \"__current_case__\": 0, \"runIndexSet\": \"0\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"license_agreement\": true, \"output_type\": \"mgf\", \"settings\": {\"mz_encoding\": \"64\", \"intensity_encoding\": 
\"32\", \"binary_compression\": \"zlib\", \"gzip_compression\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.0.20287.2", "type": "tool", "uuid": "d654a201-dad0-47ce-b007-9f109e691ff2", "when": null, "workflow_outputs": []}, "5": {"annotation": "PepQuery against HUMAN and contaminants to know filter peptides that doesn't match to anything in the known Uniprot database", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/pepquery2/pepquery2/2.0.2+galaxy2", "errors": null, "id": 5, "input_connections": {"req_inputs|db_type|db_file": {"id": 1, "output_name": "output"}, "req_inputs|input_type|multiple|input": {"id": 0, "output_name": "output"}, "req_inputs|ms_dataset|spectrum_files": {"id": 4, "output_name": "output"}}, "inputs": [], "label": "PepQuery2", "name": "PepQuery2", "outputs": [{"name": "log_txt", "type": "txt"}, {"name": "psm_rank_txt", "type": "tabular"}, {"name": "ptm_detail_txt", "type": "tabular"}], "position": {"left": 346.6796875, "top": 13.5}, "post_job_actions": {"ChangeDatatypeActionpsm_rank_txt": {"action_arguments": {"newtype": "tabular"}, "action_type": "ChangeDatatypeAction", "output_name": "psm_rank_txt"}, "RenameDatasetActionpsm_rank_txt": {"action_arguments": {"newname": "PepQuery_psm_rank_txt"}, "action_type": "RenameDatasetAction", "output_name": "psm_rank_txt"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/pepquery2/pepquery2/2.0.2+galaxy2", "tool_shed_repository": {"changeset_revision": "c32806a80862", "name": "pepquery2", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"digestion\": {\"enzyme\": \"0\", \"max_missed_cleavages\": null}, \"fast\": true, \"modifications\": {\"fixed_mod\": [\"0\"], \"var_mod\": [\"2\"], \"max_mods\": null, \"unmodified\": false, \"aa\": false}, \"ms_params\": {\"tolerance_params\": {\"precursor_tolerance\": null, \"precursor_unit\": \"ppm\", \"tolerance\": null}, \"search\": {\"frag_method\": \"1\", \"scoring_method\": null, 
\"extra_score_validation\": false, \"min_charge\": \"2\", \"max_charge\": \"3\", \"min_peaks\": null, \"isotope_error\": \"\", \"min_score\": null, \"min_length\": \"8\", \"max_length\": \"9\", \"num_random_peptides\": null}}, \"outputs_selected\": [\"psm_rank.txt\", \"ptm_detail.txt\"], \"parameter_set\": \"\", \"req_inputs\": {\"input_type\": {\"input_type_selector\": \"peptide\", \"__current_case__\": 0, \"multiple\": {\"peptide_input_selector\": \"multiple\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}}}, \"db_type\": {\"db_type_selector\": \"history\", \"__current_case__\": 0, \"db_file\": {\"__class__\": \"ConnectedValue\"}}, \"ms_dataset\": {\"ms_dataset_type\": \"history\", \"__current_case__\": 0, \"spectrum_files\": {\"__class__\": \"ConnectedValue\"}}, \"indexType\": \"2\"}, \"validation\": {\"task_type\": \"novel\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.0.2+galaxy2", "type": "tool", "uuid": "104e3c02-2fa4-4e15-96ad-9159cee7c414", "when": null, "workflow_outputs": [{"label": "PepQuery_psm_rank_txt", "output_name": "psm_rank_txt", "uuid": "22b213c0-f1b4-4a89-9ee5-c32b72bd8715"}]}, "6": {"annotation": "Extract Peptides with the confident column as \"Yes\"", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 6, "input_connections": {"tables_0|table": {"id": 5, "output_name": "psm_rank_txt"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "PepQuery_Validated_Peptides", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 356.89168981860706, "top": 336.40387311186873}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "PepQuery_Validated_Peptides"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT c1,c4\\nFROM t1\\nWHERE (c20 = 'Yes')\\n\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "de4476bf-a724-40d5-b0a0-4fd1f01e1ea5", "when": null, "workflow_outputs": [{"label": "PepQuery_Validated_Peptides", "output_name": "output", "uuid": "3bbac43a-16eb-48d0-9144-8b86ee735cb0"}]}, "7": {"annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "errors": null, "id": 7, "input_connections": {"input": {"id": 6, "output_name": "output"}}, "inputs": [], "label": "Getting-blast-ready", "name": "Tabular-to-FASTA", "outputs": [{"name": "output", "type": "fasta"}], "position": {"left": 659.9046752732726, "top": 7.0390625}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "BLAST-P-READY-Peptides"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "tool_shed_repository": {"changeset_revision": "0a7799698fe5", "name": "tabular_to_fasta", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"input\": {\"__class__\": 
\"ConnectedValue\"}, \"seq_col\": \"1\", \"title_col\": [\"2\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "152cce01-4d55-4f63-9cb5-f793289bd2fd", "when": null, "workflow_outputs": [{"label": "BLAST-P-READY-Peptides", "output_name": "output", "uuid": "62cd5a4c-20a3-4e14-9aac-12702b085b3b"}]}, "8": {"annotation": "BLAST-Protein against swissprot, ref seq and NCBI NR and filtering for HUMAN only results", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastp_wrapper/2.14.1+galaxy2", "errors": null, "id": 8, "input_connections": {"adv_opts|adv_optional_id_files_opts|taxidlist": {"id": 3, "output_name": "output"}, "query": {"id": 7, "output_name": "output"}}, "inputs": [], "label": "BLAST-Protein", "name": "NCBI BLAST+ blastp", "outputs": [{"name": "output1", "type": "tabular"}], "position": {"left": 921.41796875, "top": 49.33984375}, "post_job_actions": {"RenameDatasetActionoutput1": {"action_arguments": {"newname": "BlastP-output"}, "action_type": "RenameDatasetAction", "output_name": "output1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastp_wrapper/2.14.1+galaxy2", "tool_shed_repository": {"changeset_revision": "cbf3f518b668", "name": "ncbi_blast_plus", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"adv_opts\": {\"adv_opts_selector\": \"advanced\", \"__current_case__\": 1, \"filter_query\": false, \"matrix_gapcosts\": {\"matrix\": \"PAM30\", \"__current_case__\": 8, \"gap_costs\": \"-gapopen 9 -gapextend 1\"}, \"max_hits\": \"0\", \"max_hsps\": \"1\", \"word_size\": \"2\", \"window_size\": \"15\", \"threshold\": \"16\", \"comp_based_stats\": \"0\", \"parse_deflines\": false, \"adv_optional_id_files_opts\": {\"adv_optional_id_files_opts_selector\": \"taxidlist\", \"__current_case__\": 4, \"taxidlist\": {\"__class__\": \"ConnectedValue\"}}, \"qcov_hsp_perc\": \"0.0\", \"use_sw_tback\": false}, \"blast_type\": 
\"blastp-short\", \"db_opts\": {\"db_opts_selector\": \"db\", \"__current_case__\": 0, \"database\": [\"nr_2023-09-03\"], \"histdb\": \"\", \"subject\": \"\"}, \"evalue_cutoff\": \"200000.0\", \"output\": {\"out_format\": \"ext\", \"__current_case__\": 1}, \"query\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.14.1+galaxy2", "type": "tool", "uuid": "c5323322-0b52-4ec5-8053-e9fece6a7de3", "when": null, "workflow_outputs": [{"label": "BlastP-output", "output_name": "output1", "uuid": "69ed808c-2254-4808-bbf3-140d87279ebf"}]}, "9": {"annotation": "Extracting Novel peptides after BlastP", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 9, "input_connections": {"tables_0|table": {"id": 8, "output_name": "output1"}, "tables_1|table": {"id": 6, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Novel_Peptides_from_PepQuery", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 921.34375, "top": 308.70703125}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Novel_Peptides_from_PepQuery"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT DISTINCT pep.*\\nFROM pep \\nJOIN blast ON pep.pep = blast.qseq\\nWHERE pep.pep NOT IN (\\n SELECT qseq \\n FROM blast \\n WHERE pident = 
100\\n)\\nAND (blast.pident < 100 \\n OR blast.gapopen >= 1 \\n OR blast.length < blast.qlen)\\nORDER BY pep.pep\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"blast\", \"column_names_from_first_line\": false, \"col_names\": \"qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,sallseqid,score,nident,positive,gaps,ppos,qframe,sframe,qseq,sseq,qlen,slen,salltitles\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": [{\"__index__\": 0, \"unique\": false, \"index_columns\": \"qseqid\"}]}}, {\"__index__\": 1, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"pep\", \"column_names_from_first_line\": false, \"col_names\": \"pep,seq\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": [{\"__index__\": 0, \"unique\": false, \"index_columns\": \"pep\"}]}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "07fa3e8e-15c4-463d-a3f6-e229c495e1d8", "when": null, "workflow_outputs": [{"label": "Novel_Peptides_from_PepQuery", "output_name": "output", "uuid": "543e7ef1-069a-44b6-97d2-53884fef62c8"}]}}, "tags": ["name:neoantigen"], "uuid": "cc4417c9-ef45-4932-bad4-8d1f193e3d85", "version": 0} \ No newline at end of file diff --git a/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/tutorial.md b/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/tutorial.md index ba1bb282dcb1ed..b747ce8b74c394 100644 --- a/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/tutorial.md +++ b/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/tutorial.md @@ -311,10 +311,9 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > Running the Workflow > -> 1. 
**Import the workflow** into Galaxy: -> -> {% snippet faqs/galaxy/workflows_run_trs.md path="topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/main_workflow.ga" title="HLA Binding Prediction of Verified Candidates" %} > +> 1. **Import the workflow** into Galaxy: +> - [Neoantigen HLA Genotyping](https://tinyurl.com/ipepgen-hla-genotyping-wf) > > 2. Run **Workflow** {% icon workflow %} using the following parameters: > - *"Send results to a new history"*: `No` @@ -323,6 +322,14 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > > {% snippet faqs/galaxy/workflows_run.md %} > +> DISCLAIMER +> +> - If any step in this workflow fails, please ensure that the input files have been correctly generated and formatted by the preceding tools. Workflow failures often result from improperly called or incomplete input data rather than errors in the workflow itself. Users are responsible for verifying their input before troubleshooting workflow issues. +> +> {: .comment} +> +> +> {: .hands_on} # Are you feeling adventurous? 
✨ diff --git a/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/index.md b/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/index.md index 9ef384ee151af1..c106c46e6c8724 100644 --- a/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/index.md +++ b/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/index.md @@ -3,4 +3,5 @@ layout: workflow-list redirect_from: - /topics/proteomics/tutorials/neoantigen-6-predicting-hla-binding/workflows/ + - /topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/main_workflow.html --- diff --git a/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/main_workflow.ga b/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/main_workflow.ga deleted file mode 100644 index d6702ae7fe2428..00000000000000 --- a/topics/proteomics/tutorials/neoantigen-predicting-hla-binding/workflows/main_workflow.ga +++ /dev/null @@ -1 +0,0 @@ -{"a_galaxy_workflow": "true", "annotation": "Prediction of HLA binding for verified candidates", "comments": [], "creator": [{"class": "Organization", "name": "galaxyp"}], "format-version": "0.1", "license": "GPL-3.0-or-later", "name": "GigaScience-RNAseq-Optitype-seq2HLA-to-IEDB-alleles", "report": {"markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"}, "steps": {"0": {"annotation": "Paired RNAseq forward reads.", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [{"description": "Paired RNAseq forward reads.", "name": "R1.fastq"}], "label": "R1.fastq", "name": "Input dataset", "outputs": [], "position": {"left": 0, "top": 172.50329230957024}, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\", \"fastqsanger.gz\"], \"tag\": null}", "tool_version": null, 
"type": "data_input", "uuid": "0db8cd2f-9c00-4c93-96fd-1b756cc83c66", "when": null, "workflow_outputs": []}, "1": {"annotation": "Paired RNAseq reverse reads.", "content_id": null, "errors": null, "id": 1, "input_connections": {}, "inputs": [{"description": "Paired RNAseq reverse reads.", "name": "R2.fastq"}], "label": "R2.fastq", "name": "Input dataset", "outputs": [], "position": {"left": 3.6874771118164062, "top": 283.5241243713378}, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\", \"fastqsanger.gz\"], \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "6c668770-34f7-4091-a830-a68030244843", "when": null, "workflow_outputs": []}, "2": {"annotation": "HLA genotyping output", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/optitype/optitype/1.3.5+galaxy0", "errors": null, "id": 2, "input_connections": {"fastq_input|fastq_input1": {"id": 0, "output_name": "output"}, "fastq_input|fastq_input2": {"id": 1, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool OptiType", "name": "fastq_input"}, {"description": "runtime parameter for tool OptiType", "name": "fastq_input"}], "label": "Optitype", "name": "OptiType", "outputs": [{"name": "coverage_plot", "type": "pdf"}, {"name": "result", "type": "tabular"}], "position": {"left": 501.484375, "top": 0}, "post_job_actions": {"RenameDatasetActioncoverage_plot": {"action_arguments": {"newname": "OptiType_on_input_datasets_coverage_plot"}, "action_type": "RenameDatasetAction", "output_name": "coverage_plot"}, "RenameDatasetActionresult": {"action_arguments": {"newname": "OptiType_on_input_datasets_result"}, "action_type": "RenameDatasetAction", "output_name": "result"}, "TagDatasetActioncoverage_plot": {"action_arguments": {"tags": "#Optitype"}, "action_type": "TagDatasetAction", "output_name": "coverage_plot"}, "TagDatasetActionresult": {"action_arguments": {"tags": "#Optitype"}, "action_type": "TagDatasetAction", "output_name": "result"}}, 
"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/optitype/optitype/1.3.5+galaxy0", "tool_shed_repository": {"changeset_revision": "fd974c5df8bc", "name": "optitype", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"beta\": null, \"enumerations\": \"3\", \"fastq_input\": {\"fastq_input_selector\": \"paired\", \"__current_case__\": 0, \"fastq_input1\": {\"__class__\": \"ConnectedValue\"}, \"fastq_input2\": {\"__class__\": \"ConnectedValue\"}}, \"read_type\": \"--rna\", \"solver\": \"glpk\", \"unpaired_weight\": \"0.0\", \"use_discordant\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.3.5+galaxy0", "type": "tool", "uuid": "fa062606-0731-448f-825d-be633335c923", "when": null, "workflow_outputs": [{"label": "OptiType_on_input_datasets_result", "output_name": "result", "uuid": "cce6a623-1f69-47f9-af17-f6d147bc6494"}, {"label": "OptiType_on_input_datasets_coverage_plot", "output_name": "coverage_plot", "uuid": "c3aeb7f3-89e2-4b9b-b8af-481955d7de61"}]}, "3": {"annotation": "HLA genotyping output", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seq2hla/seq2hla/2.3+galaxy0", "errors": null, "id": 3, "input_connections": {"fastq_input|fastq_input1": {"id": 0, "output_name": "output"}, "fastq_input|fastq_input2": {"id": 1, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool seq2HLA", "name": "fastq_input"}, {"description": "runtime parameter for tool seq2HLA", "name": "fastq_input"}], "label": "seq2HLA", "name": "seq2HLA", "outputs": [{"name": "seq2hla_log", "type": "txt"}, {"name": "c1_genotype2digits", "type": "tabular"}, {"name": "c1_genotype4digits", "type": "tabular"}, {"name": "c2_genotype4digits", "type": "tabular"}, {"name": "c1_expression", "type": "tabular"}, {"name": "c2_expression", "type": "tabular"}, {"name": "ambiguity", "type": "txt"}], "position": {"left": 517.2152581356928, "top": 370.18557425006134}, "post_job_actions": {"HideDatasetActionc1_expression": 
{"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "c1_expression"}, "HideDatasetActionc1_genotype2digits": {"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "c1_genotype2digits"}, "HideDatasetActionc2_expression": {"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "c2_expression"}, "HideDatasetActionc2_genotype4digits": {"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "c2_genotype4digits"}, "HideDatasetActionseq2hla_log": {"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "seq2hla_log"}, "RenameDatasetActionc1_genotype4digits": {"action_arguments": {"newname": "seq2HLA_genotype_4digits"}, "action_type": "RenameDatasetAction", "output_name": "c1_genotype4digits"}, "TagDatasetActionambiguity": {"action_arguments": {"tags": "#seq2HLA"}, "action_type": "TagDatasetAction", "output_name": "ambiguity"}, "TagDatasetActionc1_expression": {"action_arguments": {"tags": "#seq2HLA"}, "action_type": "TagDatasetAction", "output_name": "c1_expression"}, "TagDatasetActionc1_genotype4digits": {"action_arguments": {"tags": "#seq2HLA"}, "action_type": "TagDatasetAction", "output_name": "c1_genotype4digits"}, "TagDatasetActionc2_expression": {"action_arguments": {"tags": "#seq2HLA"}, "action_type": "TagDatasetAction", "output_name": "c2_expression"}, "TagDatasetActionc2_genotype4digits": {"action_arguments": {"tags": "#seq2HLA"}, "action_type": "TagDatasetAction", "output_name": "c2_genotype4digits"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seq2hla/seq2hla/2.3+galaxy0", "tool_shed_repository": {"changeset_revision": "86d0ce0560a1", "name": "seq2hla", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"fastq_input\": {\"fastq_input_selector\": \"paired\", \"__current_case__\": 0, \"fastq_input1\": {\"__class__\": \"ConnectedValue\"}, \"fastq_input2\": {\"__class__\": \"ConnectedValue\"}}, \"run_name\": \"STS26TGen\", \"trim\": null, 
\"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.3+galaxy0", "type": "tool", "uuid": "8ce88cf1-03d5-40c0-9b17-1ce3ab93ec1e", "when": null, "workflow_outputs": [{"label": "seq2HLA_genotype_4digits", "output_name": "c1_genotype4digits", "uuid": "be8e1b19-4a02-4d37-b716-355aac3466d6"}]}, "4": {"annotation": "reformatting the data to make it easy to interpret", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", "errors": null, "id": 4, "input_connections": {"infile": {"id": 2, "output_name": "result"}}, "inputs": [], "label": "Reformated-Optitype-Data", "name": "Text reformatting", "outputs": [{"name": "outfile", "type": "input"}], "position": {"left": 749.984375, "top": 145.35746406249993}, "post_job_actions": {"HideDatasetActionoutfile": {"action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "outfile"}, "RenameDatasetActionoutfile": {"action_arguments": {"newname": "Reformated-Optitype-Data"}, "action_type": "RenameDatasetAction", "output_name": "outfile"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", "tool_shed_repository": {"changeset_revision": "86755160afbf", "name": "text_processing", "owner": "bgruening", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"code\": \"$1 ~ /[0-9]/{ \\n for (i = 2; i <=7; i++) { allele[$i]++}\\n}\\nEND {\\n for (i in allele) {\\n print i\\n }\\n}\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "9.3+galaxy1", "type": "tool", "uuid": "201675b0-58d0-418a-b378-ce746a612b85", "when": null, "workflow_outputs": [{"label": "Reformated-Optitype-Data", "output_name": "outfile", "uuid": "b359062d-bb66-4ad6-a4fe-a80c71671a6a"}]}, "5": {"annotation": "extract IEDB alleles", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 5, "input_connections": {"tables_0|table": 
{"id": 4, "output_name": "outfile"}, "tables_1|table": {"id": 3, "output_name": "c1_genotype4digits"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "IEDB_alleles", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}, {"name": "output1", "type": "tabular"}, {"name": "output2", "type": "tabular"}], "position": {"left": 1030.6875, "top": 305.6074640624999}, "post_job_actions": {"RemoveTagDatasetActionoutput1": {"action_arguments": {"tags": "#seq2HLA"}, "action_type": "RemoveTagDatasetAction", "output_name": "output1"}, "RemoveTagDatasetActionoutput2": {"action_arguments": {"tags": "#Optitype"}, "action_type": "RemoveTagDatasetAction", "output_name": "output2"}, "RenameDatasetActionoutput": {"action_arguments": {"newname": "IEDB-Optitype-seq2HLA-alleles"}, "action_type": "RenameDatasetAction", "output_name": "output"}, "RenameDatasetActionoutput1": {"action_arguments": {"newname": "IEDB_Optitype_alleles"}, "action_type": "RenameDatasetAction", "output_name": "output1"}, "RenameDatasetActionoutput2": {"action_arguments": {"newname": "IEDB_seq2HLA_alleles"}, "action_type": "RenameDatasetAction", "output_name": "output2"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": [{\"__index__\": 0, \"sqlquery\": \"SELECT c1 FROM optitype\\nORDER BY c1\", \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}}, {\"__index__\": 1, \"sqlquery\": \"SELECT c1 FROM seq2hla WHERE c1 LIKE '%*%:%' \\nUNION \\nSELECT c2 FROM seq2hla WHERE c2 LIKE '%*%:%'\", \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}}]}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", 
\"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT hla\\nFROM\\n(SELECT c1 AS \\\"hla\\\" FROM optitype\\nUNION\\nSELECT c1 AS \\\"hla\\\" FROM seq2hla WHERE c1 LIKE '%*%:%'\\nUNION \\nSELECT c2 AS \\\"hla\\\" FROM seq2hla WHERE c2 LIKE '%*%:%') \\nORDER BY hla\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": [{\"__index__\": 0, \"filter\": {\"filter_type\": \"regex\", \"__current_case__\": 8, \"regex_pattern\": \"^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$\", \"regex_action\": \"include_match\"}}, {\"__index__\": 1, \"filter\": {\"filter_type\": \"replace\", \"__current_case__\": 11, \"column\": \"c1\", \"regex_pattern\": \"^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$\", \"regex_replace\": \"HLA-\\\\1\", \"add\": null}}]}, \"tbl_opts\": {\"table_name\": \"optitype\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}, {\"__index__\": 1, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": [{\"__index__\": 0, \"filter\": {\"filter_type\": \"skip\", \"__current_case__\": 0, \"skip_lines\": \"1\"}}, {\"__index__\": 1, \"filter\": {\"filter_type\": \"select_columns\", \"__current_case__\": 9, \"columns\": \"2,4\"}}, {\"__index__\": 2, \"filter\": {\"filter_type\": \"replace\", \"__current_case__\": 11, \"column\": \"1\", \"regex_pattern\": \"^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$\", \"regex_replace\": \"HLA-\\\\1\", \"add\": null}}, {\"__index__\": 3, \"filter\": {\"filter_type\": \"replace\", \"__current_case__\": 11, \"column\": \"2\", \"regex_pattern\": \"^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$\", \"regex_replace\": \"HLA-\\\\1\", \"add\": null}}]}, \"tbl_opts\": {\"table_name\": \"seq2hla\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", 
\"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "c6860475-0a44-4212-a7ca-2db2c8696b93", "when": null, "workflow_outputs": [{"label": "IEDB_Optitype_alleles", "output_name": "output1", "uuid": "865da3d9-67ed-44ef-b019-f489ad834188"}, {"label": "IEDB-Optitype-seq2HLA-alleles", "output_name": "output", "uuid": "e7f83293-69fa-44c9-9627-904c2b0dde97"}, {"label": "IEDB_seq2HLA_alleles", "output_name": "output2", "uuid": "cc73bee6-39bd-44d3-915c-ef38d15fe3af"}]}}, "tags": ["HLA", "name:neoantigen"], "uuid": "ab6a2e5a-964a-41a1-bb3e-7c84e0b5c17c", "version": 0} \ No newline at end of file diff --git a/topics/proteomics/tutorials/neoantigen-variant-annotation/tutorial.md b/topics/proteomics/tutorials/neoantigen-variant-annotation/tutorial.md index edbe00c44ab0bb..07729423b68b03 100644 --- a/topics/proteomics/tutorials/neoantigen-variant-annotation/tutorial.md +++ b/topics/proteomics/tutorials/neoantigen-variant-annotation/tutorial.md @@ -142,9 +142,10 @@ This step extracts information about novel peptides from FragPipe, which primari ## Annotation and Filtering -Because the annotation structures differ between **assembly-derived peptides** and **SAV (single-amino-acid–variant) peptides**, we apply separate text-formatting procedures for each. We first format the assembly-derived peptide annotations, followed by the SAV annotations; the two are then merged to generate unified peptide annotations for downstream processing with **PepPointer**. +Because the annotation structures differ between **assembly-derived peptides** and **SAV (single-amino-acid–variant) peptides**, we apply separate text-formatting procedures for each. We first format the assembly-derived peptide annotations, followed by the SAV annotations; the two sets of annotations are then processed separately with **PepPointer**. 
### Extracting assembly-derived peptides + The **Select lines that match an expression** tool is used to extract only the rows corresponding to StringTie-assembled transcripts from a mixed text file. StringTie typically labels transcript or gene identifiers with the prefix `STRG`. By filtering for lines containing `STRG`, we keep only the relevant transcript entries and discard header or unrelated lines. This cleaned file is then used for downstream integration with genomic and variant annotations. > Select StringTie transcript lines @@ -168,14 +169,14 @@ The **Select lines that match an expression** tool is used to extract only the r > > > > > -> > 1. The `STRG` prefix identifies StringTie-assembled genes/transcripts. Filtering for `STRG` ensures that only these biologically relevant entries are carried forward for coordinate merging and annotation. +> > 1. The `STRG` prefix identifies StringTie-assembled genes/transcripts. Filtering for `STRG` ensures that only these biologically relevant entries are carried forward for coordinate annotation. > > 2. Header lines, comments, or any non-StringTie records that do not contain `STRG` are removed. This avoids clutter and prevents unrelated entries from interfering with downstream joins and comparisons. > > > {: .solution} > {: .question} -### Converting delimiters +### Converting delimiters - Assembly-derived peptides In this step, we will use the Convert tool to modify characters in a dataset. Specifically, the tool will be used to convert all instances of pipe characters (|) to another format. The tool helps clean and standardize the data for subsequent processing or analysis. This is often a necessary step when preparing data for tools that require specific formats or when performing tasks such as parsing or file importation. @@ -220,7 +221,7 @@ In this step, we will use the Column Regex Find And Replace tool to find and rep > > 1. 
{% tool [Column Regex Find And Replace](toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3) %} with the following parameters: > - {% icon param-file %} *"Select cells from"*: `out_file1` (output of **Convert** {% icon tool %}) -> - *"using column"*: `c3` +> - *"using column"*: `3` > - In *"Check"*: > - {% icon param-repeat %} *"Insert Check"* > - *"Find Regex"*: `u_` @@ -242,9 +243,9 @@ In this step, we will use the Column Regex Find And Replace tool to find and rep {: .hands_on} +### Extracting bed file information using Query Tabular - Assembly-derived peptides +In this step, we will use the Query Tabular tool to extract specific information from a dataset, such as a BED file containing genomic regions, and match it with novel peptides. This allows for identifying the relevant genomic and peptide information by querying data from two sources and combining them through an SQL query. By using an INNER JOIN operation, we can merge data from two tables based on shared columns, and retrieve the necessary information. This query extracts specific columns from both the BED file (such as genomic coordinates) and the novel peptide dataset (such as peptide sequences or identifiers), enabling the identification of peptides that correspond to specific genomic regions. These are the columns that will be extracted: -### Extracting bed file information Query Tabular -In this step, we will use the Query Tabular tool to extract specific information from a dataset, such as a BED file containing genomic regions, and match it with novel peptides. This allows for identifying the relevant genomic and peptide information by querying data from two sources and combining them through an SQL query. By using an INNER JOIN operation, we can merge data from two tables based on shared columns, and retrieve the necessary information. 
This query extracts specific columns from both the BED file (such as genomic coordinates) and the novel peptide dataset (such as peptide sequences or identifiers), enabling the identification of peptides that correspond to specific genomic regions. These are the columns that will be extracted - - Chrom: Chromosome name (e.g., chr1). - Start: Starting position of the feature (zero-based index). - End: Ending position of the feature (one-based index). @@ -253,7 +254,7 @@ In this step, we will use the Query Tabular tool to extract specific information - Strand: Strand orientation (+ or -). - ThickStart and ThickEnd: Define the start and end of the transcribed or relevant part of the feature, often the coding region. -> Query Tabular +> Extract BED file for Assembly-derived peptides > > 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: > - In *"Database Table"*: @@ -275,7 +276,191 @@ In this step, we will use the Query Tabular tool to extract specific information {: .hands_on} -### Extracting SAV-derived peptides +### Convert proteomic coordinates to genomic coordinates - Assembly-derived peptides +To convert proteomic coordinates to genomic coordinates, it is essential to account for the relationship between the protein sequence and its corresponding gene or genomic region. In this workflow, the proteomic coordinates have already been extracted at the amino acid level. Since each amino acid in the protein sequence corresponds to a triplet of nucleotides (a codon) in the mRNA, we need to multiply the proteomic coordinate by 3 to obtain the genomic coordinate. This conversion will give us the position of each amino acid within the genomic sequence. The resulting genomic coordinates are stored in a separate column for easy reference. Once this step is completed, we can extract and organize the information in the correct order for further analysis or mapping to the genomic reference. 
+ + +> Text manipulation for Assembly-derived peptides +> +> 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - In *"Database Table"*: +> - {% icon param-repeat %} *"Insert Database Table"* +> - {% icon param-file %} *"Tabular Dataset for Table"*: `assembly_formatted_table` (output of **Query Tabular - Assembly** {% icon tool %}) +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT t1.*, +> (t1.c4 - 1) * 3 AS c2_multiplied, +> t1.c5 * 3 AS c3_multiplied +> FROM t1 +> ``` +> - *"include query result column headers"*: `No` +> +> +{: .hands_on} + + +### Annotating the genomic coordinates - Assembly-derived peptides + +The Query Tabular step in this workflow is used to extract and calculate genomic coordinates based on the proteomic data. The SQL query within the tool defines two calculations for genomic coordinates, start and stop, based on the strand information of the data. For each row in the input dataset (t1), if the strand (t1.c7) is "-" (negative), the genomic coordinates are calculated by subtracting the position from the given end (t1.c3 - t1.c9 for start, and t1.c3 - t1.c10 for stop). If the strand is "+" (positive), the genomic coordinates are calculated by adding the respective positions (t1.c2 + t1.c9 for start, and t1.c2 + t1.c10 for stop). These calculated coordinates are then returned in the query results, where they will be included as new columns (start and stop). This step is essential for transforming the proteomic information into genomic positions for further analysis. + +> Table processing for Assembly-derived peptides +> +> 1. 
{% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - In *"Database Table"*: +> - {% icon param-repeat %} *"Insert Database Table"* +> - {% icon param-file %} *"Tabular Dataset for Table"*: `output` (output of **Text manipulation - Assembly** {% icon tool %}) +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT t1.*, +> CASE +> WHEN t1.c7 = '-' THEN t1.c3 - t1.c9 +> WHEN t1.c7 = '+' THEN t1.c2 + t1.c9 +> END AS start, +> CASE +> WHEN t1.c7 = '-' THEN t1.c3 - t1.c10 +> WHEN t1.c7 = '+' THEN t1.c2 + t1.c10 +> END AS stop FROM t1 +> ``` +> - *"include query result column headers"*: `No` +> +> +{: .hands_on} + + +> +> +> 1. Why do I need to differentiate between the positive and negative strands when calculating genomic coordinates? +> 2. What is the significance of the t1.c9 and t1.c10 columns in the SQL query? +> +> > +> > +> > 1. The positive and negative strands represent the two directions in which DNA is read, and they affect how genomic coordinates are calculated. For the negative strand, the coordinates are subtracted, whereas for the positive strand, they are added. This ensures the correct mapping of the protein sequence to the genome. +> > 2. The `t1.c9` and `t1.c10` columns represent specific offsets or lengths within the dataset, which are used to adjust the calculated genomic start and stop coordinates. These offsets could correspond to peptide lengths or specific features of the protein that need to be accounted for in the conversion to genomic coordinates. +> > +> {: .solution} +> +{: .question} + + +### Generating BED file for PepPointer - Assembly-derived peptides +This step is necessary to extract and reorganize relevant genomic information from the dataset. By querying specific columns such as chromosome (chromosome), start (chromStart), end (chromEnd), and strand (strand), we are preparing the data for further analysis. 
These values are essential for mapping proteomic or peptide data to the genomic coordinates, ensuring accurate alignment and interpretation of the sequence in the context of its genomic location. Additionally, renaming columns enhances clarity and standardizes the format, making it easier to work with the data in subsequent steps. + +> BED file for Assembly-derived peptides +> +> 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - In *"Database Table"*: +> - {% icon param-repeat %} *"Insert Database Table"* +> - {% icon param-file %} *"Tabular Dataset for Table"*: `output` (output of **Table processing - Assembly** {% icon tool %}) +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT +> c8 AS `chromosome`, +> c11 AS `chromStart`, +> c12 AS `chromEnd`, +> c1 AS `name`, +> c6 AS `score`, +> c7 AS `strand` +> FROM t1 +> ``` +> - *"include query result column headers"*: `No` +> +> 2. Change data type to ".bed". +> +{: .hands_on} + + +> +> +> 1. What is the purpose of renaming columns like chromosome, chromStart, and chromEnd in the SQL query? +> 2. Why is the strand column important in this query, and how does it affect genomic coordinate interpretation? +> +> > +> > +> > 1. Renaming the columns helps in providing more meaningful names for the data, making it easier to understand the genomic features such as the chromosome and start/end coordinates. These renamed columns align with common genomic nomenclature, improving readability and analysis. +> > 2. The strand column indicates whether the sequence is on the positive or negative strand of the DNA. This is important for correctly interpreting the orientation of the sequence and calculating its genomic coordinates. A positive strand means the coordinates will be calculated from the start, while a negative strand requires reverse calculations to adjust the coordinates properly. 
+> > +> {: .solution} +> +{: .question} + + + +## Mapping Assembly-derived Peptide sequences with PepPointer + +PepPointer is a tool designed to map peptide sequences to their respective genomic locations using data such as GTF and BED files. In this workflow, PepPointer takes the GTF file, which contains gene annotations and genomic coordinates, and combines it with a BED file, which provides chromosomal coordinates of the peptides. By doing this, PepPointer can identify the exact genomic locations of the peptides, linking them to genes and exons. This step is crucial for accurately correlating proteomic data to genomic sequences, enabling a better understanding of the genomic context in which the peptides are found. + +> PepPointer for Assembly-derived peptides +> +> 1. {% tool [PepPointer](toolshed.g2.bx.psu.edu/repos/galaxyp/pep_pointer/pep_pointer/0.1.3+galaxy1) %} with the following parameters: +> - *"Choose the source of the GTF file"*: `From history` +> - {% icon param-file %} *"GTF file with the genome of interest"*: `Homo_sapiens.GRCh38_canon.106.gtf` (Input dataset) +> - {% icon param-file %} *"BED file with chromosomal coordinates of peptide"*: `output` (from **Query Tabular - BED file for Assembly**) +> +> +{: .hands_on} + + +> +> +> 1. How does the GTF file help in identifying the genomic location of peptides? +> 2. What happens if the coordinates in the BED file don't align with the genomic features in the GTF file? +> +> > +> > +> > 1. The GTF file contains detailed annotations of the genome, including information about genes, exons, and other genomic features. By providing the chromosomal coordinates of these features, the GTF file allows PepPointer to match peptide sequences to the corresponding genes and exons. This ensures that the peptides are mapped accurately within the genome, linking them to the correct genomic context. +> > 2. 
If the coordinates in the BED file don't align with the genomic features in the GTF file, PepPointer may not be able to accurately map the peptides to their corresponding genes or exons. This misalignment could result in incorrect annotations or a failure to identify the proper genomic location of the peptides. It is important to ensure that both files are from the same genome assembly to avoid such discrepancies. +> > +> {: .solution} +> +{: .question} + + +## Visualization and Interpretation of Novel Assembly-derived peptides with annotation + +In this step, we are using Query Tabular to extract and format relevant information from the results produced by PepPointer. The SQL query in this tool is designed to structure the data into a more readable format, providing key details such as the peptide ID, chromosome location, start and end positions, strand orientation, and any annotations. Additionally, it generates genome coordinates in a format suitable for viewing in genome browsers like IGV and UCSC Genome Browser. This formatting step helps to visualize and interpret the data more effectively by linking the peptides to their genomic context. + +> Query Tabular +> +> 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: +> - In *"Database Table"*: +> - {% icon param-repeat %} *"Insert Database Table"* +> - {% icon param-file %} *"Tabular Dataset for Table"*: `classified` (output of **PepPointer - Assembly** {% icon tool %}) +> - *"SQL Query to generate tabular output"*: +> ``` +> SELECT +> c4 AS Peptide, +> c1 AS Chromosome, +> c2 AS Start, +> c3 AS End, +> c6 AS Strand, +> c7 AS Annotation, +> c1||':'||c2||'-'||c3 AS IGV_Genome_Coordinate, +> 'https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg38&position='||c1||'%3A'||c2||'-'||c3 AS UCSC_Genome_Browser +> FROM t1 +> ``` +> - *"include query result column headers"*: `Yes` +> +> +{: .hands_on} + + +> +> +> 1. 
Why is it important to include both the "IGV_Genome_Coordinate" and "UCSC_Genome_Browser" columns? +> 2. What is the significance of the "Annotation" column, and what type of data might it contain? +> +> > +> > +> > 1. The IGV_Genome_Coordinate column provides a formatted coordinate that is directly usable in genome viewers like IGV, helping users visualize the peptide's genomic location. The UCSC_Genome_Browser column provides a clickable URL to launch UCSC Genome Browser with the coordinates, offering another convenient method for visualizing the peptide's location in a widely used genome browser. Including both makes it easier for users to explore the data in different genomic tools. +> > 2. The Annotation column provides additional context about the genomic feature associated with the peptide, such as whether the peptide is part of a gene, exon, or other regulatory element. This column can help to interpret the biological relevance of the peptide in the genomic region and is important for understanding the functional role of the peptide within the genome. The data might include annotations like "gene", "exon", "intron", or other functional genomic elements. +> > +> {: .solution} +> +{: .question} + + +## Extracting SAV-derived peptides + The **Select lines that match an expression** tool is used to extract only the rows corresponding to SAV-assembled transcripts from a mixed text file. > Select SAV transcript lines @@ -292,6 +477,7 @@ The **Select lines that match an expression** tool is used to extract only the r > {: .hands_on} + ### Converting delimiters In this step, we will use the Convert tool to modify characters in a dataset. Specifically, the tool will be used to convert all instances of pipe characters (|) to another format. The tool helps clean and standardize the data for subsequent processing or analysis. 
This is often a necessary step when preparing data for tools that require specific formats or when performing tasks such as parsing or file importation. @@ -305,9 +491,9 @@ In this step, we will use the Convert tool to modify characters in a dataset. Sp > {: .hands_on} -## Extracting Relevant Columns for SAV Peptides +### Extracting Relevant Columns for SAV Peptides -The **Cut Columns** tool is used to extract only the fields required for downstream annotation of SAV-derived peptides. Since the structure of SAV output differs from assembly-derived peptide annotations, this step isolates the key identifiers and coordinate fields that will later be merged with assembly annotations during unified peptide mapping. By selecting only the columns necessary for variant-driven peptide interpretation, we ensure a clean and consistent table for integration with tools such as PeptidePointer. +The **Cut Columns** tool is used to extract only the fields required for downstream annotation of SAV-derived peptides. Since the structure of SAV output differs from assembly-derived peptide annotations, this step isolates the key identifiers and coordinate fields that will later be used for peptide mapping. By selecting only the columns necessary for variant-driven peptide interpretation, we ensure a clean and consistent table for integration with tools such as PepPointer. > Extracting Columns for SAV > @@ -319,7 +505,7 @@ The **Cut Columns** tool is used to extract only the fields required for downstr > - *“Delimited by”*: `Tab` > - {% icon param-file %} *“From”*: (output of **Convert (SAV)** {% icon tool %}) > -> 2. Execute the tool to produce a reduced SAV-specific annotation table containing only the essential fields for downstream merging. +> 2. Execute the tool to produce a reduced SAV-specific annotation table containing only the essential fields for downstream processing. 
> {: .hands_on} @@ -330,17 +516,17 @@ The **Cut Columns** tool is used to extract only the fields required for downstr > > > > > -> > 1. Extracting only the required fields reduces unnecessary complexity, removes irrelevant columns, and ensures consistent formatting before merging with assembly-derived peptide information. +> > 1. Extracting only the required fields reduces unnecessary complexity, removes irrelevant columns, and ensures consistent formatting. > > -> > 2. The SAV table will later be merged with assembly annotations to generate a unified peptide annotation file for use in downstream tools such as PeptidePointer, enabling consistent mapping of peptides to genomic features. +> > 2. The SAV table will later be used in downstream tools such as PepPointer, enabling consistent mapping of peptides to genomic features. > > > {: .solution} > {: .question} -## Editing SAV Annotation Columns Using Column Regex Find and Replace +### Editing SAV Annotation Columns Using Column Regex Find and Replace -The **Column Regex Find and Replace** tool is used in this step to clean and standardize SAV (Single Amino-acid Variant) annotation fields. SAV entries often begin with prefixes such as `SNV_` or `INDEL_`, which must be reformatted to a consistent delimiter-based structure (`SNV|`, `INDEL|`) to be compatible with downstream peptide-annotation tools such as PepPointer. This formatting step ensures that SAV annotations can later be merged accurately with assembly-derived peptides, despite their inherently different annotation structures. +The **Column Regex Find and Replace** tool is used in this step to clean and standardize SAV (Single Amino-acid Variant) annotation fields. SAV entries often begin with prefixes such as `SAV_` or `INDEL_`, which must be reformatted to a consistent delimiter-based structure (`SAV|`, `INDEL|`) to be compatible with downstream peptide-annotation tools such as PepPointer. 
> Editing SAV Annotations > @@ -368,6 +554,7 @@ The **Column Regex Find and Replace** tool is used in this step to clean and sta > {: .hands_on} + > Query Tabular > > 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: @@ -398,7 +585,7 @@ The **Column Regex Find and Replace** tool is used in this step to clean and sta > - *“Delimited by”*: `Tab` > - {% icon param-file %} *“From”*: (output of **Query Tabular (SAV)** {% icon tool %}) > -> 2. Execute the tool to produce a reduced SAV-specific annotation table containing only the essential fields for downstream merging. +> 2. Execute the tool to produce a reduced SAV-specific annotation table containing only the essential fields for downstream processing. > {: .hands_on} @@ -421,49 +608,25 @@ The **Column Regex Find and Replace** tool is used in this step to clean and sta > {: .hands_on} -Because the annotation formats differ between **assembly-derived peptides** and **SAV-derived peptides**, we process them separately before merging. We first perform text formatting on the assembly peptide annotations, followed by formatting of the SAV annotations, and finally merge both sets for unified peptide annotation using **PepPointer**. - -## Merging Assembly and SAV Annotation Tables - -The **Concatenate Datasets** tool merges the formatted assembly-derived peptide annotations with the processed SAV-derived annotations into a single unified table. This consolidated dataset allows all peptide sources—assembly, genomic, and SAV—to be passed into **PepPointer** for final peptide-level annotation. Concatenation ensures that variant-driven peptides and assembly-derived peptides are integrated into one harmonized file before downstream processing. - -> Concatenate Assembly and SAV tables -> -> 1. 
{% tool [Concatenate datasets](toolshed.g2.bx.psu.edu/repos/iuc/concatenate/concatenate/1.0.0) %} with: -> - {% icon param-file %} *“Concatenate Dataset (input1)”*: `assembly_formatted_table` (output of **Query Tabular (assembly)** {% icon tool %}) -> - *“Select (input2)”*: `sav_formatted_table` (output of **Column Regex Find And Replace (SAV)** {% icon tool %}) -> -> -{: .hands_on} - -> -> -> 1. Why do we merge assembly and SAV annotations before running PepPointer? -> 2. What considerations should be taken into account when concatenating tables? -> -> > -> > -> > 1. PepPointer requires all peptide sources to be provided together so it can assign unified peptide-level annotations, including genomic, variant, and transcript context. -> > 2. Column order and formatting must match between tables to ensure correct alignment after concatenation. -> > -> {: .solution} -> -{: .question} +Because the annotation formats differ between **assembly-derived peptides** and **SAV-derived peptides**, we must process them separately. We first perform text formatting on the assembly peptide annotations, followed by formatting of the SAV annotations, and finally process both sets for separate peptide annotation using **PepPointer**. -### Performing calculations to convert proteomic coordinates to genomic coordinates. +### Convert proteomic coordinates to genomic coordinates - SAV-derived peptides To convert proteomic coordinates to genomic coordinates, it is essential to account for the relationship between the protein sequence and its corresponding gene or genomic region. In this workflow, the proteomic coordinates have already been extracted at the amino acid level. Since each amino acid in the protein sequence corresponds to a triplet of nucleotides (a codon) in the mRNA, we need to multiply the proteomic coordinate by 3 to obtain the genomic coordinate. This conversion will give us the position of each amino acid within the genomic sequence. 
The resulting genomic coordinates are stored in a separate column for easy reference. Once this step is completed, we can extract and organize the information in the correct order for further analysis or mapping to the genomic reference. -> Query Tabular +> Text manipulation - SAV-derived peptides > > 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: > - In *"Database Table"*: > - {% icon param-repeat %} *"Insert Database Table"* -> - {% icon param-file %} *"Tabular Dataset for Table"*: `output` (output of **Concatenate datasets** {% icon tool %}) +> - {% icon param-file %} *"Tabular Dataset for Table"*: `sav_formatted_table` (output of **Column Regex Find And Replace - SAV** {% icon tool %}) > - *"SQL Query to generate tabular output"*: > ``` -> SELECT t1.*, (t1.c4 - 1) * 3 AS c2_multiplied, t1.c5 * 3 AS c3_multiplied FROM t1 +> SELECT t1.*, +> (t1.c4 - 1) * 3 AS c2_multiplied, +> t1.c5 * 3 AS c3_multiplied +> FROM t1 > ``` > - *"include query result column headers"*: `No` > @@ -471,16 +634,16 @@ To convert proteomic coordinates to genomic coordinates, it is essential to acco {: .hands_on} -### Annotating the genomic coordinates +### Annotating the genomic coordinates - SAV-derived peptides The Query Tabular step in this workflow is used to extract and calculate genomic coordinates based on the proteomic data. The SQL query within the tool defines two calculations for genomic coordinates, start and stop, based on the strand information of the data. For each row in the input dataset (t1), if the strand (t1.c7) is "-" (negative), the genomic coordinates are calculated by subtracting the position from the given end (t1.c3 - t1.c9 for start, and t1.c3 - t1.c10 for stop). If the strand is "+" (positive), the genomic coordinates are calculated by adding the respective positions (t1.c2 + t1.c9 for start, and t1.c2 + t1.c10 for stop). 
These calculated coordinates are then returned in the query results, where they will be included as new columns (start and stop). This step is essential for transforming the proteomic information into genomic positions for further analysis. -> Query Tabular +> Text processing - SAV > > 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: > - In *"Database Table"*: > - {% icon param-repeat %} *"Insert Database Table"* -> - {% icon param-file %} *"Tabular Dataset for Table"*: `output` (output of **Query Tabular** {% icon tool %}) +> - {% icon param-file %} *"Tabular Dataset for Table"*: `output` (output of **Text manipulation - SAV** {% icon tool %}) > - *"SQL Query to generate tabular output"*: > ``` > SELECT t1.*, @@ -499,29 +662,16 @@ The Query Tabular step in this workflow is used to extract and calculate genomic {: .hands_on} -> -> -> 1. Why do I need to differentiate between the positive and negative strands when calculating genomic coordinates? -> 2. What is the significance of the t1.c9 and t1.c10 columns in the SQL query? -> -> > -> > -> > 1. The positive and negative strands represent the two directions in which DNA is read, and they affect how genomic coordinates are calculated. For the negative strand, the coordinates are subtracted, whereas for the positive strand, they are added. This ensures the correct mapping of the protein sequence to the genome. -> > 2. The `t1.c9` and `t1.c10` columns represent specific offsets or lengths within the dataset, which are used to adjust the calculated genomic start and stop coordinates. These offsets could correspond to peptide lengths or specific features of the protein that need to be accounted for in the conversion to genomic coordinates. 
-> > -> {: .solution} -> -{: .question} -### Generating BED file for PepPointer +### Generating BED file for PepPointer - SAV-derived peptides This step is necessary to extract and reorganize relevant genomic information from the dataset. By querying specific columns such as chromosome (chromosome), start (chromStart), end (chromEnd), and strand (strand), we are preparing the data for further analysis. These values are essential for mapping proteomic or peptide data to the genomic coordinates, ensuring accurate alignment and interpretation of the sequence in the context of its genomic location. Additionally, renaming columns enhances clarity and standardizes the format, making it easier to work with the data in subsequent steps. -> Query Tabular +> BED file for SAV-derived peptides > > 1. {% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: > - In *"Database Table"*: > - {% icon param-repeat %} *"Insert Database Table"* -> - {% icon param-file %} *"Tabular Dataset for Table"*: `output` (output of **Query Tabular** {% icon tool %}) +> - {% icon param-file %} *"Tabular Dataset for Table"*: `output` (output of **Text processing - SAV** {% icon tool %}) > - *"SQL Query to generate tabular output"*: > ``` > SELECT @@ -540,50 +690,22 @@ This step is necessary to extract and reorganize relevant genomic information fr {: .hands_on} -> -> -> 1. What is the purpose of renaming columns like chromosome, chromStart, and chromEnd in the SQL query? -> 2. Why is the strand column important in this query, and how does it affect genomic coordinate interpretation? -> -> > -> > -> > 1. Renaming the columns helps in providing more meaningful names for the data, making it easier to understand the genomic features such as the chromosome and start/end coordinates. These renamed columns align with common genomic nomenclature, improving readability and analysis. -> > 2. 
The strand column indicates whether the sequence is on the positive or negative strand of the DNA. This is important for correctly interpreting the orientation of the sequence and calculating its genomic coordinates. A positive strand means the coordinates will be calculated from the start, while a negative strand requires reverse calculations to adjust the coordinates properly. -> > -> {: .solution} -> -{: .question} - -## Mapping Peptide sequences with PepPointer +## Mapping SAV-derived Peptide sequences with PepPointer PepPointer is a tool designed to map peptide sequences to their respective genomic locations using data such as GTF and BED files. In this workflow, PepPointer takes the GTF file, which contains gene annotations and genomic coordinates, and combines it with a BED file, which provides chromosomal coordinates of the peptides. By doing this, PepPointer can identify the exact genomic locations of the peptides, linking them to genes and exons. This step is crucial for accurately correlating proteomic data to genomic sequences, enabling a better understanding of the genomic context in which the peptides are found. -> PepPointer +> PepPointer for SAV-derived peptides > > 1. {% tool [PepPointer](toolshed.g2.bx.psu.edu/repos/galaxyp/pep_pointer/pep_pointer/0.1.3+galaxy1) %} with the following parameters: > - *"Choose the source of the GTF file"*: `From history` > - {% icon param-file %} *"GTF file with the genome of interest"*: `Homo_sapiens.GRCh38_canon.106.gtf` (Input dataset) -> - {% icon param-file %} *"BED file with chromosomal coordinates of peptide"*: `output` (output of **Query Tabular** {% icon tool %}) +> - {% icon param-file %} *"BED file with chromosomal coordinates of peptide"*: `output` (from **Query Tabular - BED file for SAV**) > > {: .hands_on} -> -> -> 1. How does the GTF file help in identifying the genomic location of peptides? -> 2. 
What happens if the coordinates in the BED file don't align with the genomic features in the GTF file? -> -> > -> > -> > 1. The GTF file contains detailed annotations of the genome, including information about genes, exons, and other genomic features. By providing the chromosomal coordinates of these features, the GTF file allows PepPointer to match peptide sequences to the corresponding genes and exons. This ensures that the peptides are mapped accurately within the genome, linking them to the correct genomic context. -> > 2. If the coordinates in the BED file don't align with the genomic features in the GTF file, PepPointer may not be able to accurately map the peptides to their corresponding genes or exons. This misalignment could result in incorrect annotations or a failure to identify the proper genomic location of the peptides. It is important to ensure that both files are from the same genome assembly to avoid such discrepancies. -> > -> {: .solution} -> -{: .question} - -## Visualization and Interpretation +## Visualization and Interpretation of Novel SAV-derived peptides with annotation In this step, we are using Query Tabular to extract and format relevant information from the results produced by PepPointer. The SQL query in this tool is designed to structure the data into a more readable format, providing key details such as the peptide ID, chromosome location, start and end positions, strand orientation, and any annotations. Additionally, it generates genome coordinates in a format suitable for viewing in genome browsers like IGV and UCSC Genome Browser. This formatting step helps to visualize and interpret the data more effectively by linking the peptides to their genomic context. @@ -592,7 +714,7 @@ In this step, we are using Query Tabular to extract and format relevant informat > 1. 
{% tool [Query Tabular](toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2) %} with the following parameters: > - In *"Database Table"*: > - {% icon param-repeat %} *"Insert Database Table"* -> - {% icon param-file %} *"Tabular Dataset for Table"*: `classified` (output of **PepPointer** {% icon tool %}) +> - {% icon param-file %} *"Tabular Dataset for Table"*: `classified` (output of **PepPointer - SAV** {% icon tool %}) > - *"SQL Query to generate tabular output"*: > ``` > SELECT @@ -612,21 +734,6 @@ In this step, we are using Query Tabular to extract and format relevant informat {: .hands_on} -> -> -> 1. Why is it important to include both the "IGV_Genome_Coordinate" and "UCSC_Genome_Browser" columns? -> 2. What is the significance of the "Annotation" column, and what type of data might it contain? -> -> > -> > -> > 1. The IGV_Genome_Coordinate column provides a formatted coordinate that is directly usable in genome viewers like IGV, helping users visualize the peptide's genomic location. The UCSC_Genome_Browser column provides a clickable URL to launch UCSC Genome Browser with the coordinates, offering another convenient method for visualizing the peptide's location in a widely used genome browser. Including both makes it easier for users to explore the data in different genomic tools. -> > 2. The Annotation column provides additional context about the genomic feature associated with the peptide, such as whether the peptide is part of a gene, exon, or other regulatory element. This column can help to interpret the biological relevance of the peptide in the genomic region and is important for understanding the functional role of the peptide within the genome. The data might include annotations like "gene", "exon", "intron", or other functional genomic elements. 
-> > -> {: .solution} -> -{: .question} - - # B: Database for IEDB ![Variant-annotation-overview-workflow]({% link topics/proteomics/images/neoantigen/PepPointer_Characterization_3.PNG %}) @@ -637,8 +744,8 @@ This output is an input for the next workflow (HLA Binding Novel Peptides). > > 1. {% tool [Tabular-to-FASTA](toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1) %} with the following parameters: > - {% icon param-file %} *"Tab-delimited file"*: `output` (output of **Query Tabular** {% icon tool %}) -> - *"Title column(s)"*: `c2` -> - *"Sequence column"*: `c1` +> - *"Title column(s)"*: `2` +> - *"Sequence column"*: `1` > > {: .hands_on} @@ -663,8 +770,7 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > Running the Workflow > > 1. **Import the workflow** into Galaxy: -> -> {% snippet faqs/galaxy/workflows_run_trs.md path="topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/main_workflow.ga" title="peppointer Annotation" %} +> - [Neoantigen Variant Annotation](https://tinyurl.com/ipepgen-pep-annot-wf) > > > 2. Run **Workflow** {% icon workflow %} using the following parameters: @@ -677,6 +783,14 @@ To rerun this entire analysis at once, you can use our workflow. Below we show h > > {% snippet faqs/galaxy/workflows_run.md %} > +> DISCLAIMER +> +> - If any step in this workflow fails, please ensure that the input files have been correctly generated and formatted by the preceding tools. Workflow failures often result from improperly called or incomplete input data rather than errors in the workflow itself. Users are responsible for verifying their input before troubleshooting workflow issues. +> +> {: .comment} +> +> +> {: .hands_on} # Are you feeling adventurous? 
✨ diff --git a/topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/index.md b/topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/index.md index 9f05c43c917f56..cc90adaf377b2b 100644 --- a/topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/index.md +++ b/topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/index.md @@ -3,5 +3,6 @@ layout: workflow-list redirect_from: - /topics/proteomics/tutorials/neoantigen-5-variant-annotation/workflows/ + - /topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/main_workflow.html --- diff --git a/topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/main_workflow.ga b/topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/main_workflow.ga deleted file mode 100644 index 03f11b1576f3f2..00000000000000 --- a/topics/proteomics/tutorials/neoantigen-variant-annotation/workflows/main_workflow.ga +++ /dev/null @@ -1 +0,0 @@ -{"a_galaxy_workflow": "true", "annotation": "Annotating the novel peptides", "comments": [], "creator": [{"class": "Organization", "name": "GalaxyP"}], "format-version": "0.1", "license": "GPL-3.0-or-later", "name": "GigaScience_Peptide_Annotation_demonstration_STS26T_neoantigen_candidates_workflow", "report": {"markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"}, "steps": {"0": {"annotation": "Candidate Neoantigens from Fragpipe Workflow", "content_id": null, "errors": null, "id": 0, "input_connections": {}, "inputs": [{"description": "Candidate Neoantigens from Fragpipe Workflow", "name": "Novel_Peptides"}], "label": "Novel_Peptides", "name": "Input dataset", "outputs": [], "position": {"left": 0, "top": 167.33203125}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": 
"eb9a458f-4900-4554-98c5-985005dc219c", "when": null, "workflow_outputs": []}, "1": {"annotation": "Peptide report from Fragpipe workflow", "content_id": null, "errors": null, "id": 1, "input_connections": {}, "inputs": [{"description": "Peptide report from Fragpipe workflow", "name": "Fragpipe-Peptide-Report"}], "label": "Fragpipe-Peptide-Report", "name": "Input dataset", "outputs": [], "position": {"left": 10.46875, "top": 342.16796875}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "2d2a7ba3-61e5-4ecd-a652-f76e061910a2", "when": null, "workflow_outputs": []}, "2": {"annotation": "Annotated GffCompared GTF to BED", "content_id": null, "errors": null, "id": 2, "input_connections": {}, "inputs": [{"description": "Annotated GffCompared GTF to BED", "name": "Annotated-GffCompared-GTFtoBED"}], "label": "Annotated-GffCompared-GTFtoBED", "name": "Input dataset", "outputs": [], "position": {"left": 627.3046875, "top": 544.73046875}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "5f3169df-7268-4c4a-a71d-a4bf015a24f4", "when": null, "workflow_outputs": []}, "3": {"annotation": "Homo_sapiens.GRCh38_canon.106.gtf", "content_id": null, "errors": null, "id": 3, "input_connections": {}, "inputs": [{"description": "Homo_sapiens.GRCh38_canon.106.gtf", "name": "Homo_sapiens.GRCh38_canon.106.gtf"}], "label": "Homo_sapiens.GRCh38_canon.106.gtf", "name": "Input dataset", "outputs": [], "position": {"left": 1476.15625, "top": 144.72265625}, "tool_id": null, "tool_state": "{\"optional\": false, \"tag\": null}", "tool_version": null, "type": "data_input", "uuid": "01a15bcb-6098-4db9-b35c-2d2a28cd0160", "when": null, "workflow_outputs": []}, "4": {"annotation": "Extracting the information from Fragpipe for the Novel peptides ", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 4, 
"input_connections": {"tables_0|table": {"id": 0, "output_name": "output"}, "tables_1|table": {"id": 1, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Peptide_to_Protein_Annotation", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 300.5078125, "top": 207.3828125}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "Peptide_to_Protein_Annotation"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT t1.c1,t2.c13,t2.c5,t2.c6\\nFROM t1\\nINNER JOIN t2\\nON t1.c1 = t2.c1\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}, {\"__index__\": 1, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "a3bafb76-ae01-4f66-b127-c9a5f69354f0", "when": null, "workflow_outputs": [{"label": "Peptide_to_Protein_Annotation", "output_name": 
"output", "uuid": "4631011c-b1d4-4964-96d6-5a11201afb75"}]}, "5": {"annotation": "FASTA file for IEDB", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "errors": null, "id": 5, "input_connections": {"input": {"id": 4, "output_name": "output"}}, "inputs": [], "label": "FASTA-IEDB", "name": "Tabular-to-FASTA", "outputs": [{"name": "output", "type": "fasta"}], "position": {"left": 592.0859375, "top": 0}, "post_job_actions": {"RenameDatasetActionoutput": {"action_arguments": {"newname": "FASTA-IEDB"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", "tool_shed_repository": {"changeset_revision": "0a7799698fe5", "name": "tabular_to_fasta", "owner": "devteam", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"input\": {\"__class__\": \"ConnectedValue\"}, \"seq_col\": \"1\", \"title_col\": [\"2\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.1", "type": "tool", "uuid": "84c79f65-b078-419f-b226-669c942e2625", "when": null, "workflow_outputs": [{"label": "FASTA-IEDB", "output_name": "output", "uuid": "3b728452-3d59-4e5f-a9dc-8764ceb60094"}]}, "6": {"annotation": "converting pipes to columns", "content_id": "Convert characters1", "errors": null, "id": 6, "input_connections": {"input": {"id": 4, "output_name": "output"}}, "inputs": [], "label": "Converting_pipes_to_columns", "name": "Convert", "outputs": [{"name": "out_file1", "type": "tabular"}], "position": {"left": 607.6953125, "top": 163.65234375}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Table-processing-1"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Convert characters1", "tool_state": "{\"condense\": true, \"convert_from\": \"P\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"strip\": true, \"__page__\": null, \"__rerun_remap_job_id__\": null}", 
"tool_version": "1.0.0", "type": "tool", "uuid": "033d58cb-8bb8-4e72-a192-5d08b124e877", "when": null, "workflow_outputs": [{"label": "Table-processing-1", "output_name": "out_file1", "uuid": "58ea288e-1160-4617-8207-453bf29b0d12"}]}, "7": {"annotation": "Modifying-string-name", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "errors": null, "id": 7, "input_connections": {"input": {"id": 6, "output_name": "out_file1"}}, "inputs": [], "label": "Modifying-string-name", "name": "Column Regex Find And Replace", "outputs": [{"name": "out_file1", "type": "input"}], "position": {"left": 853.640625, "top": 140.69921875}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": "Modified-string-name"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/regex_find_replace/regexColumn1/1.0.3", "tool_shed_repository": {"changeset_revision": "503bcd6ebe4b", "name": "regex_find_replace", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"checks\": [{\"__index__\": 0, \"pattern\": \"u_\", \"replacement\": \"u:\"}], \"field\": \"3\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.3", "type": "tool", "uuid": "cef6a8f7-4b50-4ca1-9986-0a3a4d3827b4", "when": null, "workflow_outputs": [{"label": "Modified-string-name", "output_name": "out_file1", "uuid": "a607fb3f-2e87-410a-9dfb-9286baf667e9"}]}, "8": {"annotation": "converting colons to columns", "content_id": "Convert characters1", "errors": null, "id": 8, "input_connections": {"input": {"id": 7, "output_name": "out_file1"}}, "inputs": [], "label": "Converting_colons_to_columns", "name": "Convert", "outputs": [{"name": "out_file1", "type": "tabular"}], "position": {"left": 637.84375, "top": 351.5859375}, "post_job_actions": {"RenameDatasetActionout_file1": {"action_arguments": {"newname": 
"Table-processing-2"}, "action_type": "RenameDatasetAction", "output_name": "out_file1"}}, "tool_id": "Convert characters1", "tool_state": "{\"condense\": true, \"convert_from\": \"Co\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"strip\": true, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0.0", "type": "tool", "uuid": "43034770-78f6-46a7-832c-2b638b61b105", "when": null, "workflow_outputs": [{"label": "Table-processing-2", "output_name": "out_file1", "uuid": "e24ff14e-b512-4444-9abb-b3c46d4fb707"}]}, "9": {"annotation": "Extracting-info-from-GFFtobed", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 9, "input_connections": {"tables_0|table": {"id": 8, "output_name": "out_file1"}, "tables_1|table": {"id": 2, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Extracting-info-from-GFFtobed", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 917.42578125, "top": 341.90625}, "post_job_actions": {"ChangeDatatypeActionoutput": {"action_arguments": {"newtype": "bed"}, "action_type": "ChangeDatatypeAction", "output_name": "output"}, "RenameDatasetActionoutput": {"action_arguments": {"newname": "Extracting-info-from-GFFtobed"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT t1.c1,t1.c8,t1.c9,t1.c11,t1.c12,t2.c5,t2.c6,t1.c7\\nFROM t1\\nINNER JOIN 
t2\\nON t1.c3 = t2.c4\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}, {\"__index__\": 1, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "e23c9d12-fe1f-410c-bd60-5e577dd3b95b", "when": null, "workflow_outputs": [{"label": "Extracting-info-from-GFFtobed", "output_name": "output", "uuid": "e78166a9-a99b-429d-b01e-e5e19084e1c0"}]}, "10": {"annotation": "multiplying start and stop of the peptide location with 3 to calculate genomic coordinates", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 10, "input_connections": {"tables_0|table": {"id": 9, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Table-processing-3", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1229.76953125, "top": 352.421875}, "post_job_actions": {"ChangeDatatypeActionoutput": {"action_arguments": {"newtype": "tabular"}, "action_type": "ChangeDatatypeAction", "output_name": "output"}, "RenameDatasetActionoutput": {"action_arguments": {"newname": "Table-processing-3"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", 
"tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT t1.*, \\n (t1.c4 - 1) * 3 AS c2_multiplied, \\n t1.c5 * 3 AS c3_multiplied\\nFROM t1\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "e204a85f-7e06-4f2d-8995-cd78e8f5fae2", "when": null, "workflow_outputs": [{"label": "Table-processing-3", "output_name": "output", "uuid": "734962f8-efbf-4b30-a00b-0293b098eb68"}]}, "11": {"annotation": "calculation genomic coordinates looking at peptide location", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 11, "input_connections": {"tables_0|table": {"id": 10, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Peptide-location-generation", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1490.5625, "top": 364.171875}, "post_job_actions": {"ChangeDatatypeActionoutput": {"action_arguments": {"newtype": "tabular"}, "action_type": "ChangeDatatypeAction", "output_name": "output"}, "RenameDatasetActionoutput": {"action_arguments": {"newname": "Peptide-location-generation"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": 
{"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT t1.*, \\n CASE \\n WHEN t1.c7 = '-' THEN t1.c3 - t1.c9\\n WHEN t1.c7 = '+' THEN t1.c2 + t1.c9\\n END AS start,\\n CASE \\n WHEN t1.c7 = '-' THEN t1.c3 - t1.c10\\n WHEN t1.c7 = '+' THEN t1.c2 + t1.c10\\n END AS stop\\nFROM t1\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "af2d679e-d179-455f-ad99-ad90b7d93e59", "when": null, "workflow_outputs": [{"label": "Peptide-location-generation", "output_name": "output", "uuid": "0d9f1cbf-fa72-42c1-ab78-4faa5e123bb1"}]}, "12": {"annotation": "Extracting bed file for Peppointer", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 12, "input_connections": {"tables_0|table": {"id": 11, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "BED-File-for-PepPointer", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 1758.3515625, "top": 424.62890625}, "post_job_actions": {"ChangeDatatypeActionoutput": {"action_arguments": {"newtype": "bed"}, "action_type": "ChangeDatatypeAction", "output_name": "output"}, "RenameDatasetActionoutput": {"action_arguments": {"newname": 
"BED-File-for-PepPointer"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, \"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"no\", \"__current_case__\": 1}, \"save_db\": false, \"sqlquery\": \"SELECT \\n c8 AS `chromosome`, \\n c11 AS `chromStart`, \\n c12 AS `chromEnd`, \\n c1 AS `name`, \\n c6 AS `score`, \\n c7 AS `strand` \\nFROM \\n t1\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "ca9b0790-001a-4afb-9485-2e0b066eb886", "when": null, "workflow_outputs": [{"label": "BED-File-for-PepPointer", "output_name": "output", "uuid": "3970ec24-eb99-459f-8bb4-bc3b69faaca9"}]}, "13": {"annotation": "PepPointer to add annotation to the peptides at Genomic level", "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/pep_pointer/pep_pointer/0.1.3+galaxy1", "errors": null, "id": 13, "input_connections": {"bed": {"id": 12, "output_name": "output"}, "gtf_input|gtf": {"id": 3, "output_name": "output"}}, "inputs": [{"description": "runtime parameter for tool PepPointer", "name": "gtf_input"}], "label": "PepPointer", "name": "PepPointer", "outputs": [{"name": "classified", "type": "tabular"}], "position": {"left": 1943.69921875, "top": 157.91796875}, "post_job_actions": {"RenameDatasetActionclassified": 
{"action_arguments": {"newname": "PepPointer-Annotated-Peptides"}, "action_type": "RenameDatasetAction", "output_name": "classified"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/pep_pointer/pep_pointer/0.1.3+galaxy1", "tool_shed_repository": {"changeset_revision": "a6282baa8c6f", "name": "pep_pointer", "owner": "galaxyp", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"bed\": {\"__class__\": \"ConnectedValue\"}, \"gtf_input\": {\"gtf_source\": \"history\", \"__current_case__\": 1, \"gtf\": {\"__class__\": \"ConnectedValue\"}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "0.1.3+galaxy1", "type": "tool", "uuid": "69a0de4a-8da2-41b0-94d4-fc6ac913db3e", "when": null, "workflow_outputs": [{"label": "PepPointer-Annotated-Peptides", "output_name": "classified", "uuid": "ee5ad6db-d9d4-4ae4-9d69-c37ccb3f4475"}]}, "14": {"annotation": "Summary file for Neoantigen", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "errors": null, "id": 14, "input_connections": {"tables_0|table": {"id": 13, "output_name": "classified"}}, "inputs": [{"description": "runtime parameter for tool Query Tabular", "name": "add_to_database"}], "label": "Summary-Neoantigen", "name": "Query Tabular", "outputs": [{"name": "output", "type": "tabular"}], "position": {"left": 2262.7421875, "top": 236.875}, "post_job_actions": {"ChangeDatatypeActionoutput": {"action_arguments": {"newtype": "tabular"}, "action_type": "ChangeDatatypeAction", "output_name": "output"}, "RenameDatasetActionoutput": {"action_arguments": {"newname": "Neoantigen_summary"}, "action_type": "RenameDatasetAction", "output_name": "output"}}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/query_tabular/query_tabular/3.3.2", "tool_shed_repository": {"changeset_revision": "cf4397560712", "name": "query_tabular", "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu"}, "tool_state": "{\"add_to_database\": {\"withdb\": {\"__class__\": \"RuntimeValue\"}}, 
\"addqueries\": {\"queries\": []}, \"modify_database\": {\"sql_stmts\": []}, \"query_result\": {\"header\": \"yes\", \"__current_case__\": 0, \"header_prefix\": null}, \"save_db\": false, \"sqlquery\": \"SELECT \\nc4 AS Peptide,\\nc1 AS Chromosome,\\nc2 AS Start,\\nc3 AS End,\\nc6 AS Strand,\\nc7 AS Annotation,\\nc1||':'||c2||'-'||c3 AS IGV_Genome_Coordinate,\\n'https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg38&position='||c1||'%3A'||c2||'-'||c3 AS UCSC_Genome_Browser\\nFROM t1\", \"tables\": [{\"__index__\": 0, \"table\": {\"__class__\": \"ConnectedValue\"}, \"input_opts\": {\"linefilters\": []}, \"tbl_opts\": {\"table_name\": \"\", \"column_names_from_first_line\": false, \"col_names\": \"\", \"load_named_columns\": false, \"pkey_autoincr\": \"\", \"indexes\": []}}], \"workdb\": \"workdb.sqlite\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "3.3.2", "type": "tool", "uuid": "92131fb4-5500-4f64-96a5-ff8ed83c0065", "when": null, "workflow_outputs": [{"label": "Neoantigen_summary", "output_name": "output", "uuid": "dbcedf04-dffe-49bc-bac5-a3ef2d41fabd"}]}}, "tags": ["name:neoantigen"], "uuid": "13870faf-ea40-4689-b01f-f53b3369055d", "version": 0}