Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tools/sourmash/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Tool Shed metadata for the sourmash wrappers (consumed by the planemo shed_* commands).
owner: iuc
# Must reference the directory that actually contains these files in tools-iuc.
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/sourmash
homepage_url: https://github.com/sourmash-bio/sourmash
categories:
  - Sequence Analysis
  - Metagenomics
# One Tool Shed repository is generated per tool XML in this directory; without
# this section the suite below would be built with no member repositories.
auto_tool_repositories:
  name_template: "{{ tool_id }}"
  description_template: "Wrapper for the sourmash tool suite: {{ tool_name }}"
suite:
  name: suite_sourmash
  description: "A suite for metagenome analysis and genome comparison using k-mers."
  homepage_url: https://github.com/sourmash-bio/sourmash
  long_description: |
    sourmash is a command-line tool and Python/Rust library for metagenome
    analysis and genome comparison using k-mers. It supports the compositional
    analysis of metagenomes, rapid search of large sequence databases, and
    flexible taxonomic profiling.
20 changes: 20 additions & 0 deletions tools/sourmash/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<macros>
    <!-- Version tokens shared by every wrapper that imports this file. -->
    <token name="@TOOL_VERSION@">4.9.4</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">21.05</token>

    <!-- Software dependency: the sourmash package pinned to @TOOL_VERSION@. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">sourmash</requirement>
        </requirements>
    </xml>

    <!--
        Expands to the citation list AND the creator metadata, so a tool that
        expands "citations" receives both elements at the point of expansion.
    -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.21105/joss.06830</citation>
        </citations>
        <creator>
            <organization name="Galaxy Europe"/>
            <person givenName="Ahmad" familyName="Mahagna" url="https://github.com/Smkingsize"/>
            <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12"/>
        </creator>
    </xml>
</macros>
232 changes: 232 additions & 0 deletions tools/sourmash/sourmash_plot.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
<tool id="sourmash_plot" name="sourmash plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>cluster and visualize comparisons of many signatures</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
## Link the inputs to fixed names: the outputs are collected below as
## cmp.dist.*.png and the labels file is expected next to the matrix.
## Dataset paths are quoted so unusual characters cannot break the shell command.
ln -s '$input' cmp.dist &&
#if $labels_cond.labels_param != "--no-labels --no-indices":
    ln -s '$labels_cond.labels_file' cmp.dist.labels.txt &&
#end if

sourmash plot cmp.dist
    $labels_cond.labels_param

    ## advanced settings
    --vmin $advanced_settings.vmin
    --vmax $advanced_settings.vmax
    --subsample-seed=$advanced_settings.subsample_seed

    ## --subsample is optional; only pass it when a value was provided
    #if $advanced_settings.subsample:
        --subsample=$advanced_settings.subsample
    #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="binary" label="Distance matrix" help="Distance matrix file generated with sourmash compare"/>
        <conditional name="labels_cond">
            <param name="labels_param" type="select" label="Show labels or indices" help="Show sample labels or indices on the dendrogram and matrix">
                <option value="--labels">Labels</option>
                <option value="--indices">Indices</option>
                <option value="--no-labels --no-indices" selected="true">None</option>
            </param>
            <!-- Both labelled modes need the labels file written by sourmash compare. -->
            <when value="--labels">
                <param name="labels_file" type="data" format="txt" label="Labels file" help="Labels file generated with sourmash compare"/>
            </when>
            <when value="--indices">
                <param name="labels_file" type="data" format="txt" label="Labels file" help="Labels file generated with sourmash compare"/>
            </when>
            <when value="--no-labels --no-indices"/>
        </conditional>
        <section name="advanced_settings" title="Advanced Settings" expanded="false">
            <param argument="--vmax" type="float" value="1.0" min="0" max="1.0" label="Maximum heatmap value" help="Upper limit of the heatmap scale"/>
            <param argument="--vmin" type="float" value="0.0" min="0" max="1.0" label="Minimum heatmap value" help="Lower limit of the heatmap scale"/>
            <param argument="--subsample" type="integer" optional="true" min="1" label="Subsample count" help="Randomly downsample the distance matrix to this number of samples before plotting. Use to speed up plotting large matrices. Leave empty to use all samples."/>
            <param argument="--subsample-seed" type="integer" value="1" min="0" label="Random seed" help="Seed for the subsampling process. Use a fixed seed for reproducible plots."/>
        </section>
    </inputs>
    <outputs>
        <data name="matrix" format="png" from_work_dir="cmp.dist.matrix.png" label="${tool.name} on ${on_string}: matrix"/>
        <data name="histogram" format="png" from_work_dir="cmp.dist.hist.png" label="${tool.name} on ${on_string}: histogram"/>
        <data name="dendrogram" format="png" from_work_dir="cmp.dist.dendro.png" label="${tool.name} on ${on_string}: dendrogram"/>
    </outputs>
    <tests>
        <!-- 1) defaults: neither labels nor indices are requested -->
        <test expect_num_outputs="3">
            <param name="input" value="cmp.dist"/>
            <output name="matrix" ftype="png">
                <assert_contents>
                    <has_size size="12k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="histogram" ftype="png">
                <assert_contents>
                    <has_size size="9k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="dendrogram" ftype="png">
                <assert_contents>
                    <has_size size="4k" delta="1k"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <not_has_text text="GCF_000005845.2_ASM584v2_genomic.fna.gz"/>
            </assert_stdout>
        </test>
        <!-- 2) labels -->
        <test expect_num_outputs="3">
            <param name="input" value="cmp.dist"/>
            <conditional name="labels_cond">
                <param name="labels_param" value="--labels"/>
                <param name="labels_file" value="cmp.dist.labels.txt"/>
            </conditional>
            <output name="matrix" ftype="png">
                <assert_contents>
                    <has_size size="22k" delta="2k"/>
                </assert_contents>
            </output>
            <output name="histogram" ftype="png">
                <assert_contents>
                    <has_size size="9k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="dendrogram" ftype="png">
                <assert_contents>
                    <has_size size="7k" delta="1k"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <!-- literal match: the file name contains dots that a regex would treat as wildcards -->
                <has_text text="GCF_000005845.2_ASM584v2_genomic.fna.gz"/>
            </assert_stdout>
        </test>
        <!-- 3) explicitly no labels and no indices -->
        <test expect_num_outputs="3">
            <param name="input" value="cmp.dist"/>
            <conditional name="labels_cond">
                <param name="labels_param" value="--no-labels --no-indices"/>
            </conditional>
            <output name="matrix" ftype="png">
                <assert_contents>
                    <has_size size="11k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="histogram" ftype="png">
                <assert_contents>
                    <has_size size="9k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="dendrogram" ftype="png">
                <assert_contents>
                    <has_size size="3k" delta="1k"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <not_has_text text="GCF_000005845.2_ASM584v2_genomic.fna.gz"/>
            </assert_stdout>
        </test>
        <!-- 4) custom heatmap scale (vmin / vmax) -->
        <test expect_num_outputs="3">
            <param name="input" value="cmp.dist"/>
            <conditional name="labels_cond">
                <param name="labels_param" value="--no-labels --no-indices"/>
            </conditional>
            <section name="advanced_settings">
                <param name="vmax" value="0.5"/>
                <param name="vmin" value="0.3"/>
            </section>
            <output name="matrix" ftype="png">
                <assert_contents>
                    <has_size size="17k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="histogram" ftype="png">
                <assert_contents>
                    <has_size size="10k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="dendrogram" ftype="png">
                <assert_contents>
                    <has_size size="4k" delta="1k"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <not_has_text text="GCF_000005845.2_ASM584v2_genomic.fna.gz"/>
            </assert_stdout>
        </test>
        <!-- 5) subsample -->
        <test expect_num_outputs="3">
            <param name="input" value="cmp.dist"/>
            <conditional name="labels_cond">
                <param name="labels_param" value="--no-labels --no-indices"/>
            </conditional>
            <section name="advanced_settings">
                <param name="subsample" value="5"/>
            </section>
            <output name="matrix" ftype="png">
                <assert_contents>
                    <has_size size="12k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="histogram" ftype="png">
                <assert_contents>
                    <has_size size="10k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="dendrogram" ftype="png">
                <assert_contents>
                    <has_size size="4k" delta="1k"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <not_has_text text="GCF_000005845.2_ASM584v2_genomic.fna.gz"/>
            </assert_stdout>
        </test>
        <!-- 6) non-default subsample seed (no subsample count is set in this test) -->
        <test expect_num_outputs="3">
            <param name="input" value="cmp.dist"/>
            <conditional name="labels_cond">
                <param name="labels_param" value="--no-labels --no-indices"/>
            </conditional>
            <section name="advanced_settings">
                <param name="subsample_seed" value="15"/>
            </section>
            <output name="matrix" ftype="png">
                <assert_contents>
                    <has_size size="12k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="histogram" ftype="png">
                <assert_contents>
                    <has_size size="10k" delta="1k"/>
                </assert_contents>
            </output>
            <output name="dendrogram" ftype="png">
                <assert_contents>
                    <has_size size="4k" delta="1k"/>
                </assert_contents>
            </output>
            <assert_stdout>
                <not_has_text text="GCF_000005845.2_ASM584v2_genomic.fna.gz"/>
            </assert_stdout>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

``sourmash plot`` visualizes a distance matrix produced by ``sourmash compare``.

**Inputs**

- The distance matrix written by ``sourmash compare``.
- Optionally, the labels file written by ``sourmash compare``. It is required
  when labels or indices should be shown on the plots.

**Outputs**

Three PNG images are produced:

- **matrix**: the matrix plotted as a heatmap
- **histogram**: a histogram of the values in the matrix
- **dendrogram**: a dendrogram of the samples

**Options exposed by this wrapper**

======================  ====================================================
Option                  Description
======================  ====================================================
``--labels``            show sample labels on dendrogram/matrix
``--indices``           show sample indices on dendrogram/matrix
``--vmin VMIN``         lower limit of heatmap scale; default=0.000000
``--vmax VMAX``         upper limit of heatmap scale; default=1.000000
``--subsample N``       randomly downsample to this many samples, max
``--subsample-seed S``  random seed for --subsample; default=1
======================  ====================================================
    ]]></help>
    <expand macro="citations"/>
</tool>
Loading