Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions tools/haltools/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Tool Shed metadata for the haltools repository.
name: haltools
owner: iuc
description: A set of tools for manipulating HAL alignment files
long_description: |
  HAL (Hierarchical Alignment Format) is a graph-based structure for
  efficiently storing and indexing multiple genome alignments and
  ancestral reconstructions. This repository provides Galaxy wrappers
  for the command-line tools of the HAL toolkit, which are used to
  inspect and manipulate HAL alignment files.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please write a long description

homepage_url: https://github.com/ComparativeGenomicsToolkit/hal
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools
categories:
  - Sequence Analysis
# One Tool Shed repository is generated per tool; the templates below are
# filled in with each tool's id and name.
auto_tool_repositories:
  name_template: "{{ tool_id }}"
  description_template: "Wrapper for hal tool suite: {{ tool_name }}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
description_template: "Wrapper for hal tool suite: {{ tool_name }}"
description_template: "Wrapper for haltools suite: {{ tool_name }}"

# Suite repository that groups the individual haltools repositories.
suite:
  name: "suite_haltools"
  description: "A set of tools for manipulating HAL alignment files"
  long_description: |
    HAL (Hierarchical Alignment Format) is a graph-based structure for
    efficiently storing and indexing multiple genome alignments and
    ancestral reconstructions. This suite bundles the Galaxy wrappers
    for the command-line tools of the HAL toolkit, which are used to
    inspect and manipulate HAL alignment files.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here too

238 changes: 238 additions & 0 deletions tools/haltools/hal_halStats.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
<tool id="hal_halstats" name="HAL halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.0">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<tool id="hal_halstats" name="HAL halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.0">
<tool id="hal_halstats" name="HAL halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Define a PROFILE token in macros.xml

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<tool id="hal_halstats" name="HAL halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.0">
<tool id="hal_halstats" name="halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="25.0">

<description>retrieves basic statistics from a hal file</description>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<description>retrieves basic statistics from a hal file</description>
<description>retrieve basic statistics from a HAL file</description>

<!-- Pull in the shared tokens and XML macros defined in macros.xml. -->
<macros>
    <import>macros.xml</import>
</macros>
<!-- Expands to the <requirements> element declared by the "requirements" macro. -->
<expand macro="requirements"/>
<command detect_errors="aggressive"><![CDATA[
## Decide up front where stdout is redirected: only --allCoverage with its
## checkbox ticked goes to stats.csv, every other invocation goes to stats.txt.
#if $mode.option == "--allCoverage" and $mode.allCoverage
    #set $stats_file = 'stats.csv'
#else
    #set $stats_file = 'stats.txt'
#end if

halStats
    #if $inputFormat.type == "mmap"
        --format 'mmap'
    #end if

    ## Flag-only print options: the boolean parameter renders the flag itself
    ## (its truevalue) or an empty string when left unchecked.
    #if $mode.option == "--allCoverage"
        $mode.allCoverage
    #elif $mode.option == "--branches"
        $mode.branches
    #elif $mode.option == "--genomes"
        $mode.genomes
    #elif $mode.option == "--metaData"
        $mode.metaData
    #elif $mode.option == "--root"
        $mode.root
    #elif $mode.option == "--tree"
        $mode.tree

    ## Print options that take a user-supplied text value.
    #elif $mode.option == "--baseComp"
        --baseComp '$mode.baseComp'
    #elif $mode.option == "--bedSequences"
        --bedSequences '$mode.bedSequences'
    #elif $mode.option == "--bottomSegments"
        --bottomSegments '$mode.bottomSegments'
    #elif $mode.option == "--branchLength"
        --branchLength '$mode.branchLength'
    #elif $mode.option == "--children"
        --children '$mode.children'
    #elif $mode.option == "--chromSizes"
        --chromSizes '$mode.chromSizes'
    #elif $mode.option == "--coverage"
        --coverage '$mode.coverage'
    #elif $mode.option == "--genomeMetaData"
        --genomeMetaData '$mode.genomeMetaData'
    #elif $mode.option == "--numSegments"
        --numSegments '$mode.numSegments'
    #elif $mode.option == "--parent"
        --parent '$mode.parent'
    #elif $mode.option == "--percentID"
        --percentID '$mode.percentID'
    #elif $mode.option == "--sequenceStats"
        --sequenceStats '$mode.sequenceStats'
    #elif $mode.option == "--sequences"
        --sequences '$mode.sequences'
    #elif $mode.option == "--span"
        --span '$mode.span'
    #elif $mode.option == "--spanRoot"
        --spanRoot '$mode.spanRoot'
    #elif $mode.option == "--topSegments"
        --topSegments '$mode.topSegments'
    #end if

    '$in_hal_file' > $stats_file
]]></command>
<inputs>
<expand macro="in_hal_file"/>
<expand macro="params_inputFormat"/>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<expand macro="params_inputFormat"/>
<expand macro="hal_backend_format"/>

<conditional name="mode">
<param name="option" type="select" label="Select a print option" refresh_on_change="true"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<param name="option" type="select" label="Select a print option" refresh_on_change="true"
<param name="option" type="select" label="Select a print option"

help="Select a print option from the drop down to display its description and to enable it accordingly">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

format

<option value="" selected="true">Default</option>
<option value="--allCoverage">All Coverage (--allCoverage)</option>
<option value="--branches">List of branches (--branches)</option>
<option value="--sequences">List of sequences (--sequences)</option>
<option value="--genomes">List of genomes (--genomes)</option>
<option value="--baseComp">Base composition (--baseComp)</option>
<option value="--bedSequences">Bed sequences (--bedSequences)</option>
<option value="--sequenceStats">Stats for each sequence (--sequenceStats)</option>
<option value="--topSegments">Top segments (--topSegments)</option>
<option value="--bottomSegments">Bottom segments (--bottomSegments)</option>
<option value="--numSegments">Number of segments (--numSegments)</option>
<option value="--branchLength">Branch length (--branchLength)</option>
<option value="--parent">Parent name (--parent)</option>
<option value="--root">Root genome name (--root)</option>
<option value="--children">Names of children (--children)</option>
<option value="--chromSizes">Chrom sizes (--chromSizes)</option>
<option value="--coverage">Histogram of coverage (--coverage)</option>
<option value="--metaData">Metadata for alignment (--metaData)</option>
<option value="--genomeMetaData">Metadata for given genome (--genomeMetaData)</option>
<option value="--percentID">Percent ID (--percentID)</option>
<option value="--span">Span (--span)</option>
<option value="--spanRoot">Span root (--spanRoot)</option>
<option value="--tree">NEWICK tree (--tree)</option>
</param>
<when value="">
</when>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not required

<when value="--allCoverage">
<param name="allCoverage" type="boolean" truevalue="--allCoverage" falsevalue="" checked="false"
label="All Coverage"
help="Print histogram of coverage from all genomes to all genomes" />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Format

</when>
<when value="--baseComp">
<param name="baseComp" type="text" value=""
label="Base composition"
help="Print base composition for given genome by sampling every step bases. Parameter value is of the form genome,step. Ex: human,1000. The output is of the form fraction_of_As fraction_of_Gs fraction_of_Cs fraction_of_Ts" />
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe specify an example and consider defining a sanitizer

</when>
<!--
    One <when> per print option. Options that take a value expose a text
    parameter; an empty_field validator rejects blank input so the command is
    never rendered with an empty quoted argument. Flag-only options expose a
    boolean whose truevalue is the flag itself.
-->
<when value="--bedSequences">
    <param name="bedSequences" type="text" value=""
           label="Bed sequences"
           help="Print sequences of given genome in bed format">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--topSegments">
    <param name="topSegments" type="text" value=""
           label="Top segments"
           help="Print coordinates of all top segments of given genome in BED format">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--bottomSegments">
    <param name="bottomSegments" type="text" value=""
           label="Bottom segments"
           help="Print coordinates of all bottom segments of given genome in BED format">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--branchLength">
    <param name="branchLength" type="text" value=""
           label="Branch length"
           help="Print branch length between given genome and its parent in the tree">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--branches">
    <param name="branches" type="boolean" truevalue="--branches" falsevalue="" checked="false"
           label="List of branches"
           help="Print list of branches. Each branch is specified by the child genome" />
</when>
<when value="--children">
    <param name="children" type="text" value=""
           label="Names of children"
           help="Print names of children of given genome">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--chromSizes">
    <param name="chromSizes" type="text" value=""
           label="Chrom sizes"
           help="Print the name and length of each sequence in a given genome. This is a subset of the information returned by --sequenceStats but is useful because it is in the format used by wigToBigWig">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--coverage">
    <param name="coverage" type="text" value=""
           label="Histogram of coverage"
           help="Print histogram of coverage of a genome with all genomes">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--genomeMetaData">
    <param name="genomeMetaData" type="text" value=""
           label="Metadata for given genome"
           help="Print metadata for given genome, one entry per line, tab-separated">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--genomes">
    <param name="genomes" type="boolean" truevalue="--genomes" falsevalue="" checked="false"
           label="List of genomes"
           help="Print only a list of genomes in alignment" />
</when>
<when value="--metaData">
    <param name="metaData" type="boolean" truevalue="--metaData" falsevalue="" checked="false"
           label="Metadata for alignment"
           help="Print metadata for the entire alignment" />
</when>
<when value="--numSegments">
    <param name="numSegments" type="text" value=""
           label="Number of segments"
           help="Print numTopSegments numBottomSegments for given genome">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--parent">
    <param name="parent" type="text" value=""
           label="Parent name"
           help="Print name of parent of given genome">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--percentID">
    <param name="percentID" type="text" value=""
           label="Percent ID"
           help="Print % ID of a genome with all other genomes. Only non-duplicated and unambiguous sites are considered">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--root">
    <param name="root" type="boolean" truevalue="--root" falsevalue="" checked="false"
           label="Root genome name"
           help="Print root genome name" />
</when>
<when value="--sequenceStats">
    <param name="sequenceStats" type="text" value=""
           label="Stats for each sequence"
           help="Print stats for each sequence in given genome">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--sequences">
    <param name="sequences" type="text" value=""
           label="List of sequences"
           help="Print list of sequences in given genome">
        <validator type="empty_field" message="Please provide a genome name"/>
    </param>
</when>
<when value="--span">
    <param name="span" type="text" value=""
           label="Span"
           help="Print branches on path (or spanning tree) between comma separated list of genomes">
        <validator type="empty_field" message="Please provide a comma-separated list of genomes"/>
    </param>
</when>
<when value="--spanRoot">
    <param name="spanRoot" type="text" value=""
           label="Span root"
           help="Print genomes on path (or spanning tree) between comma separated list of genomes. Different from --span only in that the spanning tree root is also given">
        <validator type="empty_field" message="Please provide a comma-separated list of genomes"/>
    </param>
</when>
<when value="--tree">
    <param name="tree" type="boolean" truevalue="--tree" falsevalue="" checked="false"
           label="NEWICK tree"
           help="Print only the NEWICK tree" />
</when>
</conditional>
</inputs>
<outputs>
<!--
    Collected whenever the command redirects to stats.txt, i.e. for every
    invocation except "allCoverage selected AND its checkbox ticked". The
    previous filter dropped this output for any allCoverage selection, so an
    unticked checkbox produced stats.txt but no dataset at all.
-->
<data name="out_file_txt" format="txt" from_work_dir="stats.txt" label="${tool.name} on ${on_string}: Stats">
    <filter>not (mode['option'] == '--allCoverage' and mode['allCoverage'])</filter>
</data>
<data name="out_file_csv" format="tabular" from_work_dir="stats.csv" label="${tool.name} on ${on_string}: CSV Stats">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<data name="out_file_csv" format="tabular" from_work_dir="stats.csv" label="${tool.name} on ${on_string}: CSV Stats">
<data name="out_file_csv" format="csv" from_work_dir="stats.csv" label="${tool.name} on ${on_string}: CSV Stats">

<filter>mode['option'] == '--allCoverage' and mode['allCoverage']</filter>
</data>
</outputs>
<tests>
    <!-- Default mode: no print option selected, full statistics written to stats.txt. -->
    <test expect_num_outputs="1">
        <param name="in_hal_file" value="halTest.hal"/>
        <output name="out_file_txt" file="halStats_output.txt"/>
    </test>
    <!-- allCoverage with the flag enabled: output is collected from stats.csv. -->
    <test expect_num_outputs="1">
        <param name="in_hal_file" value="halTest.hal"/>
        <conditional name="mode">
            <param name="option" value="--allCoverage"/>
            <param name="allCoverage" value="true"/>
        </conditional>
        <output name="out_file_csv" file="halStats_allCoverage_output.csv"/>
    </test>
    <!-- Flag-only option: the genome list must mention a genome of the test alignment. -->
    <test expect_num_outputs="1">
        <param name="in_hal_file" value="halTest.hal"/>
        <conditional name="mode">
            <param name="option" value="--genomes"/>
            <param name="genomes" value="true"/>
        </conditional>
        <output name="out_file_txt">
            <assert_contents>
                <has_text text="Genome_1"/>
            </assert_contents>
        </output>
    </test>
</tests>
Comment on lines 228 to 416
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please extend your test for other parameters.

<help><![CDATA[
Some global information from a HAL file can be quickly obtained using halStats. It will return the number of genomes, their phylogenetic tree, and the size of each array in each genome.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some additional help text would be beneficial


**USAGE:**
halStats [Options] <halFile>

The --tree, --sequences, and --genomes options can be used to print out only specific information to simplify iterating over the alignment in shell or Python scripts.
]]></help>
<expand macro="citation" />
</tool>
112 changes: 112 additions & 0 deletions tools/haltools/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
<macros>
<!-- Software dependency shared by the tools: the cactus package, pinned to @TOOL_VERSION@. -->
<xml name="requirements">
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">cactus</requirement>
    </requirements>
</xml>
<!-- Version of the packaged software; used for the requirement version and as the first part of the tool version. -->
<token name="@TOOL_VERSION@">2.9.9</token>
<!-- Wrapper revision appended to the tool version as "+galaxy<suffix>". -->
<token name="@VERSION_SUFFIX@">0</token>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Introduce a @PROFILE@ token

<xml name="in_hal_file">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<xml name="in_hal_file">
<xml name="input_hal">

<param name="in_hal_file" type="data" format="hal" label="Input hal file to analyze" optional="false"/>
</xml>
<!-- Required integer parameter for the initial size of an mmap HAL file, in gigabytes. -->
<xml name="params_mmap_size">
    <param argument="--mmapFileSize"
           type="integer"
           value="60"
           min="0"
           optional="false"
           label="mmap HAL file initial size"
           help="mmap HAL file initial size (in gigabytes)"/>
</xml>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can skip this

<xml name="params_inputFormat">
<conditional name="inputFormat">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

conditional not required here

<param name="type" type="select" refresh_on_change="true"
label="Back-end storage format"
help="Choose the back-end storage format of the input hal file">
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

format please

<option value="hdf5" selected="true">HDF5</option>
<option value="mmap">mmap</option>
</param>
<when value="hdf5">
</when>
<when value="mmap">
</when>
</conditional>
</xml>
<xml name="params_cache">
<!--
    HDF5 caching options. "default" exposes nothing; "custom" reveals a nested
    conditional in which the cache tuning parameters are shown only while
    "HDF5 in memory" is switched off.
-->
<conditional name="settings">
    <param name="cache" type="select" label="Specify HDF5 caching parameters">
        <option value="default" selected="true">No, use program defaults.</option>
        <option value="custom">Yes, see full parameter list.</option>
    </param>
    <when value="default"/>
    <when value="custom">
        <conditional name="memory">
            <param name="hdf5InMemory"
                   type="boolean"
                   label="HDF5 in memory"
                   help="Load all data in memory (and disable hdf5 cache) (--hdf5InMemory)"/>
            <when value="true"/>
            <when value="false">
                <param argument="--hdf5CacheBytes"
                       type="integer"
                       value="1048576"
                       min="0"
                       label="HDF5 cache bytes"
                       help="Maximum size in bytes of regular hdf5 cache"/>
                <param argument="--hdf5CacheMDC"
                       type="integer"
                       value="113"
                       min="0"
                       label="HDF5 cache MDC"
                       help="Number of metadata slots in hdf5 cache"/>
                <param argument="--hdf5CacheRDC"
                       type="integer"
                       value="521"
                       min="0"
                       label="HDF5 cache RDC"
                       help="Number of regular slots in hdf5 cache. Should be a prime number ~= 10 * DefaultCacheRDCBytes / chunk"/>
                <param argument="--hdf5CacheW0"
                       type="float"
                       value="0.75"
                       min="0"
                       label="HDF5 cache W0"
                       help="w0 parameter for hdf5 cache"/>
            </when>
        </conditional>
    </when>
</conditional>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed; this will be taken care of by a TPV rule

</xml>
<!--
    Extended HDF5 options: the same cache tuning parameters as the basic cache
    macro (all marked optional="false" here), plus chunk size and compression
    factor, which are shown whenever "custom" is selected.
-->
<xml name="params_cache_extended">
    <conditional name="settings">
        <param name="cache" type="select" label="Specify HDF5 parameters">
            <option value="default" selected="true">No, use program defaults.</option>
            <option value="custom">Yes, see full parameter list.</option>
        </param>
        <when value="default"/>
        <when value="custom">
            <conditional name="memory">
                <param name="hdf5InMemory"
                       type="boolean"
                       optional="false"
                       label="HDF5 in memory"
                       help="Load all data in memory (and disable hdf5 cache) (--hdf5InMemory)"/>
                <when value="true"/>
                <when value="false">
                    <param argument="--hdf5CacheBytes"
                           type="integer"
                           value="1048576"
                           min="0"
                           optional="false"
                           label="HDF5 cache bytes"
                           help="Maximum size in bytes of regular hdf5 cache"/>
                    <param argument="--hdf5CacheMDC"
                           type="integer"
                           value="113"
                           min="0"
                           optional="false"
                           label="HDF5 cache MDC"
                           help="Number of metadata slots in hdf5 cache"/>
                    <param argument="--hdf5CacheRDC"
                           type="integer"
                           value="521"
                           min="0"
                           optional="false"
                           label="HDF5 cache RDC"
                           help="Number of regular slots in hdf5 cache. Should be a prime number ~= 10 * DefaultCacheRDCBytes / chunk"/>
                    <param argument="--hdf5CacheW0"
                           type="float"
                           value="0.75"
                           min="0"
                           optional="false"
                           label="HDF5 cache W0"
                           help="w0 parameter for hdf5 cache"/>
                </when>
            </conditional>
            <param argument="--hdf5Chunk"
                   type="integer"
                   value="1000"
                   min="0"
                   optional="false"
                   label="HDF5 chunk size"
                   help="HDF5 chunk size"/>
            <param argument="--hdf5Compression"
                   type="integer"
                   value="2"
                   min="0"
                   max="9"
                   optional="false"
                   label="HDF5 compression factor"
                   help="HDF5 compression factor"/>
        </when>
    </conditional>
</xml>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This as well

<xml name="citation">
<citations>
<citation type="doi">10.1093/bioinformatics/btt128</citation>
<citation type="bibtex">
@misc{githubhal,
title = {HAL GitHub page},
url = {https://github.com/ComparativeGenomicsToolkit/hal},
}
</citation>
Comment on lines +38 to +43
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
<citation type="bibtex">
@misc{githubhal,
title = {HAL GitHub page},
url = {https://github.com/ComparativeGenomicsToolkit/hal},
}
</citation>

</citations>
</xml>
</macros>
10 changes: 10 additions & 0 deletions tools/haltools/test-data/halStats_allCoverage_output.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
FromGenome, ToGenome, sitesCovered1Times, sitesCovered2Times, sitesCovered3Times, sitesCovered4Times, sitesCovered5Times
Genome_1, Genome_1, 5472, 4688, 3516, 2637, 1465
Genome_2, Genome_1, 4688, 2637, 2637, 1172, 0
Genome_3, Genome_1, 4688, 4688, 3516, 2960, 880
Genome_1, Genome_2, 3516, 3516, 2344, 1465, 293
Genome_2, Genome_2, 4270, 2930, 2930, 1172, 0
Genome_3, Genome_2, 3516, 3516, 2344, 1788, 176
Genome_1, Genome_3, 4725, 4725, 3553, 2381, 1348
Genome_2, Genome_3, 4725, 2791, 2791, 1033, 0
Genome_3, Genome_3, 6139, 4725, 3553, 3136, 880
Loading
Loading