diff --git a/tools/haltools/.shed.yml b/tools/haltools/.shed.yml new file mode 100644 index 00000000000..8dafdbdde1f --- /dev/null +++ b/tools/haltools/.shed.yml @@ -0,0 +1,17 @@ +name: haltools +owner: iuc +description: A set of tools for manipulating HAL alignment files +long_description: | + HAL is a graph based, phylogenetically structured format for multiple genome alignments. Unlike block based formats such as MAF, it stores genomes in a hierarchy, which allows efficient handling of rearrangements, indels, ancestral states, and flexible queries across any genome or subclade. +homepage_url: https://github.com/ComparativeGenomicsToolkit/hal +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools +categories: +- Sequence Analysis +auto_tool_repositories: + name_template: "{{ tool_id }}" + description_template: "Wrapper for haltool suite: {{ tool_name }}" +suite: + name: "suite_haltools" + description: "A set of tools for manipulating HAL alignment files" + long_description: | + HAL is a graph based, phylogenetically structured format for multiple genome alignments. Unlike block based formats such as MAF, it stores genomes in a hierarchy, which allows efficient handling of rearrangements, indels, ancestral states, and flexible queries across any genome or subclade. 
diff --git a/tools/haltools/hal_halStats.xml b/tools/haltools/hal_halStats.xml new file mode 100644 index 00000000000..0d5df5786d6 --- /dev/null +++ b/tools/haltools/hal_halStats.xml @@ -0,0 +1,424 @@ + + retrieve basic statistics from a HAL file + + macros.xml + + + '$out_file' + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^[^\s,](?:[^,]*[^\s,])?,[0-9]+$ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^\S(?:[^,]*\S)?(?:,\S(?:[^,]*\S)?)*$ + + + + + + ^\S(?:[^,]*\S)?(?:,\S(?:[^,]*\S)?)*$ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/haltools/macros.xml b/tools/haltools/macros.xml new file mode 100644 index 00000000000..46f37c95a7f --- /dev/null +++ b/tools/haltools/macros.xml @@ -0,0 +1,46 @@ + + + + cactus + + + 2.9.9 + 0 + 25.0 + + + + + + + + + + + + + + + + + + ^\S(?:.*\S)?$ + + + + + + + + + + 10.1093/bioinformatics/btt128 + + @misc{githubhal, + title = {HAL GitHub page}, + url = {https://github.com/ComparativeGenomicsToolkit/hal}, + } + + + + \ No newline at end of file diff --git a/tools/haltools/test-data/halStats_allCoverage_output.csv b/tools/haltools/test-data/halStats_allCoverage_output.csv new file mode 100644 index 00000000000..0b418eb2a1e --- /dev/null +++ b/tools/haltools/test-data/halStats_allCoverage_output.csv @@ -0,0 +1,10 @@ +FromGenome, ToGenome, sitesCovered1Times, sitesCovered2Times, sitesCovered3Times, sitesCovered4Times, sitesCovered5Times 
+Genome_1, Genome_1, 5472, 4688, 3516, 2637, 1465 +Genome_2, Genome_1, 4688, 2637, 2637, 1172, 0 +Genome_3, Genome_1, 4688, 4688, 3516, 2960, 880 +Genome_1, Genome_2, 3516, 3516, 2344, 1465, 293 +Genome_2, Genome_2, 4270, 2930, 2930, 1172, 0 +Genome_3, Genome_2, 3516, 3516, 2344, 1788, 176 +Genome_1, Genome_3, 4725, 4725, 3553, 2381, 1348 +Genome_2, Genome_3, 4725, 2791, 2791, 1033, 0 +Genome_3, Genome_3, 6139, 4725, 3553, 3136, 880 diff --git a/tools/haltools/test-data/halStats_baseComp_output.tsv b/tools/haltools/test-data/halStats_baseComp_output.tsv new file mode 100644 index 00000000000..a55068ebdf1 --- /dev/null +++ b/tools/haltools/test-data/halStats_baseComp_output.tsv @@ -0,0 +1,2 @@ +fraction_of_As fraction_of_Gs fraction_of_Cs fraction_of_Ts +0.5 0 0.5 0 diff --git a/tools/haltools/test-data/halStats_bedSequences_output.bed b/tools/haltools/test-data/halStats_bedSequences_output.bed new file mode 100644 index 00000000000..6338c42b6d3 --- /dev/null +++ b/tools/haltools/test-data/halStats_bedSequences_output.bed @@ -0,0 +1 @@ +Genome_0_seq 0 1758 diff --git a/tools/haltools/test-data/halStats_bottomSegments_output.bed b/tools/haltools/test-data/halStats_bottomSegments_output.bed new file mode 100644 index 00000000000..8306a4d14ac --- /dev/null +++ b/tools/haltools/test-data/halStats_bottomSegments_output.bed @@ -0,0 +1,8 @@ +Genome_0_seq 0 293 +Genome_0_seq 293 586 +Genome_0_seq 586 879 +Genome_0_seq 879 1033 +Genome_0_seq 1033 1172 +Genome_0_seq 1172 1348 +Genome_0_seq 1348 1465 +Genome_0_seq 1465 1758 diff --git a/tools/haltools/test-data/halStats_chromSizes_output.tsv b/tools/haltools/test-data/halStats_chromSizes_output.tsv new file mode 100644 index 00000000000..0021e1ef5b9 --- /dev/null +++ b/tools/haltools/test-data/halStats_chromSizes_output.tsv @@ -0,0 +1 @@ +Genome_1_seq 5472 diff --git a/tools/haltools/test-data/halStats_output.txt b/tools/haltools/test-data/halStats_output.txt new file mode 100644 index 00000000000..d3da9b49e95 --- /dev/null 
+++ b/tools/haltools/test-data/halStats_output.txt @@ -0,0 +1,10 @@ + +hal v2.2 +(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0; + +GenomeName, NumChildren, Length, NumSequences, NumTopSegments, NumBottomSegments +Genome_0, 3, 1758, 1, 0, 8 +Genome_1, 0, 5472, 1, 28, 0 +Genome_2, 0, 4270, 1, 20, 0 +Genome_3, 0, 6139, 1, 28, 0 + diff --git a/tools/haltools/test-data/halStats_percentID_output.csv b/tools/haltools/test-data/halStats_percentID_output.csv new file mode 100644 index 00000000000..bc51e8f1b64 --- /dev/null +++ b/tools/haltools/test-data/halStats_percentID_output.csv @@ -0,0 +1,5 @@ +Genome, % ID, numID, numSites +Genome_0, 1, 1758, 1758 +Genome_1, nan, 0, 0 +Genome_2, 1, 586, 586 +Genome_3, nan, 0, 0 diff --git a/tools/haltools/test-data/halStats_sequenceStats_output.csv b/tools/haltools/test-data/halStats_sequenceStats_output.csv new file mode 100644 index 00000000000..49a0143bc8f --- /dev/null +++ b/tools/haltools/test-data/halStats_sequenceStats_output.csv @@ -0,0 +1,3 @@ +SequenceName, Length, NumTopSegments, NumBottomSegments +Genome_0_seq, 1758, 0, 8 + diff --git a/tools/haltools/test-data/halStats_topSegments_output.bed b/tools/haltools/test-data/halStats_topSegments_output.bed new file mode 100644 index 00000000000..ec679718f4a --- /dev/null +++ b/tools/haltools/test-data/halStats_topSegments_output.bed @@ -0,0 +1,28 @@ +Genome_1_seq 0 293 +Genome_1_seq 293 586 +Genome_1_seq 586 879 +Genome_1_seq 879 1033 +Genome_1_seq 1033 1172 +Genome_1_seq 1172 1348 +Genome_1_seq 1348 1465 +Genome_1_seq 1465 1758 +Genome_1_seq 1758 2051 +Genome_1_seq 2051 2227 +Genome_1_seq 2227 2344 +Genome_1_seq 2344 2498 +Genome_1_seq 2498 2637 +Genome_1_seq 2637 2930 +Genome_1_seq 2930 3223 +Genome_1_seq 3223 3399 +Genome_1_seq 3399 3516 +Genome_1_seq 3516 3809 +Genome_1_seq 3809 4102 +Genome_1_seq 4102 4256 +Genome_1_seq 4256 4395 +Genome_1_seq 4395 4571 +Genome_1_seq 4571 4688 +Genome_1_seq 4688 4864 +Genome_1_seq 4864 4981 +Genome_1_seq 4981 5135 +Genome_1_seq 5135 5274 
+Genome_1_seq 5274 5472 diff --git a/tools/haltools/test-data/halTest.hal b/tools/haltools/test-data/halTest.hal new file mode 100644 index 00000000000..14186494d6c Binary files /dev/null and b/tools/haltools/test-data/halTest.hal differ