Skip to content

Commit 378faa7

Browse files
committed
Chunk SanntiS - and "sync" inputs for this
This is an attempt to make SanntiS runtime and memory usage more manageable. The input contigs, their proteins and interproscan annotations are now "synced" with a bespoke script. This should make SanntiS resource usage more predictable.
1 parent 1e3b7e4 commit 378faa7

File tree

3 files changed

+30
-23
lines changed

3 files changed

+30
-23
lines changed

tests/assembly_erz101.fasta.gz

5.72 KB
Binary file not shown.

tests/default.nf.test.snap

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"-profile test": {
33
"content": [
4-
151,
4+
155,
55
{
66
"ANTISMASH_ANTISMASH": {
77
"antismash": "8.0.1",
@@ -62,6 +62,9 @@
6262
"CONCATENATE_INTERPROSCAN_TSV": {
6363
"pigz": "2.3.4"
6464
},
65+
"CONCATENATE_SANNTIS_GFFS": {
66+
"python": "3.13.1"
67+
},
6568
"CREATE_GFF_SUMMARY": {
6669
"mgnify-pipelines-toolkit": "1.4.9"
6770
},
@@ -106,6 +109,10 @@
106109
"seqkit": "v2.10.0",
107110
"samtools": "1.22.1+htslib-1.22.1"
108111
},
112+
"FILTER_IPS_AND_FAA_BY_CONTIGS": {
113+
"seqkit": "2.13.0",
114+
"python": "3.14.3"
115+
},
109116
"FIND_UNPIGZ": {
110117
"find": "4.6.0",
111118
"pigz": 2.8
@@ -432,14 +439,14 @@
432439
"qc_failed_assemblies.csv"
433440
],
434441
[
435-
"ERZ101_annotation_summary.gff.gz:md5,76972e908b4d8272614fbe079d1b106d",
436-
"ERZ101_annotation_summary.gff.gz.csi:md5,6373535e5e29235e1a74919d299cd730",
442+
"ERZ101_annotation_summary.gff.gz:md5,1dfb47ffc087a1f40c77afe955027a9e",
443+
"ERZ101_annotation_summary.gff.gz.csi:md5,d87619e95adab1761233f9ba24ca7f80",
437444
"ERZ101_annotation_summary.gff.gz.gzi:md5,ae516cdccf54e18c35d9d77c7aae27f4",
438-
"ERZ101_predicted_cds.gff.gz:md5,8449d2fa579a5c8d188b2d13a09439c4",
439-
"ERZ101_dbcan_cgc.gff.gz:md5,e14511869398914ec5ec9037ae60f7b0",
445+
"ERZ101_predicted_cds.gff.gz:md5,aa149f822f5c05de1c63674a22f911d4",
446+
"ERZ101_dbcan_cgc.gff.gz:md5,83e22b309e638d71766423f3cc7ad498",
440447
"ERZ101_dbcan_overview.tsv.gz:md5,71857c4ba58ecf8345838f2f2aef11cb",
441448
"ERZ101_dbcan_standard_out.tsv.gz:md5,6be9ab29b289ff46cc6e4b6fe48dc3d7",
442-
"ERZ101_dbcan_sub_hmm.tsv.gz:md5,b9813dc2c2c30d062597f6ff81807706",
449+
"ERZ101_dbcan_sub_hmm.tsv.gz:md5,05bc3a17c1ffcd562352bf10cc4d8da9",
443450
"ERZ101_dbcan_substrates.tsv.gz:md5,d41d8cd98f00b204e9800998ecf8427e",
444451
"ERZ101_emapper_annotations.tsv.gz:md5,acf16652adebcb33083edd74c5ec6e5f",
445452
"ERZ101_emapper_seed_orthologs.tsv.gz:md5,57c36dd24cfd371f6a183fcd8cc85619",
@@ -455,7 +462,7 @@
455462
"ERZ101_pfam_summary.tsv.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474",
456463
"ERZ101_proteins2rhea.tsv.gz:md5,d41d8cd98f00b204e9800998ecf8427e",
457464
"ERZ101_proteins2rhea.tsv.gz.gzi:md5,c2cb56f4c5bf656faca0986e7eba0308",
458-
"ERZ101_antismash_summary.tsv.gz:md5,6ad987c8844dc8cf1ef764afca708c12",
465+
"ERZ101_antismash_summary.tsv.gz:md5,0fd7d1a0e04f0a25a8e49a697c986a4f",
459466
"ERZ101_antismash_summary.tsv.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474",
460467
"ERZ101_dram.html.gz:md5,65198bf5083e96a1e00861290586a0c4",
461468
"ERZ101_dram.tsv.gz:md5,b913fe1ba596ac43f9a38058c3aae645",
@@ -466,15 +473,15 @@
466473
"ERZ101_kegg_modules_summary.tsv.gz:md5,9d9859bfbbfac5b1708b6472f7eb74bb",
467474
"ERZ101_kegg_modules_summary.tsv.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474",
468475
"ERZ101_sanntis.gff.gz:md5,df19e1b84ba6f691d20c72b397c88abf",
469-
"ERZ101_filtered_contigs.fasta.gz:md5,6499031bc92e9dd14320dc30de65b438",
470-
"ERZ101_filtered_contigs.fasta.gz.fai:md5,7a69e27810bebf9e31e84bc9b6d62206",
471-
"ERZ101_filtered_contigs.fasta.gz.gzi:md5,4e8f724db1539b04d8cde5564f0b471c",
472-
"ERZ101_quast_stats.tsv.gz:md5,94b537397fa8207f31e84d798eca4f67",
476+
"ERZ101_filtered_contigs.fasta.gz:md5,010fd0b578bdd9629e9a9b41e8b5bbd6",
477+
"ERZ101_filtered_contigs.fasta.gz.fai:md5,57034897e5827c1dba986c70145a1d63",
478+
"ERZ101_filtered_contigs.fasta.gz.gzi:md5,5737309e5668e4669c50a0a7f3c7ff0b",
479+
"ERZ101_quast_stats.tsv.gz:md5,f1c11d01769d2f12c567067c0f732864",
473480
"ERZ101_aligned_to_contaminant.tsv.gz:md5,bf678dade1ff3067918b61ce0d4225e6",
474481
"ERZ101_aligned_to_human.tsv.gz:md5,515849d5bcd7dfbdb518539785d47f50",
475482
"ERZ101.html:md5,e56d9f9358ef11fc76a5c4d95bcea909",
476483
"ERZ101.krona.txt.gz:md5,e8f24f3ddbf7289920fae9f28ab185c8",
477-
"ERZ101_contigs_taxonomy.tsv.gz:md5,79cd8563ff9bca613332d1a516f0ef3e",
484+
"ERZ101_contigs_taxonomy.tsv.gz:md5,3f110e2b35b9b2c79be1607c51b27ecf",
478485
"ERZ102_annotation_summary.gff.gz:md5,fbbaf2729dc18319f9ee628c28202b24",
479486
"ERZ102_annotation_summary.gff.gz.csi:md5,227cc48ce8973f38f00dd7bd67365899",
480487
"ERZ102_annotation_summary.gff.gz.gzi:md5,ae516cdccf54e18c35d9d77c7aae27f4",
@@ -522,21 +529,21 @@
522529
"analysed_assemblies.csv:md5,333c3997aa4621cf3a644719e9021296",
523530
"samplesheet_dram.tsv.gz:md5,d9509874f9f6bbc2ab165106655fbce1",
524531
"multiqc_citations.txt:md5,2d1a8ef8ba06c7eada06ab3e96552ea4",
525-
"multiqc_general_stats.txt:md5,214026c97163f61612fc28449b589f97",
532+
"multiqc_general_stats.txt:md5,8b5ca1d9ddb8883eac63fffefd2ae6b8",
526533
"multiqc_host_decontamination_table.txt:md5,b8d06985826c0451c759e08382db2347",
527534
"multiqc_host_decontamination_table_table.txt:md5,b8d06985826c0451c759e08382db2347",
528535
"multiqc_human_decontamination_table.txt:md5,f8bc5f4b45b46d49826e5d8207a3b8f2",
529536
"multiqc_human_decontamination_table_table.txt:md5,f8bc5f4b45b46d49826e5d8207a3b8f2",
530-
"multiqc_quast.txt:md5,d8600bbb835979a7817be9827ff85be2",
537+
"multiqc_quast.txt:md5,dc380d5fca13b91b7c21552ba8cd9f70",
531538
"quast_num_contigs.txt:md5,a078f48987914adbb249f0d8382f31f1",
532-
"quast_table.txt:md5,be0aea9b4cf84382aa6dc71787f4203a",
539+
"quast_table.txt:md5,4dfa7d5c1a3715c24c29d1fe8c458319",
533540
"qc_failed_assemblies.csv:md5,3292564aa6122920901d9bb724447ad4"
534541
]
535542
],
543+
"timestamp": "2026-03-09T09:26:09.063484904",
536544
"meta": {
537-
"nf-test": "0.9.2",
538-
"nextflow": "25.10.3"
539-
},
540-
"timestamp": "2026-02-25T22:19:26.921976"
545+
"nf-test": "0.9.4",
546+
"nextflow": "25.10.0"
547+
}
541548
}
542549
}

tests/test_samplesheet.csv

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
sample,assembly_fasta,contaminant_reference,human_reference,phix_reference
2-
ERZ101,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/main/tests/assembly_erz101.fasta.gz,contamination,human,
3-
ERZ102,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/main/tests/assembly_erz102.fasta.gz,,,
4-
ERZ666,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/main/tests/assembly_qc_fail_length_filtered.fasta.gz,,,
5-
ERZ999,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/main/tests/assembly_qc_fail_n_bases.fasta.gz,,,
2+
ERZ101,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/feature/chunk-contigs-and-ips/tests/assembly_erz101.fasta.gz,contamination,human,
3+
ERZ102,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/feature/chunk-contigs-and-ips/tests/assembly_erz102.fasta.gz,,,
4+
ERZ666,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/feature/chunk-contigs-and-ips/tests/assembly_qc_fail_length_filtered.fasta.gz,,,
5+
ERZ999,https://github.com/EBI-Metagenomics/assembly-analysis-pipeline/raw/refs/heads/feature/chunk-contigs-and-ips/tests/assembly_qc_fail_n_bases.fasta.gz,,,

0 commit comments

Comments
 (0)