Commit 652b0c1

Merge pull request nf-core#1063 from nf-core/nf-test-conversion

Add nf-test

2 parents 1001dd2 + 43c6bba, commit 652b0c1

File tree

8 files changed (+1148, -16 lines)

.github/workflows/ci.yml

Lines changed: 21 additions & 3 deletions

@@ -1,10 +1,13 @@
-name: nf-core CI
 # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
+name: nf-core CI
 on:
   push:
     branches:
-      - dev
+      - "dev"
   pull_request:
+    branches:
+      - "dev"
+      - "master"
   release:
     types: [published]
   workflow_dispatch:
@@ -15,16 +18,31 @@ env:
   NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity

 concurrency:
-  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true

 jobs:
+  define_nxf_versions:
+    name: Choose nextflow versions to test against depending on target branch
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.nxf_versions.outputs.matrix }}
+    steps:
+      - id: nxf_versions
+        run: |
+          if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "dev" && "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then
+            echo matrix='["latest-everything"]' | tee -a $GITHUB_OUTPUT
+          else
+            echo matrix='["latest-everything", "23.10.0"]' | tee -a $GITHUB_OUTPUT
+          fi
+
   test:
     name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})"
     # Only run on push if this is the nf-core dev branch (merged PRs)
     if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/eager') }}"
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
         NXF_VER:
           - "24.04.2"

.gitignore

Lines changed: 1 addition & 0 deletions

@@ -8,3 +8,4 @@ testing*
 *.pyc
 null/
 .nf-test*
+

conf/modules.config

Lines changed: 0 additions & 13 deletions

@@ -1067,19 +1067,6 @@ process {
         ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}taxpasta_table.tsv" }
     }

-    //
-    // QUALIMAP
-    //
-
-    withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' {
-        tag = { "${meta.reference}|${meta.sample_id}" }
-        publishDir = [
-            path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-        ]
-    }
-
     //
     // DAMAGE CALCULATION
     //

conf/test.config

Lines changed: 1 addition & 0 deletions

@@ -44,6 +44,7 @@ params {
     bamfiltering_minreadlength = 30
     bamfiltering_mappingquality = 37
     deduplication_tool = 'markduplicates'
+    bamfiltering_savefilteredbams = true

     // PreSeq
     mapstats_preseq_mode = 'c_curve'

nf-test.config

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+config {
+
+    testsDir "tests"
+    workDir ".nf-test"
+    configFile "tests/nextflow.config"
+    profile ""
+
+    // load the necessary plugins
+    plugins {
+        load "nft-utils@0.0.3"
+        load "nft-vcf@1.0.7"
+    }
+
+}

tests/nextflow.config

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+/*
+========================================================================================
+    Nextflow config file for running tests
+========================================================================================
+*/

tests/test.nf.test

Lines changed: 149 additions & 0 deletions

@@ -0,0 +1,149 @@
+nextflow_pipeline {
+
+    name "Test pipeline: NFCORE_EAGER"
+    script "main.nf"
+    tag "pipeline"
+    tag "nfcore_eager"
+    tag "test"
+
+    test("test_profile") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+        }
+
+        then {
+
+            ///////////////////
+            // DOCUMENTATION //
+            ///////////////////
+
+            // The contents of each top-level results directory should be tested with individually named snapshots.
+            // Within each snapshot, there should be two to three distinct variables that contain the files to be tested.
+            // - stable_name_<dir> is for files with variable md5sums (i.e. content), so only names will be compared
+            // - stable_content_<dir> is for files with stable md5sums (i.e. content), so md5sums will be compared
+            // - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
+            // If a directory is fully stable, you can drop `stable_name_*`
+            // If a directory contains no BAMs, you can drop `bams_*`
+
+            // Generate with: nf-test test --tag test --profile docker,test --update-snapshot
+            // Test with: nf-test test --tag test --profile docker,test
+            // NOTE: BAMs are always only stable in name, because:
+            // a) sharding breaks the header, since the shard that was first is named in the header (fixed in https://github.com/nf-core/eager/pull/1112)
+            // b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order);
+            //    point b) also causes BAIs to be unstable.
+            // c) merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)
+
+            //////////////////////
+            // DEFINE VARIABLES //
+            //////////////////////
+
+            // Define exclusion patterns for files with unstable contents
+            // NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here.
+            // This is particularly important if the patterns excluded in the stable content section should be included in the stable name section.
+            def unstable_patterns_auth = [
+                '**/mapped_reads_gc-content_distribution.txt',
+                '**/mapped_reads_nucleotide_content.txt',
+                '**/genome_gc_content_per_window.png',
+                '**/*.{svg,pdf,html,png}',
+                '**/DamageProfiler.log',
+                '**/3p_freq_misincorporations.txt',
+                '**/5p_freq_misincorporations.txt',
+                '**/DNA_comp_genome.txt',
+                '**/DNA_composition_sample.txt',
+                '**/misincorporation.txt',
+                '**/genome_results.txt',
+            ]
+
+            // Check that no files are missing/added
+            // Command legend: result directory to index , includeDir: include dirs? , ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns
+            def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] )
+
+            // Authentication
+            def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] )
+            def stable_name_authentication    = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth )
+
+            // Deduplication - TODO -> snapshot both lists are empty!?
+            def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
+            def stable_name_deduplication    = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
+
+            // Final_bams
+            def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
+            def stable_name_final_bams    = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
+
+            // Mapping (incl. bam_input flagstat)
+            def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
+            def stable_name_mapping    = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
+
+            // Preprocessing
+            // NOTE: FastQC html appears stable, but I worry it might just include a day timestamp instead of a full timestamp. To keep the expression simpler I removed both from checksum testing.
+            def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'] , ignoreFile: null , include: ['**/*'] )
+            def stable_name_preprocessing    = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] )
+
+            // Read filtering
+            def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] )
+            def stable_name_readfiltering    = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] )
+
+            // Genotyping
+            def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] )
+            def stable_name_genotyping    = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] )
+            // We need to collect the VCFs separately to run more specific md5sum checks on the header (contents are unstable for the same reasons as BAMs, explained above).
+            def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] )
+
+            // Metagenomics
+            def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*.biom', '**/*table.tsv'] , ignoreFile: null , include: ['**/*'] )
+            def stable_name_metagenomics    = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.biom', '**/*table.tsv'] )
+
+            // MultiQC
+            def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] )
+
+            ///////////////////////
+            // DEFINE ASSERTIONS //
+            ///////////////////////
+
+            assertAll(
+                { assert workflow.success },
+                // This checks that there are no missing or additional output files.
+                // Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections.
+                { assert snapshot( stable_name_all*.name ).match("all_files") },
+
+                // Checking changes to the contents of each section
+                // NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
+                // Each section should check stable_content first, then stable_name (if applicable).
+                { assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") },
+                { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
+                { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
+                // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
+                { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
+                { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
+                { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") },
+                { assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") },
+                // Additional content checks on the genotyping VCFs, specifically the md5sums of the header FORMAT, INFO, FILTER, and contig lines, and the sample names
+                { assert snapshot(
+                    genotyping_vcfs.collect {
+                        file ->
+                            def vcf_head = path(file.toString()).vcf.header
+                            // The header contains lines in the "OTHER" category, which contain a timestamp and/or work dir paths, so we need to filter those out, then calculate md5sums.
+                            def header_md5 = [
+                                vcf_head.getFormatHeaderLines().toString(),
+                                vcf_head.getInfoHeaderLines().toString(),
+                                vcf_head.getFilterLines().toString(),
+                                vcf_head.getIDHeaderLines().toString(),
+                                vcf_head.getGenotypeSamples().toString(),
+                                vcf_head.getContigLines().toString(),
+                            ].join(' ').md5()
+                            file.getName() + ":header_md5," + header_md5
+                    }
+                ).match("genotyping_vcfs")},
+                { assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") },
+                { assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
+
+                // Versions
+                { assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() },
+
+            )
+        }
+    }
+}
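
Usage note on the convention documented at the top of the then block: adding a new results directory to this test means defining a stable_content_* list and, if needed, a stable_name_* list with getAllFilesFromDir, then snapshotting both in assertAll. The fragment below is a minimal sketch of that convention under the assumption of a hypothetical output directory named "mapstats"; the directory name and exclusion patterns are illustrative and not part of this commit:

// Hypothetical example only: "mapstats" and its patterns illustrate the documented
// stable_content_* / stable_name_* convention; they are not outputs added by this commit.
def stable_content_mapstats = getAllFilesFromDir("$outputDir/mapstats" , includeDir: false , ignore: ['**/*.{png,pdf}'] , ignoreFile: null , include: ['**/*'] )
def stable_name_mapstats    = getAllFilesFromDir("$outputDir/mapstats" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{png,pdf}'] )

// ...and inside assertAll(), keeping the alphanumeric ordering of output directories:
// { assert snapshot( stable_content_mapstats , stable_name_mapstats*.name ).match("mapstats") },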
