|
| 1 | +nextflow_pipeline { |
| 2 | + |
| 3 | + name "Test pipeline: NFCORE_EAGER" |
| 4 | + script "main.nf" |
| 5 | + tag "pipeline" |
| 6 | + tag "nfcore_eager" |
| 7 | + tag "test" |
| 8 | + |
| 9 | + test("test_profile") { |
| 10 | + |
| 11 | + when { |
| 12 | + params { |
| 13 | + outdir = "$outputDir" |
| 14 | + } |
| 15 | + } |
| 16 | + |
| 17 | + then { |
| 18 | + |
| 19 | + /////////////////// |
| 20 | + // DOCUMENTATION // |
| 21 | + /////////////////// |
| 22 | + |
| 23 | + // The contents of each top-level results directory should be tested with an individually named snapshot. |
| 24 | + // Within each snapshot, there should be two to three distinct variables that contain the files to be tested. |
| 25 | + // - stable_name_<dir> is for files whose md5sums (i.e. contents) vary between runs, so only file names are compared |
| 26 | + // - stable_content_<dir> is for files whose md5sums (i.e. contents) are stable, so md5sums are compared |
| 27 | + // - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable) |
| 28 | + // If a directory is fully stable, you can drop `stable_name_*` |
| 29 | + // If a directory contains no BAMs, you can drop `bams_*` |
| 30 | + |
| 31 | + // Generate with: nf-test test --tag test --profile docker,test --update-snapshot |
| 32 | + // Test with: nf-test test --tag test --profile docker,test |
| 33 | + // NOTE: BAMs are always only stable in name, because: |
| 34 | + // a) sharding breaks the header, since the shard that was processed first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112) |
| 35 | + // b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order) |
| 36 | + // point b) also causes BAIs to be unstable. |
| 37 | + // c) merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes) |
| 38 | + |
| 39 | + ////////////////////// |
| 40 | + // DEFINE VARIABLES // |
| 41 | + ////////////////////// |
| 42 | + |
| 43 | + // Define exclusion patterns for files with unstable contents |
| 44 | + // NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here. |
| 45 | + // This is particularly important if the patterns excluded in the stable content section should be included in the stable name section. |
| 46 | + def unstable_patterns_auth = [ |
| 47 | + '**/mapped_reads_gc-content_distribution.txt', |
| 48 | + '**/mapped_reads_nucleotide_content.txt', |
| 49 | + '**/genome_gc_content_per_window.png', |
| 50 | + '**/*.{svg,pdf,html,png}', |
| 51 | + '**/DamageProfiler.log', |
| 52 | + '**/3p_freq_misincorporations.txt', |
| 53 | + '**/5p_freq_misincorporations.txt', |
| 54 | + '**/DNA_comp_genome.txt', |
| 55 | + '**/DNA_composition_sample.txt', |
| 56 | + '**/misincorporation.txt', |
| 57 | + '**/genome_results.txt', |
| 58 | + ] |
| 59 | + |
| 60 | + // Check that no files are missing/added (everything except pipeline_info/* is listed by name) |
| 61 | + // Command legend: Result directory to index , includeDir: include dirs?, ignore: exclude patterns , ignoreFile: exclude pattern list , include: include patterns |
| 62 | + def stable_name_all = getAllFilesFromDir("$outputDir/" , includeDir: false , ignore: ['pipeline_info/*'] , ignoreFile: null , include: ['*', '**/*'] ) |
| 63 | + |
| 64 | + // Authentication (the unstable patterns are excluded from the content check and used as the name-only list) |
| 65 | + def stable_content_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: unstable_patterns_auth , ignoreFile: null , include: ['*', '**/*'] ) |
| 66 | + def stable_name_authentication = getAllFilesFromDir("$outputDir/authentication" , includeDir: false , ignore: null , ignoreFile: null , include: unstable_patterns_auth) |
| 67 | + |
| 68 | + // Deduplication - TODO: both lists appear to be empty in the snapshot; investigate why. |
| 69 | + def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) |
| 70 | + def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) |
| 71 | + |
| 72 | + // Final_bams |
| 73 | + def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) |
| 74 | + def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) |
| 75 | + |
| 76 | + // Mapping (incl. bam_input flagstat) |
| 77 | + def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) |
| 78 | + def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) |
| 79 | + |
| 80 | + // Preprocessing |
| 81 | + // NOTE: FastQC html appears stable, but it might only include a day-level timestamp rather than a full timestamp. To keep the expression simpler, both html and zip/log files are excluded from checksum testing. |
| 82 | + def stable_content_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: ['**/*.{zip,log,html}'], ignoreFile: null , include: ['**/*'] ) |
| 83 | + def stable_name_preprocessing = getAllFilesFromDir("$outputDir/preprocessing" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{zip,log,html}'] ) |
| 84 | + |
| 85 | + // Read filtering |
| 86 | + def stable_content_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.flagstat'] ) |
| 87 | + def stable_name_readfiltering = getAllFilesFromDir("$outputDir/read_filtering" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.{bam,bai}'] ) |
| 88 | + |
| 89 | + // Genotyping |
| 90 | + def stable_content_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: ['**/*.{tbi,vcf.gz}'] , ignoreFile: null , include: ['**/*'] ) |
| 91 | + def stable_name_genotyping = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.tbi'] ) |
| 92 | + // We need to collect the VCFs separately to run more specific md5sum checks on the header (contents are unstable for the same reasons as BAMs, explained above). |
| 93 | + def genotyping_vcfs = getAllFilesFromDir("$outputDir/genotyping" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.vcf.gz'] ) |
| 94 | + |
| 95 | + // Metagenomics |
| 96 | + def stable_content_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: ['**/*.biom', '**/*table.tsv'] , ignoreFile: null , include: ['**/*'] ) |
| 97 | + def stable_name_metagenomics = getAllFilesFromDir("$outputDir/metagenomics" , includeDir: false , ignore: null , ignoreFile: null , include: ['**/*.biom', '**/*table.tsv'] ) |
| 98 | + |
| 99 | + // MultiQC (file names only) |
| 100 | + def stable_name_multiqc = getAllFilesFromDir("$outputDir/multiqc" , includeDir: false , ignore: null , ignoreFile: null , include: ['*', '**/*'] ) |
| 101 | + |
| 102 | + /////////////////////// |
| 103 | + // DEFINE ASSERTIONS // |
| 104 | + /////////////////////// |
| 105 | + |
| 106 | + assertAll( |
| 107 | + { assert workflow.success }, |
| 108 | + // This checks that there are no missing or additional output files. |
| 109 | + // Also a good starting point to look at all the files in the output folder that need to be checked in subsequent sections. |
| 110 | + { assert snapshot( stable_name_all*.name ).match("all_files") }, |
| 111 | + |
| 112 | + // Checking changes to contents of each section |
| 113 | + // NOTE: Keep the order of the sections in the alphanumeric order of the output directories. |
| 114 | + // Each section should check stable_content first and stable_name second (if applicable). |
| 115 | + { assert snapshot( stable_content_authentication , stable_name_authentication*.name ).match("authentication") }, |
| 116 | + { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") }, |
| 117 | + { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") }, |
| 118 | + // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279 |
| 119 | + { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") }, |
| 120 | + { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") }, |
| 121 | + { assert snapshot( stable_content_readfiltering , stable_name_readfiltering*.name ).match("read_filtering") }, |
| 122 | + { assert snapshot( stable_content_genotyping , stable_name_genotyping*.name ).match("genotyping") }, |
| 123 | + // Additional content checks on the genotyping VCFs: one md5sum per file over the joined header FORMAT, INFO, FILTER, ID and CONTIG lines plus the sample names. |
| 124 | + { assert snapshot( |
| 125 | + genotyping_vcfs.collect { |
| 126 | + file -> |
| 127 | + def vcf_head = path(file.toString()).vcf.header |
| 128 | + // The header contains lines in the "OTHER" category, which contain a timestamp and/or work dir paths, so we only hash the selected line categories below. |
| 129 | + def header_md5 = [ |
| 130 | + vcf_head.getFormatHeaderLines().toString(), |
| 131 | + vcf_head.getInfoHeaderLines().toString(), |
| 132 | + vcf_head.getFilterLines().toString(), |
| 133 | + vcf_head.getIDHeaderLines().toString(), |
| 134 | + vcf_head.getGenotypeSamples().toString(), |
| 135 | + vcf_head.getContigLines().toString(), |
| 136 | + ].join(' ').md5() |
| 137 | + file.getName() + ":header_md5," + header_md5 |
| 138 | + } |
| 139 | + ).match("genotyping_vcfs")}, |
| 140 | + { assert snapshot( stable_content_metagenomics , stable_name_metagenomics*.name ).match("metagenomics") }, |
| 141 | + { assert snapshot( stable_name_multiqc*.name ).match("multiqc") }, |
| 142 | + |
| 143 | + // Versions: only the existence of the software versions file is checked |
| 144 | + { assert new File("$outputDir/pipeline_info/nf_core_eager_software_mqc_versions.yml").exists() }, |
| 145 | + |
| 146 | + ) |
| 147 | + } |
| 148 | + } |
| 149 | +} |
0 commit comments