Skip to content

Commit 59a9648

Browse files
authored
Merge pull request #908 from drpatelh/updates
Closing #896 #897 #900 #902 #907
2 parents 721d772 + d410de1 commit 59a9648

File tree

7 files changed

+51
-20
lines changed

7 files changed

+51
-20
lines changed

CHANGELOG.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
- Bump minimum Nextflow version from `21.10.3` -> `22.10.1`
1111
- Updated pipeline template to [nf-core/tools 2.7.1](https://github.com/nf-core/tools/releases/tag/2.7.1)
12+
- [[#896](https://github.com/nf-core/rnaseq/issues/896)] - Remove `copyTo` call for iGenomes README
13+
- [[#897](https://github.com/nf-core/rnaseq/issues/897)] - Use `--skip_preseq` by default
14+
- [[#900](https://github.com/nf-core/rnaseq/issues/900)] - Add `--recursive` option to `fastq_dir_to_samplesheet.py` script
15+
- [[#902](https://github.com/nf-core/rnaseq/issues/902)] - `check_samplesheet.py` script doesn't output optional columns in samplesheet
16+
- [[#907](https://github.com/nf-core/rnaseq/issues/907)] - Add `--extra_star_align_args` and `--extra_salmon_quant_args` parameters
1217

1318
### Parameters
1419

15-
| Old parameter | New parameter |
16-
| ---------------- | ------------- |
17-
| `--enable_conda` | |
20+
| Old parameter | New parameter |
21+
| ---------------- | --------------------------- |
22+
| `--enable_conda` | |
23+
| | `--extra_star_align_args` |
24+
| | `--extra_salmon_quant_args` |
1825

1926
> **NB:** Parameter has been **updated** if both old and new parameter information is present.
2027
> **NB:** Parameter has been **added** if just the new parameter information is present.

bin/check_samplesheet.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def check_samplesheet(file_in, file_out):
7070
line,
7171
)
7272

73-
num_cols = len([x for x in lspl if x])
73+
num_cols = len([x for x in lspl[: len(HEADER)] if x])
7474
if num_cols < MIN_COLS:
7575
print_error(
7676
f"Invalid number of populated columns (minimum = {MIN_COLS})!",
@@ -124,6 +124,7 @@ def check_samplesheet(file_in, file_out):
124124
print_error("Invalid combination of columns provided!", "Line", line)
125125

126126
## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, strandedness ]]}
127+
sample_info = sample_info + lspl[len(HEADER) :]
127128
if sample not in sample_mapping_dict:
128129
sample_mapping_dict[sample] = [sample_info]
129130
else:
@@ -137,7 +138,9 @@ def check_samplesheet(file_in, file_out):
137138
out_dir = os.path.dirname(file_out)
138139
make_dir(out_dir)
139140
with open(file_out, "w") as fout:
140-
fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2", "strandedness"]) + "\n")
141+
fout.write(
142+
",".join(["sample", "single_end", "fastq_1", "fastq_2", "strandedness"] + header[len(HEADER) :]) + "\n"
143+
)
141144
for sample in sorted(sample_mapping_dict.keys()):
142145

143146
## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
@@ -149,7 +152,7 @@ def check_samplesheet(file_in, file_out):
149152
)
150153

151154
## Check that multiple runs of the same sample are of the same strandedness
152-
if not all(x[-1] == sample_mapping_dict[sample][0][-1] for x in sample_mapping_dict[sample]):
155+
if not all(x[3] == sample_mapping_dict[sample][0][3] for x in sample_mapping_dict[sample]):
153156
print_error(
154157
f"Multiple runs of a sample must have the same strandedness!",
155158
"Sample",

bin/fastq_dir_to_samplesheet.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ def parse_args(args=None):
6767
default=1,
6868
help="After splitting FastQ file name by --sanitise_name_delimiter all elements before this index (1-based) will be joined to create final sample name.",
6969
)
70+
parser.add_argument(
71+
"-re",
72+
"--recursive",
73+
dest="RECURSIVE",
74+
action="store_true",
75+
help="Whether or not to search for FastQ files recursively in <FASTQ_DIR>.",
76+
)
7077
return parser.parse_args(args)
7178

7279

@@ -80,6 +87,7 @@ def fastq_dir_to_samplesheet(
8087
sanitise_name=False,
8188
sanitise_name_delimiter="_",
8289
sanitise_name_index=1,
90+
recursive=False,
8391
):
8492
def sanitize_sample(path, extension):
8593
"""Retrieve sample id from filename"""
@@ -90,27 +98,30 @@ def sanitize_sample(path, extension):
9098
)
9199
return sample
92100

93-
def get_fastqs(extension):
101+
def get_fastqs(extension, recursive=False):
94102
"""
95103
Needs to be sorted to ensure R1 and R2 are in the same order
96104
when merging technical replicates. Glob is not guaranteed to produce
97105
sorted results.
98106
See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered
99107
"""
100-
return sorted(glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False))
108+
search_path = f"*{extension}"
109+
if recursive:
110+
search_path = f"**/*{extension}"
111+
return sorted(glob.glob(os.path.join(fastq_dir, search_path), recursive=recursive))
101112

102113
read_dict = {}
103114

104115
## Get read 1 files
105-
for read1_file in get_fastqs(read1_extension):
116+
for read1_file in get_fastqs(read1_extension, recursive):
106117
sample = sanitize_sample(read1_file, read1_extension)
107118
if sample not in read_dict:
108119
read_dict[sample] = {"R1": [], "R2": []}
109120
read_dict[sample]["R1"].append(read1_file)
110121

111122
## Get read 2 files
112123
if not single_end:
113-
for read2_file in get_fastqs(read2_extension):
124+
for read2_file in get_fastqs(read2_extension, recursive):
114125
sample = sanitize_sample(read2_file, read2_extension)
115126
read_dict[sample]["R2"].append(read2_file)
116127

@@ -157,6 +168,7 @@ def main(args=None):
157168
sanitise_name=args.SANITISE_NAME,
158169
sanitise_name_delimiter=args.SANITISE_NAME_DELIMITER,
159170
sanitise_name_index=args.SANITISE_NAME_INDEX,
171+
recursive=args.RECURSIVE,
160172
)
161173

162174

conf/modules.config

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,8 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
517517
'--outSAMattributes NH HI AS NM MD',
518518
'--quantTranscriptomeBan Singleend',
519519
'--outSAMstrandField intronMotif',
520-
params.save_unaligned ? '--outReadsUnmapped Fastx' : ''
520+
params.save_unaligned ? '--outReadsUnmapped Fastx' : '',
521+
params.extra_star_align_args ?: ''
521522
].join(' ').trim()
522523
publishDir = [
523524
[
@@ -541,6 +542,7 @@ if (!params.skip_alignment && params.aligner == 'star_salmon') {
541542
}
542543

543544
withName: '.*:QUANTIFY_STAR_SALMON:SALMON_QUANT' {
545+
ext.args = params.extra_salmon_quant_args ?: ''
544546
publishDir = [
545547
path: { "${params.outdir}/${params.aligner}" },
546548
mode: params.publish_dir_mode,
@@ -1045,6 +1047,7 @@ if (!params.skip_multiqc) {
10451047
if (params.pseudo_aligner == 'salmon') {
10461048
process {
10471049
withName: '.*:QUANTIFY_SALMON:SALMON_QUANT' {
1050+
ext.args = params.extra_salmon_quant_args ?: ''
10481051
publishDir = [
10491052
path: { "${params.outdir}/${params.pseudo_aligner}" },
10501053
mode: params.publish_dir_mode,

nextflow.config

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ params {
6868
hisat2_build_memory = '200.GB' // Amount of memory required to build HISAT2 index with splice sites
6969
stringtie_ignore_gtf = false
7070
min_mapped_reads = 5
71+
extra_star_align_args = null
72+
extra_salmon_quant_args = null
7173
save_merged_fastq = false
7274
save_unaligned = false
7375
save_align_intermeds = false
@@ -79,7 +81,7 @@ params {
7981
skip_bigwig = false
8082
skip_stringtie = false
8183
skip_fastqc = false
82-
skip_preseq = false
84+
skip_preseq = true
8385
skip_dupradar = false
8486
skip_qualimap = false
8587
skip_rseqc = false

nextflow_schema.json

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,16 @@
424424
"description": "Perform reference-guided de novo assembly of transcripts using StringTie i.e. don't restrict to those in GTF file.",
425425
"fa_icon": "fas fa-ban"
426426
},
427+
"extra_star_align_args": {
428+
"type": "string",
429+
"description": "Extra arguments to pass to STAR alignment command in addition to defaults defined by the pipeline.",
430+
"fa_icon": "fas fa-plus"
431+
},
432+
"extra_salmon_quant_args": {
433+
"type": "string",
434+
"description": "Extra arguments to pass to Salmon quant command in addition to defaults defined by the pipeline.",
435+
"fa_icon": "fas fa-plus"
436+
},
427437
"save_unaligned": {
428438
"type": "boolean",
429439
"fa_icon": "fas fa-save",
@@ -485,7 +495,8 @@
485495
"skip_preseq": {
486496
"type": "boolean",
487497
"description": "Skip Preseq.",
488-
"fa_icon": "fas fa-fast-forward"
498+
"fa_icon": "fas fa-fast-forward",
499+
"default": true
489500
},
490501
"skip_dupradar": {
491502
"type": "boolean",

workflows/rnaseq.nf

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,6 @@ if (params.bam_csi_index) {
5656
}
5757
}
5858

59-
// Save AWS IGenomes file containing annotation version
60-
def anno_readme = params.genomes[ params.genome ]?.readme
61-
if (anno_readme && file(anno_readme).exists()) {
62-
file("${params.outdir}/genome/").mkdirs()
63-
file(anno_readme).copyTo("${params.outdir}/genome/")
64-
}
65-
6659
// Stage dummy file to be used as an optional input where required
6760
ch_dummy_file = file("$projectDir/assets/dummy_file.txt", checkIfExists: true)
6861

0 commit comments

Comments
 (0)