Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,30 @@ from snakemake.utils import min_version
min_version("5.2.0")

configfile: "config.defaults.yml"
sample_files = snakemake.utils.listfiles(config["fastq_file_pattern"])
sample_files = snakemake.utils.listfiles(config["fastq_file_pattern"]+"/{sample}.fastq.gz")
samples = dict((y[0], x) for x, y in sample_files)
assert len(samples) > 0, "ERROR: No fastq files were found using pattern '{}' (set in configfile)".format(config["fastq_file_pattern"])
SAMPLES_ALL = glob_wildcards(config['fastq_file_pattern']+"/{sample}.fastq.gz").sample
SAMPLES_PAIRED = glob_wildcards(config['fastq_file_pattern']+"/{sample}_R1_001.fastq.gz").sample

log_dir = config["results_dir"] + "/logs"

def get_fastq(wildcards):
return samples[wildcards.sample]

if_SE = all("R2" not in name for name in samples.keys())

def get_matched_fastq(wildcards):
paired_files = [samples[i] for i in samples.keys() if ("R1" in i or "R2" in i) and wildcards.sample in i]
if len(paired_files) == 2 :
return sorted(paired_files)
else:
raise ValueError(f"Error in matched pairs {wildcards.sample}")

def get_paired_fastq(wildcards):
return get_matched_fastq(wildcards)



rule all:
input:
Expand Down
2 changes: 1 addition & 1 deletion config.defaults.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Pattern to input fastq files, use {sample} to indicate portion of filename
# that corresponds to the sample name (e.g. data/{sample}.fq.gz)
# The pipeline will automatically detect matching files and process each of them
fastq_file_pattern: "data/{sample}.fastq.gz"
fastq_file_pattern: "data"

# Output directories
# Directory to put intermediate working files (trimmed reads, bam files, etc.)
Expand Down
79 changes: 53 additions & 26 deletions rules/align.smk
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,56 @@ rule star_genome_index:
"--runThreadN {threads} "
">{log:q} 2>&1"


rule star_align:
input:
fastq=config["working_dir"] + "/trimmed/{sample}.fastq.gz",
genome_index=star_genome_dir
output:
bam=config["working_dir"] + "/star/{sample}/Aligned.sortedByCoord.out.bam",
log=config["working_dir"] + "/star/{sample}/Log.final.out"
params:
prefix=config["working_dir"] + "/star/{sample}/"
log:
log_dir + "/star/{sample}.log"
threads: 32
conda:
"../envs/star.yml"
shell:
"STAR "
"{config[params][star][extra]} "
"--runThreadN {threads} "
"--genomeDir {input.genome_index:q} "
"--readFilesIn {input.fastq:q} "
"--readFilesCommand {config[params][star][zcat_command]} "
"--outSAMtype BAM SortedByCoordinate "
"--outFileNamePrefix {params.prefix:q} "
"--outStd Log "
">{log:q} 2>&1"
if if_SE:
rule star_align_SE:
input:
fastq=config["working_dir"] + "/trimmed/{sample}.fastq.gz",
genome_index=star_genome_dir
output:
bam=config["working_dir"] + "/star/{sample}/Aligned.sortedByCoord.out.bam",
log=config["working_dir"] + "/star/{sample}/Log.final.out"
params:
prefix=config["working_dir"] + "/star/{sample}/"
log:
log_dir + "/star/{sample}.log"
threads: 32
conda:
"../envs/star.yml"
shell:
"STAR "
"{config[params][star][extra]} "
"--runThreadN {threads} "
"--genomeDir {input.genome_index:q} "
"--readFilesIn {input.fastq:q} "
"--readFilesCommand {config[params][star][zcat_command]} "
"--outSAMtype BAM SortedByCoordinate "
"--outFileNamePrefix {params.prefix:q} "
"--outStd Log "
">{log:q} 2>&1"
else:
rule star_align_PE:
input:
fastq1=config["working_dir"] + "/trimmed/{sample}_R1_paired.fastq.gz",
fastq2=config["working_dir"] + "/trimmed/{sample}_R2_paired.fastq.gz",
genome_index=star_genome_dir
output:
bam=config["working_dir"] + "/star/{sample}/Aligned.sortedByCoord.out.bam",
log=config["working_dir"] + "/star/{sample}/Log.final.out"
params:
prefix=config["working_dir"] + "/star/{sample}/"
log:
log_dir + "/star/{sample}.log"
threads: 32
conda:
"../envs/star.yml"
shell:
"STAR "
"{config[params][star][extra]} "
"--runThreadN {threads} "
"--genomeDir {input.genome_index:q} "
"--readFilesIn {input.fastq1:q} {input.fastq2:q} "
"--readFilesCommand {config[params][star][zcat_command]} "
"--outSAMtype BAM SortedByCoordinate "
"--outFileNamePrefix {params.prefix:q} "
"--outStd Log "
">{log:q} 2>&1"
2 changes: 1 addition & 1 deletion rules/count.smk
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ rule count:
rule combined_counts:
input:
expand(config["working_dir"] + "/featurecounts/{sample}_counts.tsv",
sample=samples.keys())
sample = SAMPLES_ALL if if_SE else SAMPLES_PAIRED)
output:
config["results_dir"] + "/combined_gene_counts.tsv"
run:
Expand Down
8 changes: 4 additions & 4 deletions rules/summary.smk
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ from os import path

rule multiqc:
input:
expand(log_dir + "/trimmomatic/{sample}.log", sample=samples.keys()),
expand(config["working_dir"] + "/star/{sample}/Log.final.out", sample=samples.keys()),
expand(config["working_dir"] + "/featurecounts/{sample}_counts.tsv.summary", sample=samples.keys()),
expand(config["working_dir"] + "/fastqc/{sample}_fastqc.zip", sample=samples.keys())
expand(log_dir + "/trimmomatic/{sample}.log", sample = SAMPLES_ALL if if_SE else SAMPLES_PAIRED),
expand(config["working_dir"] + "/star/{sample}/Log.final.out", sample = SAMPLES_ALL if if_SE else SAMPLES_PAIRED),
expand(config["working_dir"] + "/featurecounts/{sample}_counts.tsv.summary", sample = SAMPLES_ALL if if_SE else SAMPLES_PAIRED),
expand(config["working_dir"] + "/fastqc/{sample}_fastqc.zip", sample=SAMPLES_ALL)
output:
config["results_dir"] + "/multiqc.html"
params:
Expand Down
63 changes: 44 additions & 19 deletions rules/trim.smk
Original file line number Diff line number Diff line change
@@ -1,19 +1,44 @@
rule trimmomatic:
input:
get_fastq
output:
fastq=config["working_dir"] + "/trimmed/{sample}.fastq.gz"
params:
trimmer=" ".join(config["params"]["trimmomatic"]["trimmers"]),
extra=config["params"]["trimmomatic"]["extra"]
log:
log_dir + "/trimmomatic/{sample}.log"
threads:
32
conda:
"../envs/trimmomatic.yml"
shell:
"trimmomatic SE {params.extra} "
"{input:q} {output:q} "
"{params.trimmer} "
">{log} 2>&1"
if if_SE:
rule trimmomatic_SE:
input:
get_fastq
output:
fastq=config["working_dir"] + "/trimmed/{sample}.fastq.gz",
params:
trimmer=" ".join(config["params"]["trimmomatic"]["trimmers"]),
extra=config["params"]["trimmomatic"]["extra"]
log:
log_dir + "/trimmomatic/{sample}.log"
threads:
32
conda:
"../envs/trimmomatic.yml"
shell:
"trimmomatic SE {params.extra} "
"{input:q} {output:q} "
"{params.trimmer} "
">{log} 2>&1"

else:
rule trimmomatic_PE:
input:
get_paired_fastq
output:
paired1=config["working_dir"] + "/trimmed/{sample}_R1_paired.fastq.gz",
unpaired1=config["working_dir"] + "/trimmed/{sample}_R1_unpaired.fastq.gz",
paired2=config["working_dir"] + "/trimmed/{sample}_R2_paired.fastq.gz",
unpaired2=config["working_dir"] + "/trimmed/{sample}_R2_unpaired.fastq.gz"
params:
trimmer=" ".join(config["params"]["trimmomatic"]["trimmers"]),
extra=config["params"]["trimmomatic"]["extra"]
log:
log_dir + "/trimmomatic/{sample}.log"
threads:
32
conda:
"../envs/trimmomatic.yml"
shell:
"trimmomatic PE {params.extra} "
"{input:q} {output:q} "
"{params.trimmer} "
">{log} 2>&1"