Skip to content

Commit 80d4ec3

Browse files
committed
Added pipeline to call viral consensus.
1 parent 9afcfe3 commit 80d4ec3

File tree

3 files changed

+101
-0
lines changed

3 files changed

+101
-0
lines changed

pipeline_consensus/Snakefile

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import os
2+
from datetime import datetime
3+
import time
4+
import re
5+
from shutil import copyfile
6+
7+
import pandas as pd
8+
import re
9+
10+
configfile: "config.json"
11+
12+
reference = config["reference"]
13+
bed_file = config["bed_file"]
14+
out_dir = config["out_dir"]
15+
samples_path = config["samples_path"]
16+
17+
df = pd.read_table(samples_path, sep="\t")
18+
_ = df.groupby("sample").sum()
19+
lib_delim = config["library_delimiter"]
20+
21+
_["sample_library"] = _["sample_library"].apply(lambda x: x.split(lib_delim)[0] +"_" + "_".join(re.findall("L[0-9]+", x)))
22+
23+
rule all:
24+
input:
25+
expand("{out_dir}/consensus_sequences/{sample}.fa", out_dir = out_dir, sample = _["sample_library"])
26+
27+
rule call_consensus:
28+
input:
29+
"{out_dir}/trimmed_bams/{sample}.trimmed.sorted.bam"
30+
output:
31+
"{out_dir}/consensus_sequences/{sample}.fa"
32+
shell:
33+
"""
34+
samtools mpileup -A -Q 0 -d 300000 {input} | ivar consensus -p {output} -n N -m 10
35+
"""
36+
37+
rule trim_reads:
38+
input:
39+
"{out_dir}/merged_aligned_bams/{sample}.sorted.bam"
40+
output:
41+
"{out_dir}/trimmed_bams/{sample}.trimmed.sorted.bam"
42+
params:
43+
bed="{bed}".format(bed = bed_file),
44+
tmp="{out_dir}/trimmed_bams/{sample}.trimmed.bam"
45+
shell:
46+
"""
47+
ivar trim -i {input} -b {params.bed} -p {params.tmp}
48+
samtools sort -T {wildcards.sample}.trim -o {output} {params.tmp}
49+
rm {params.tmp}
50+
"""
51+
52+
rule merge_multiple_libraries:
53+
input:
54+
bams=lambda wildcards: df[df["sample"] == _[_["sample_library"] == wildcards.sample].index.values[0]]["sample_library"].apply(lambda x: os.path.join(out_dir, "aligned_bams", x +".sorted.bam")).tolist(),
55+
forward=lambda wildcards: df[df["sample"] == _[_["sample_library"] == wildcards.sample].index.values[0]]["forward"].sort_values().tolist(),
56+
reverse=lambda wildcards: df[df["sample"] == _[_["sample_library"] == wildcards.sample].index.values[0]]["reverse"].sort_values().tolist()
57+
output:
58+
bam="{out_dir}/merged_aligned_bams/{sample}.sorted.bam",
59+
fastq=expand("{{out_dir}}/merged_fastq/{{sample}}_R{readno}.fastq.gz", readno=[1,2])
60+
params:
61+
tmp="{out_dir}/merged_aligned_bams/{sample}.bam"
62+
shell:
63+
"""
64+
samtools merge {params.tmp} {input.bams}
65+
samtools sort -T {wildcards.sample}.merge -o {output.bam} {params.tmp}
66+
rm {params.tmp}
67+
cat {input.forward} > {output.fastq[0]}
68+
cat {input.reverse} > {output.fastq[1]}
69+
"""
70+
71+
rule align_reads:
72+
input:
73+
lambda wildcards: df[df["sample_library"]==wildcards.sample][["forward", "reverse"]].values[0].tolist()
74+
output:
75+
temp("{out_dir}/aligned_bams/{sample}.sorted.bam")
76+
params:
77+
ref= "{ref}".format(ref = reference),
78+
tmp="{out_dir}/aligned_bams/{sample}.sorted.tmp.bam"
79+
shell:
80+
"""
81+
bwa mem {params.ref} {input[0]} {input[1]} | samtools view -F 4 -Sb | samtools sort -T {wildcards.sample}.align -o {params.tmp}
82+
samtools addreplacerg -r "ID:{wildcards.sample}" -o {output} {params.tmp}
83+
rm {params.tmp}
84+
"""

pipeline_consensus/config.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"reference": "/path/to/bwa/ref",
3+
"bed_file": "/path/to/primer/bed/file",
4+
"out_dir": "/path/to/outdir",
5+
"samples_path": "example_samples.tsv",
6+
"library_delimiter": "_"
7+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
forward reverse sample sample_library
2+
Z025_L1_R1.fastq.gz Z025_L1_R2.fastq.gz Z025 Z025_L1
3+
Z019_L1_R1.fastq.gz Z019_L1_R2.fastq.gz Z019 Z019_L1
4+
Z019_L2_R1.fastq.gz Z019_L2_R2.fastq.gz Z019 Z019_L2
5+
Z019_L3_R1.fastq.gz Z019_L3_R2.fastq.gz Z019 Z019_L3
6+
Z023_L1_R1.fastq.gz Z023_L1_R2.fastq.gz Z023 Z023_L1
7+
Z023_L2_R1.fastq.gz Z023_L2_R2.fastq.gz Z023 Z023_L2
8+
Z023_L3_R1.fastq.gz Z023_L3_R2.fastq.gz Z023 Z023_L3
9+
Z829_L1_R1.fastq.gz Z829_L1_R2.fastq.gz Z829 Z829_L1
10+
Z829_L2_R1.fastq.gz Z829_L2_R2.fastq.gz Z829 Z829_L2

0 commit comments

Comments
 (0)