|
| 1 | +import os |
| 2 | +from datetime import datetime |
| 3 | +import time |
| 4 | +import re |
| 5 | +from shutil import copyfile |
| 6 | + |
| 7 | +import pandas as pd |
| 8 | +import re |
| 9 | + |
| 10 | +configfile: "config.json" |
| 11 | + |
| 12 | +reference = config["reference"] |
| 13 | +bed_file = config["bed_file"] |
| 14 | +out_dir = config["out_dir"] |
| 15 | +samples_path = config["samples_path"] |
| 16 | + |
| 17 | +df = pd.read_table(samples_path, sep="\t") |
| 18 | +_ = df.groupby("sample").sum() |
| 19 | +lib_delim = config["library_delimiter"] |
| 20 | + |
| 21 | +_["sample_library"] = _["sample_library"].apply(lambda x: x.split(lib_delim)[0] +"_" + "_".join(re.findall("L[0-9]+", x))) |
| 22 | + |
| 23 | +rule all: |
| 24 | + input: |
| 25 | + expand("{out_dir}/consensus_sequences/{sample}.fa", out_dir = out_dir, sample = _["sample_library"]) |
| 26 | + |
| 27 | +rule call_consensus: |
| 28 | + input: |
| 29 | + "{out_dir}/trimmed_bams/{sample}.trimmed.sorted.bam" |
| 30 | + output: |
| 31 | + "{out_dir}/consensus_sequences/{sample}.fa" |
| 32 | + shell: |
| 33 | + """ |
| 34 | + samtools mpileup -A -Q 0 -d 300000 {input} | ivar consensus -p {output} -n N -m 10 |
| 35 | + """ |
| 36 | + |
| 37 | +rule trim_reads: |
| 38 | + input: |
| 39 | + "{out_dir}/merged_aligned_bams/{sample}.sorted.bam" |
| 40 | + output: |
| 41 | + "{out_dir}/trimmed_bams/{sample}.trimmed.sorted.bam" |
| 42 | + params: |
| 43 | + bed="{bed}".format(bed = bed_file), |
| 44 | + tmp="{out_dir}/trimmed_bams/{sample}.trimmed.bam" |
| 45 | + shell: |
| 46 | + """ |
| 47 | + ivar trim -i {input} -b {params.bed} -p {params.tmp} |
| 48 | + samtools sort -T {wildcards.sample}.trim -o {output} {params.tmp} |
| 49 | + rm {params.tmp} |
| 50 | + """ |
| 51 | + |
| 52 | +rule merge_multiple_libraries: |
| 53 | + input: |
| 54 | + bams=lambda wildcards: df[df["sample"] == _[_["sample_library"] == wildcards.sample].index.values[0]]["sample_library"].apply(lambda x: os.path.join(out_dir, "aligned_bams", x +".sorted.bam")).tolist(), |
| 55 | + forward=lambda wildcards: df[df["sample"] == _[_["sample_library"] == wildcards.sample].index.values[0]]["forward"].sort_values().tolist(), |
| 56 | + reverse=lambda wildcards: df[df["sample"] == _[_["sample_library"] == wildcards.sample].index.values[0]]["reverse"].sort_values().tolist() |
| 57 | + output: |
| 58 | + bam="{out_dir}/merged_aligned_bams/{sample}.sorted.bam", |
| 59 | + fastq=expand("{{out_dir}}/merged_fastq/{{sample}}_R{readno}.fastq.gz", readno=[1,2]) |
| 60 | + params: |
| 61 | + tmp="{out_dir}/merged_aligned_bams/{sample}.bam" |
| 62 | + shell: |
| 63 | + """ |
| 64 | + samtools merge {params.tmp} {input.bams} |
| 65 | + samtools sort -T {wildcards.sample}.merge -o {output.bam} {params.tmp} |
| 66 | + rm {params.tmp} |
| 67 | + cat {input.forward} > {output.fastq[0]} |
| 68 | + cat {input.reverse} > {output.fastq[1]} |
| 69 | + """ |
| 70 | + |
| 71 | +rule align_reads: |
| 72 | + input: |
| 73 | + lambda wildcards: df[df["sample_library"]==wildcards.sample][["forward", "reverse"]].values[0].tolist() |
| 74 | + output: |
| 75 | + temp("{out_dir}/aligned_bams/{sample}.sorted.bam") |
| 76 | + params: |
| 77 | + ref= "{ref}".format(ref = reference), |
| 78 | + tmp="{out_dir}/aligned_bams/{sample}.sorted.tmp.bam" |
| 79 | + shell: |
| 80 | + """ |
| 81 | + bwa mem {params.ref} {input[0]} {input[1]} | samtools view -F 4 -Sb | samtools sort -T {wildcards.sample}.align -o {params.tmp} |
| 82 | + samtools addreplacerg -r "ID:{wildcards.sample}" -o {output} {params.tmp} |
| 83 | + rm {params.tmp} |
| 84 | + """ |
0 commit comments