@@ -29,13 +29,13 @@ rule build_insertion_reference:
2929 log :
3030 log = f"{ outdir } /intermediate/log/detection/build_insertion_reference/out.log"
3131 output :
32- temp ( f"{ outdir } /intermediate/mapping/insertion_ref_genome.fa" )
32+ f"{ outdir } /intermediate/mapping/insertion_ref_genome.fa"
3333 conda :
3434 "../envs/VIS_dummy_env.yml"
3535 shell :
3636 """
3737 (
38- cat {input.ref} {input.insertion} > {output}
38+ cat {input.ref} {input.insertion} | awk 'NF' > {output}
3939 ) > {log.log} 2>&1
4040 """
4141
@@ -60,19 +60,61 @@ rule minimap_index:
6060
# Converts the raw input BAM of one sample (path looked up in
# config["samples"] by the `sample` wildcard) into a FASTA file, without any
# trimming.
#
# Output handling: the FASTA is taken from samtools' stdout only. The previous
# command passed `-o {output.fasta}` AND redirected stdout to the same path,
# so two independent file handles wrote to one file. `-o` only receives reads
# carrying the READ1/READ2 flag; all other reads are emitted on stdout, so
# dropping `-o` sends every read through the single stdout redirect.
# The outer redirect still captures stderr of the subshell in the log file.
rule make_fasta_without_tags: #fasta of raw data no trimming whatsoever
    input:
        bam=lambda wildcards: config["samples"][wildcards.sample]
    log:
        log=f"{outdir}/intermediate/log/detection/make_fasta_without_tags/{{sample}}.log"
    output:
        fasta=f"{outdir}/intermediate/fasta/Full_{{sample}}.fa"
    conda:
        "../envs/VIS_samtools_env.yml"
    shell:
        """
        (
        samtools fasta {input.bam} > {output.fasta}
        ) > {log.log} 2>&1
        """
76+
77+ ######
78+ ######
79+ ###### Only use reads with insertions for speed
80+ ######
81+ ######
82+
# Extracts a subset of reads from the sample's input BAM into a smaller,
# coordinate-sorted BAM so that downstream steps only process those reads.
#
# input.bam       - per-sample BAM, path looked up in config["samples"]
# input.readnames - text file handed to `samtools view -N` (read-name list);
#                   presumably the names of reads with insertion matches,
#                   judging by the file name - verify against the producing rule
# output.isobam   - sorted BAM containing only the selected reads
#
# Note: `samtools index` also writes an index file next to output.isobam; it is
# not declared as an output of this rule.
rule insertion_reads:
    input:
        bam=lambda wildcards: config["samples"][wildcards.sample],
        readnames=f"{outdir}/intermediate/blastn/Readnames_{fragmentsize}_InsertionMatches_{{sample}}.txt"
    log:
        log=f"{outdir}/intermediate/log/detection/insertion_reads_cmod/{{sample}}.log"
    output:
        isobam=f"{outdir}/intermediate/mapping/Isolated_Reads_{{sample}}.bam"
    conda:
        "../envs/VIS_samtools_env.yml"
    shell:
        """
        (
        samtools view -b -N {input.readnames} {input.bam} | samtools sort > {output.isobam}
        samtools index {output.isobam}
        ) > {log.log} 2>&1
        """
100+
# Converts the per-sample BAM of isolated reads into a FASTA file.
#
# Output handling: the FASTA is taken from samtools' stdout only. The previous
# command passed `-o {output}` AND redirected stdout to the same path, so two
# independent file handles wrote to one file. `-o` only receives reads carrying
# the READ1/READ2 flag; all other reads are emitted on stdout, so dropping `-o`
# sends every read through the single stdout redirect.
# The outer redirect still captures stderr of the subshell in the log file.
rule fasta_insertion_reads:
    input:
        f"{outdir}/intermediate/mapping/Isolated_Reads_{{sample}}.bam"
    log:
        log=f"{outdir}/intermediate/log/detection/fasta_insertion_reads_cmod/{{sample}}.log"
    output:
        f"{outdir}/intermediate/fasta/Isolated_Reads_{{sample}}.fa"
    conda:
        "../envs/VIS_samtools_env.yml"
    shell:
        """
        (
        samtools fasta {input} > {output}
        ) > {log.log} 2>&1
        """
116+
117+
76118######
77119######
78120###### "Clean" BAM: Cut-out fasta to BAM via Mapping to reference
@@ -90,19 +132,21 @@ rule Non_insertion_mapping: #mapping against the unaltered reference genome
90132 log = f"{ outdir } /intermediate/log/detection/Non_insertion_mapping/{{sample}}.log"
91133 resources :
92134 mem_mb = 5000
135+ threads : config ["threads" ]
93136 conda :
94137 "../envs/VIS_minimap_env.yml"
95138 shell : #N=0 instead of default N=1
96139 """
97140 (
98- minimap2 -y -ax map-ont --score-N 0 {input.genome} {input.fasta} | samtools sort | samtools view -F 2304 -o {output}
141+ minimap2 -t {threads} - y -ax map-ont --score-N 0 {input.genome} {input.fasta} | samtools sort | samtools view -F 2304 -o {output}
99142 samtools index {output}
100143 ) > {log.log} 2>&1
101144 """
102145
103146rule insertion_mapping : #conserves tags!
104147 input :
105- bam = lambda wildcards : config ["samples" ][wildcards .sample ],
148+ #bam=lambda wildcards: config["samples"][wildcards.sample], #full data
149+ bam = f"{ outdir } /intermediate/mapping/Isolated_Reads_{{sample}}.bam" , #only reads with insertions
106150 minimapref = f"{ outdir } /intermediate/mapping/ref_genome_index.mmi" ,
107151 ref = f"{ outdir } /intermediate/mapping/insertion_ref_genome.fa"
108152 output :
@@ -111,12 +155,13 @@ rule insertion_mapping: #conserves tags!
111155 log = f"{ outdir } /intermediate/log/detection/insertion_mapping/{{sample}}.log"
112156 resources :
113157 mem_mb = 5000
158+ threads : config ["threads" ]
114159 conda :
115160 "../envs/VIS_minimap_env.yml"
116161 shell :
117162 """
118163 (
119- samtools bam2fq -T '*' {input.bam}| minimap2 -y -ax map-ont {input.minimapref} - | samtools sort | samtools view -F 2304 -o {output}
164+ samtools bam2fq -T '*' {input.bam}| minimap2 -t {threads} - y -ax map-ont {input.minimapref} - | samtools sort | samtools view -F 2304 -o {output}
120165 samtools index {output}
121166 ) > {log.log} 2>&1
122167 """
@@ -196,7 +241,8 @@ rule get_coordinates_for_fasta: #filters and combines matches
196241rule split_fasta :
197242 input :
198243 breakpoints = f"{ outdir } /intermediate/blastn/Coordinates_{ fragmentsize } _InsertionMatches_{{sample}}.blastn" ,
199- fasta = f"{ outdir } /intermediate/fasta/Full_{{sample}}.fa"
244+ #fasta=f"{outdir}/intermediate/fasta/Full_{{sample}}.fa"
245+ fasta = f"{ outdir } /intermediate/fasta/Isolated_Reads_{{sample}}.fa"
200246 params :
201247 mode = config ["splitmode" ] #if each split fasta substring should be used individually, use "Separated" Join, New mode: Buffer
202248 log :
@@ -287,6 +333,8 @@ rule find_insertion_BLASTn:
287333 log = f"{ outdir } /intermediate/log/detection/find_insertion_BLASTn/{{sample}}.log"
288334 output :
289335 temp (f"{ outdir } /intermediate/blastn/{ fragmentsize } _InsertionMatches_{{sample}}.blastn" )
336+ threads :
337+ config ["threads" ]
290338 conda :
291339 "../envs/VIS_blastn_env.yml"
292340 shell :
@@ -295,6 +343,7 @@ rule find_insertion_BLASTn:
295343 mkdir {params.tempdir}
296344
297345 blastn \
346+ -num_threads {threads} \
298347 -query {input.fasta} \
299348 -db {input.insertion} \
300349 -out {params.tempdir}/temp_output.blastn \
@@ -315,6 +364,8 @@ rule find_insertion_BLASTn_in_Ref:
315364 fasta = f"{ outdir } /intermediate/fasta/fragments/{ fragmentsize } _Insertion_fragments.fa"
316365 params :
317366 refdb = config .get ("blastn_db" , "" ) # Optional blastn database path
367+ threads :
368+ config ["threads" ]
318369 log :
319370 log = f"{ outdir } /intermediate/log/detection/find_insertion_BLASTn_in_Ref/{{sample}}.log"
320371 output :
@@ -330,6 +381,7 @@ rule find_insertion_BLASTn_in_Ref:
330381 else
331382 # If blastn_db is provided, run the blastn command
332383 blastn \
384+ -num_threads {threads} \
333385 -query {input.fasta} \
334386 -db {params.refdb} \
335387 -out {output} \
@@ -380,13 +432,12 @@ rule extract_by_length:
380432 ) > {log.log} 2>&1
381433 """
382434
435+
383436######
384437######
385438###### Visualisation of insertions
386439######
387440
388-
389-
390441rule basic_insertion_plots :
391442 input :
392443 expand (f"{ outdir } /intermediate/localization/ExactInsertions_{{sample}}.bed" , sample = SAMPLES )
0 commit comments