-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinc_fac.py
More file actions
72 lines (63 loc) · 2.62 KB
/
inc_fac.py
File metadata and controls
72 lines (63 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gzip
import subprocess as sp
import os
from helpers import find_mean, timestamp
def inc_factor(input, db_path, input_path, out_path, result_path, err_file):
try:
n, frag = input
input_file = f'{input_path}/{n}'
out_file = f"{out_path}/{n}.inc.blast.out"
db_file = f"{db_path}/inc-types.fasta"
cmd = [
'blastn',
'-query', db_file,
'-subject', input_file,
'-num_threads', '1',
'-perc_identity', '90',
'-culling_limit', '1',
'-outfmt', '6 qseqid sstart send sstrand pident qcovs bitscore',
'-out', out_file,
]
proc = sp.run(
cmd,
stdout=sp.PIPE,
stderr=sp.PIPE,
universal_newlines=True
)
bitscore_mean, coverage_mean, length_mean, count = 0, 0, 0, 0
if(proc.returncode == 0 and not os.stat(out_file).st_size == 0):
bitscores = []
coverages = []
lengths = []
hit_poses = []
count = 0
with out_file.open() as fh:
for line in fh:
cols = line.rstrip().split('\t')
start = int(cols[1])
end = int(cols[2])
length = abs(end - start) + 1
strand = '+' if cols[3] == 'plus' else '-',
coverage = float(cols[5]) / 100
bitscore = float(cols[6])
hit_pos = end if strand == '+' else start
if (hit_pos in hit_poses):
former_hit = hit_poses.index(hit_pos)
if (bitscores[former_hit] < bitscore):
bitscores[former_hit] = bitscore
coverages[former_hit] = coverage
lengths[former_hit] = length
count+=1
else:
bitscores.append(bitscore)
coverages.append(coverage)
lengths.append(length)
count+=1
bitscore_mean = find_mean(bitscores)
coverage_mean = find_mean(coverages)
length_mean = find_mean(lengths)
with gzip.open(f"{result_path}/{n}.gz", mode='wt') as fout:
fout.write(f"{bitscore_mean}\t{coverage_mean}\t{length_mean}\t{count}\n")
except Exception as err:
with open(err_file, 'a') as fout:
fout.write(f"{timestamp()} Error calculating inc. factor for file:{n} {err}\n")