Skip to content

Commit 0ed7e55

Browse files
committed
**Much** better PEP8 conformance.
1 parent 6bac7ea commit 0ed7e55

File tree

1 file changed

+61
-49
lines changed

1 file changed

+61
-49
lines changed

BAM_to_TCS.py

Lines changed: 61 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
from math import ceil
2020

2121

22-
def TCSwriter(bamfile_name, fasta_d, minqual, mincov):
22+
def tcs_writer(bamfile_name, fasta_d, minqual, mincov):
2323
"""
2424
Convert the bamfile into the TCS format. SNP filtering is done here.
2525
The TCS writing and BAM parsing are done simultaneously.
2626
"""
2727

2828
# Set TCS file 'settings'
2929
tcsfile_name = bamfile_name[:bamfile_name.rindex(".")] + "_out.short.tcs"
30-
TCS = open(tcsfile_name, 'w')
30+
tcs = open(tcsfile_name, 'w')
3131

3232
# Set bamfile 'settings'
3333
bamfile = pysam.Samfile(bamfile_name, 'rb')
@@ -36,11 +36,11 @@ def TCSwriter(bamfile_name, fasta_d, minqual, mincov):
3636
basetrans = "ACGT*"
3737

3838
# Write TCS Header
39-
TCS.write("#TCS V1.0\n")
40-
TCS.write("#\n")
41-
TCS.write("# contig name padPos upadPos| B Q | tcov covA ")
42-
TCS.write("covC covG covT cov* | qA qC qG qT q* | S | Tags\n")
43-
TCS.write("#\n")
39+
tcs.write("#TCS V1.0\n")
40+
tcs.write("#\n")
41+
tcs.write("# contig name padPos upadPos| B Q | tcov covA ")
42+
tcs.write("covC covG covT cov* | qA qC qG qT q* | S | Tags\n")
43+
tcs.write("#\n")
4444

4545
for refs in bamfile.references:
4646
numpads = 0
@@ -104,46 +104,19 @@ def TCSwriter(bamfile_name, fasta_d, minqual, mincov):
104104
# Define padded and unpadded positions (AKA "padPos" and "upadPos")
105105
if refbase == "*":
106106
numpads += 1
107-
unpadPos = -1
107+
unpad_pos = -1
108108
else:
109-
unpadPos = position - numpads
110-
padPos = position
109+
unpad_pos = position - numpads
110+
pad_pos = position
111111

112112
# Write TCS lines #
113-
if keepline == True:
114-
# Contig name
115-
TCS_line = refs
116-
TCS_line += " " * (24 - len(refs))
117-
# padPos
118-
TCS_line += " " * (5 - len(str(padPos)))
119-
TCS_line += str(padPos)
120-
# unpadPos
121-
TCS_line += " " * (8 - len(str(unpadPos)))
122-
TCS_line += str(unpadPos)
123-
# "B" and "Q"
124-
TCS_line += " | "
125-
TCS_line += refbase
126-
TCS_line += " " * (3 - len(str(refqual)))
127-
TCS_line += str(refqual)
128-
# Coverages
129-
TCS_line += " | "
130-
TCS_line += " " * (6 - len(str(tcov)))
131-
TCS_line += str(tcov)
132-
for c in covs:
133-
TCS_line += " " * (6 - len(str(c)))
134-
TCS_line += str(c)
135-
# Qualities
136-
TCS_line += " | "
137-
for q in quals:
138-
if q == 0:
139-
q = "--"
140-
TCS_line += " " * (2 - len(str(q)))
141-
TCS_line += str(q) + " "
142-
# Discard all tags (not necessary for 4Pipe4 anyway)
143-
TCS_line += "| : |\n"
144-
TCS.write(TCS_line)
145-
146-
TCS.close()
113+
if keepline is True:
114+
tcs_line = tcs_line_composer(refs, pad_pos, unpad_pos, refbase,
115+
refqual, tcov, covs, quals)
116+
117+
tcs.write(tcs_line)
118+
119+
tcs.close()
147120
bamfile.close()
148121

149122

@@ -158,14 +131,14 @@ def covs_and_quals(bases):
158131
for i in ordered:
159132
covs.append(len(bases[i]))
160133
if len(bases[i]) > 0:
161-
quals.append(QualityCalc(bases[i]))
134+
quals.append(quality_calc(bases[i]))
162135
else:
163136
quals.append(0)
164137

165138
return covs, quals
166139

167140

168-
def QualityCalc(quals):
141+
def quality_calc(quals):
169142
"""
170143
Calculate and return individual bases qualities, just like mira does,
171144
as seen here:
@@ -193,14 +166,53 @@ def QualityCalc(quals):
193166

194167
return qual
195168

169+
def tcs_line_composer(refs, pad_pos, unpad_pos, refbase,
170+
refqual, tcov, covs, quals):
171+
"""
172+
Composes and returns a TCS line based on the data harvested from the BAM
173+
file.
174+
"""
175+
# Contig name
176+
tcs_line = refs
177+
tcs_line += " " * (24 - len(refs))
178+
# pad_pos
179+
tcs_line += " " * (5 - len(str(pad_pos)))
180+
tcs_line += str(pad_pos)
181+
# unpad_pos
182+
tcs_line += " " * (8 - len(str(unpad_pos)))
183+
tcs_line += str(unpad_pos)
184+
# "B" and "Q"
185+
tcs_line += " | "
186+
tcs_line += refbase
187+
tcs_line += " " * (3 - len(str(refqual)))
188+
tcs_line += str(refqual)
189+
# Coverages
190+
tcs_line += " | "
191+
tcs_line += " " * (6 - len(str(tcov)))
192+
tcs_line += str(tcov)
193+
for cov in covs:
194+
tcs_line += " " * (6 - len(str(cov)))
195+
tcs_line += str(cov)
196+
# Qualities
197+
tcs_line += " | "
198+
for basequal in quals:
199+
if basequal == 0:
200+
basequal = "--"
201+
tcs_line += " " * (2 - len(str(basequal)))
202+
tcs_line += str(basequal) + " "
203+
# Discard all tags (not necessary for 4Pipe4 anyway)
204+
tcs_line += "| : |\n"
205+
206+
return tcs_line
207+
196208

197209
def RunModule(bamfile_name, padded_fasta_name, minqual, mincov):
198210
"""
199211
Run the module.
200212
"""
201-
TCSwriter(bamfile_name, FASTA_parser(padded_fasta_name), minqual, mincov)
213+
tcs_writer(bamfile_name, FASTA_parser(padded_fasta_name), minqual, mincov)
202214

203215
if __name__ == "__main__":
204-
# Usage: python3 BAM_to_TCS.py file.bam file_out.padded.fasta minqual mincov
216+
# Usage: python3 BAM_to_tcs.py file.bam file_out.padded.fasta minqual mincov
205217
from sys import argv
206-
RunModule(argv[1], argv[2], argv[3], argv[4])
218+
RunModule(argv[1], argv[2], int(argv[3]), int(argv[4]))

0 commit comments

Comments
 (0)