Skip to content

Commit dc69a05

Browse files
committed
Started implementing a filtering mechanism in the bam_reader.
1 parent 163eea6 commit dc69a05

File tree

1 file changed

+39
-33
lines changed

1 file changed

+39
-33
lines changed

BAM_to_TCS.py

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pipeutils import ASCII_to_num, FASTA_parser
1919

2020

21-
def TCSwriter(bamfile_name, fasta_d):
21+
def TCSwriter(bamfile_name, fasta_d, minqual, mincov):
2222
"""Convert the bamfile into the TCS format. The writing and the parsing
2323
are done simultaneously."""
2424

@@ -45,6 +45,7 @@ def TCSwriter(bamfile_name, fasta_d):
4545
# Define usefull variables that need to be reset
4646
bases = {"A": [], "C": [], "G": [], "T": [], "*": []}
4747
position = pileupcolumn.pos
48+
keepline = True
4849
tcov = 0 # Workaround
4950
# Define total coverage (AKA "Tcov")
5051
# tcov = pileupcolumn.n #TODO - submit bug for wrong counting
@@ -53,16 +54,17 @@ def TCSwriter(bamfile_name, fasta_d):
5354
for pileupread in pileupcolumn.pileups:
5455
if str(pileupread).startswith("*"):
5556
continue
56-
if str(pileupread.alignment.seq
57-
[pileupread.query_position]) not in basetrans:
57+
if str(pileupread.alignment.
58+
seq[pileupread.query_position]) not in basetrans:
5859
tcov += 1 # Workaround
5960
continue
6061

6162
if pileupread.is_del:
6263
bases["*"].append(1)
6364

6465
else:
65-
base = pileupread.alignment.seq[pileupread.query_position].upper()
66+
base = pileupread.alignment.seq[pileupread.
67+
query_position].upper()
6668
qual = pileupread.alignment.qual[pileupread.query_position]
6769
if pileupread.alignment.is_reverse:
6870
bases[base].append(ASCII_to_num(qual) * -1)
@@ -73,6 +75,9 @@ def TCSwriter(bamfile_name, fasta_d):
7375

7476
tcov += sum(covs) # Workaround
7577

78+
if tcov < mincov: # Discard position from TCS in this case
79+
keepline = False
80+
7681
# Define reference base (AKA "B") and qual (AKA "Q")
7782

7883
refbase = fasta_d[refs][position]
@@ -88,35 +93,36 @@ def TCSwriter(bamfile_name, fasta_d):
8893
padPos = position
8994

9095
# Write TCS lines #
91-
# Contig name
92-
TCS_line = refs
93-
TCS_line += " " * (24 - len(refs))
94-
# padPos
95-
TCS_line += " " * (5 - len(str(padPos)))
96-
TCS_line += str(padPos)
97-
# unpadPos
98-
TCS_line += " " * (8 - len(str(unpadPos)))
99-
TCS_line += str(unpadPos)
100-
# "B" and "Q"
101-
TCS_line += " | "
102-
TCS_line += refbase
103-
TCS_line += " " * (3 - len(str(refqual)))
104-
TCS_line += str(refqual)
105-
# Coverages
106-
TCS_line += " | "
107-
TCS_line += " " * (6 - len(str(tcov)))
108-
TCS_line += str(tcov)
109-
for c in covs:
110-
TCS_line += " " * (6 - len(str(c)))
111-
TCS_line += str(c)
112-
# Qualities
113-
TCS_line += " | "
114-
for q in quals:
115-
TCS_line += " " * (2 - len(str(q)))
116-
TCS_line += str(q) + " "
117-
# Discard all tags (not necessary for 4Pipe4 anyway)
118-
TCS_line += "| : |\n"
119-
TCS.write(TCS_line)
96+
if keepline == True:
97+
# Contig name
98+
TCS_line = refs
99+
TCS_line += " " * (24 - len(refs))
100+
# padPos
101+
TCS_line += " " * (5 - len(str(padPos)))
102+
TCS_line += str(padPos)
103+
# unpadPos
104+
TCS_line += " " * (8 - len(str(unpadPos)))
105+
TCS_line += str(unpadPos)
106+
# "B" and "Q"
107+
TCS_line += " | "
108+
TCS_line += refbase
109+
TCS_line += " " * (3 - len(str(refqual)))
110+
TCS_line += str(refqual)
111+
# Coverages
112+
TCS_line += " | "
113+
TCS_line += " " * (6 - len(str(tcov)))
114+
TCS_line += str(tcov)
115+
for c in covs:
116+
TCS_line += " " * (6 - len(str(c)))
117+
TCS_line += str(c)
118+
# Qualities
119+
TCS_line += " | "
120+
for q in quals:
121+
TCS_line += " " * (2 - len(str(q)))
122+
TCS_line += str(q) + " "
123+
# Discard all tags (not necessary for 4Pipe4 anyway)
124+
TCS_line += "| : |\n"
125+
TCS.write(TCS_line)
120126

121127
TCS.close()
122128
bamfile.close()

0 commit comments

Comments
 (0)