1818from pipeutils import ASCII_to_num , FASTA_parser
1919
2020
21- def TCSwriter (bamfile_name , fasta_d ):
21+ def TCSwriter (bamfile_name , fasta_d , minqual , mincov ):
2222 """Convert the bamfile into the TCS format. The writing and the parsing
2323 are done simultaneously."""
2424
@@ -45,6 +45,7 @@ def TCSwriter(bamfile_name, fasta_d):
4545 # Define usefull variables that need to be reset
4646 bases = {"A" : [], "C" : [], "G" : [], "T" : [], "*" : []}
4747 position = pileupcolumn .pos
48+ keepline = True
4849 tcov = 0 # Workaround
4950 # Define total coverage (AKA "Tcov")
5051 # tcov = pileupcolumn.n #TODO - submit bug for wrong counting
@@ -53,16 +54,17 @@ def TCSwriter(bamfile_name, fasta_d):
5354 for pileupread in pileupcolumn .pileups :
5455 if str (pileupread ).startswith ("*" ):
5556 continue
56- if str (pileupread .alignment .seq
57- [pileupread .query_position ]) not in basetrans :
57+ if str (pileupread .alignment .
58+ seq [pileupread .query_position ]) not in basetrans :
5859 tcov += 1 # Workaround
5960 continue
6061
6162 if pileupread .is_del :
6263 bases ["*" ].append (1 )
6364
6465 else :
65- base = pileupread .alignment .seq [pileupread .query_position ].upper ()
66+ base = pileupread .alignment .seq [pileupread .
67+ query_position ].upper ()
6668 qual = pileupread .alignment .qual [pileupread .query_position ]
6769 if pileupread .alignment .is_reverse :
6870 bases [base ].append (ASCII_to_num (qual ) * - 1 )
@@ -73,6 +75,9 @@ def TCSwriter(bamfile_name, fasta_d):
7375
7476 tcov += sum (covs ) # Workaround
7577
78+ if tcov < mincov : # Discard position from TCS in this case
79+ keepline = False
80+
7681 # Define reference base (AKA "B") and qual (AKA "Q")
7782
7883 refbase = fasta_d [refs ][position ]
@@ -88,35 +93,36 @@ def TCSwriter(bamfile_name, fasta_d):
8893 padPos = position
8994
9095 # Write TCS lines #
91- # Contig name
92- TCS_line = refs
93- TCS_line += " " * (24 - len (refs ))
94- # padPos
95- TCS_line += " " * (5 - len (str (padPos )))
96- TCS_line += str (padPos )
97- # unpadPos
98- TCS_line += " " * (8 - len (str (unpadPos )))
99- TCS_line += str (unpadPos )
100- # "B" and "Q"
101- TCS_line += " | "
102- TCS_line += refbase
103- TCS_line += " " * (3 - len (str (refqual )))
104- TCS_line += str (refqual )
105- # Coverages
106- TCS_line += " | "
107- TCS_line += " " * (6 - len (str (tcov )))
108- TCS_line += str (tcov )
109- for c in covs :
110- TCS_line += " " * (6 - len (str (c )))
111- TCS_line += str (c )
112- # Qualities
113- TCS_line += " | "
114- for q in quals :
115- TCS_line += " " * (2 - len (str (q )))
116- TCS_line += str (q ) + " "
117- # Discard all tags (not necessary for 4Pipe4 anyway)
118- TCS_line += "| : |\n "
119- TCS .write (TCS_line )
96+ if keepline == True :
97+ # Contig name
98+ TCS_line = refs
99+ TCS_line += " " * (24 - len (refs ))
100+ # padPos
101+ TCS_line += " " * (5 - len (str (padPos )))
102+ TCS_line += str (padPos )
103+ # unpadPos
104+ TCS_line += " " * (8 - len (str (unpadPos )))
105+ TCS_line += str (unpadPos )
106+ # "B" and "Q"
107+ TCS_line += " | "
108+ TCS_line += refbase
109+ TCS_line += " " * (3 - len (str (refqual )))
110+ TCS_line += str (refqual )
111+ # Coverages
112+ TCS_line += " | "
113+ TCS_line += " " * (6 - len (str (tcov )))
114+ TCS_line += str (tcov )
115+ for c in covs :
116+ TCS_line += " " * (6 - len (str (c )))
117+ TCS_line += str (c )
118+ # Qualities
119+ TCS_line += " | "
120+ for q in quals :
121+ TCS_line += " " * (2 - len (str (q )))
122+ TCS_line += str (q ) + " "
123+ # Discard all tags (not necessary for 4Pipe4 anyway)
124+ TCS_line += "| : |\n "
125+ TCS .write (TCS_line )
120126
121127 TCS .close ()
122128 bamfile .close ()
0 commit comments