1919from math import ceil
2020
2121
22- def TCSwriter (bamfile_name , fasta_d , minqual , mincov ):
22+ def tcs_writer (bamfile_name , fasta_d , minqual , mincov ):
2323 """
2424 Convert the bamfile into the TCS format. SNP filtering is done here.
2525 The TCS writing and BAM parsing are done simultaneously.
2626 """
2727
2828 # Set TCS file 'settings'
2929 tcsfile_name = bamfile_name [:bamfile_name .rindex ("." )] + "_out.short.tcs"
30- TCS = open (tcsfile_name , 'w' )
30+ tcs = open (tcsfile_name , 'w' )
3131
3232 # Set bamfile 'settings'
3333 bamfile = pysam .Samfile (bamfile_name , 'rb' )
@@ -36,11 +36,11 @@ def TCSwriter(bamfile_name, fasta_d, minqual, mincov):
3636 basetrans = "ACGT*"
3737
3838 # Write TCS Header
39- TCS .write ("#TCS V1.0\n " )
40- TCS .write ("#\n " )
41- TCS .write ("# contig name padPos upadPos| B Q | tcov covA " )
42- TCS .write ("covC covG covT cov* | qA qC qG qT q* | S | Tags\n " )
43- TCS .write ("#\n " )
39+ tcs .write ("#TCS V1.0\n " )
40+ tcs .write ("#\n " )
41+ tcs .write ("# contig name padPos upadPos| B Q | tcov covA " )
42+ tcs .write ("covC covG covT cov* | qA qC qG qT q* | S | Tags\n " )
43+ tcs .write ("#\n " )
4444
4545 for refs in bamfile .references :
4646 numpads = 0
@@ -104,46 +104,19 @@ def TCSwriter(bamfile_name, fasta_d, minqual, mincov):
104104 # Define padded and unpadded positions (AKA "padPos" and "upadPos")
105105 if refbase == "*" :
106106 numpads += 1
107- unpadPos = - 1
107+ unpad_pos = - 1
108108 else :
109- unpadPos = position - numpads
110- padPos = position
109+ unpad_pos = position - numpads
110+ pad_pos = position
111111
112112 # Write TCS lines #
113- if keepline == True :
114- # Contig name
115- TCS_line = refs
116- TCS_line += " " * (24 - len (refs ))
117- # padPos
118- TCS_line += " " * (5 - len (str (padPos )))
119- TCS_line += str (padPos )
120- # unpadPos
121- TCS_line += " " * (8 - len (str (unpadPos )))
122- TCS_line += str (unpadPos )
123- # "B" and "Q"
124- TCS_line += " | "
125- TCS_line += refbase
126- TCS_line += " " * (3 - len (str (refqual )))
127- TCS_line += str (refqual )
128- # Coverages
129- TCS_line += " | "
130- TCS_line += " " * (6 - len (str (tcov )))
131- TCS_line += str (tcov )
132- for c in covs :
133- TCS_line += " " * (6 - len (str (c )))
134- TCS_line += str (c )
135- # Qualities
136- TCS_line += " | "
137- for q in quals :
138- if q == 0 :
139- q = "--"
140- TCS_line += " " * (2 - len (str (q )))
141- TCS_line += str (q ) + " "
142- # Discard all tags (not necessary for 4Pipe4 anyway)
143- TCS_line += "| : |\n "
144- TCS .write (TCS_line )
145-
146- TCS .close ()
113+ if keepline is True :
114+ tcs_line = tcs_line_composer (refs , pad_pos , unpad_pos , refbase ,
115+ refqual , tcov , covs , quals )
116+
117+ tcs .write (tcs_line )
118+
119+ tcs .close ()
147120 bamfile .close ()
148121
149122
@@ -158,14 +131,14 @@ def covs_and_quals(bases):
158131 for i in ordered :
159132 covs .append (len (bases [i ]))
160133 if len (bases [i ]) > 0 :
161- quals .append (QualityCalc (bases [i ]))
134+ quals .append (quality_calc (bases [i ]))
162135 else :
163136 quals .append (0 )
164137
165138 return covs , quals
166139
167140
168- def QualityCalc (quals ):
141+ def quality_calc (quals ):
169142 """
170143 Calculate and return individual bases qualities, just like mira does,
171144 as seen here:
@@ -193,14 +166,53 @@ def QualityCalc(quals):
193166
194167 return qual
195168
169+ def tcs_line_composer (refs , pad_pos , unpad_pos , refbase ,
170+ refqual , tcov , covs , quals ):
171+ """
172+ Composes and returns a TCS line based on the data harvested from the BAM
173+ file.
174+ """
175+ # Contig name
176+ tcs_line = refs
177+ tcs_line += " " * (24 - len (refs ))
178+ # pad_pos
179+ tcs_line += " " * (5 - len (str (pad_pos )))
180+ tcs_line += str (pad_pos )
181+ # unpad_pos
182+ tcs_line += " " * (8 - len (str (unpad_pos )))
183+ tcs_line += str (unpad_pos )
184+ # "B" and "Q"
185+ tcs_line += " | "
186+ tcs_line += refbase
187+ tcs_line += " " * (3 - len (str (refqual )))
188+ tcs_line += str (refqual )
189+ # Coverages
190+ tcs_line += " | "
191+ tcs_line += " " * (6 - len (str (tcov )))
192+ tcs_line += str (tcov )
193+ for cov in covs :
194+ tcs_line += " " * (6 - len (str (cov )))
195+ tcs_line += str (cov )
196+ # Qualities
197+ tcs_line += " | "
198+ for basequal in quals :
199+ if basequal == 0 :
200+ basequal = "--"
201+ tcs_line += " " * (2 - len (str (basequal )))
202+ tcs_line += str (basequal ) + " "
203+ # Discard all tags (not necessary for 4Pipe4 anyway)
204+ tcs_line += "| : |\n "
205+
206+ return tcs_line
207+
196208
197209def RunModule (bamfile_name , padded_fasta_name , minqual , mincov ):
198210 """
199211 Run the module.
200212 """
201- TCSwriter (bamfile_name , FASTA_parser (padded_fasta_name ), minqual , mincov )
213+ tcs_writer (bamfile_name , FASTA_parser (padded_fasta_name ), minqual , mincov )
202214
203215if __name__ == "__main__" :
204- # Usage: python3 BAM_to_TCS .py file.bam file_out.padded.fasta minqual mincov
216+ # Usage: python3 BAM_to_tcs .py file.bam file_out.padded.fasta minqual mincov
205217 from sys import argv
206- RunModule (argv [1 ], argv [2 ], argv [3 ], argv [4 ])
218+ RunModule (argv [1 ], argv [2 ], int ( argv [3 ]), int ( argv [4 ]) )
0 commit comments