66
77from argparse import ArgumentDefaultsHelpFormatter , ArgumentParser , Namespace
88import matplotlib .pyplot as plt
9+ import matplotlib
10+ matplotlib .use ('Agg' )
911import read5_ont as r5
1012from os .path import splitext , join
1113import numpy as np
@@ -39,15 +41,14 @@ def readDynamont(file: str, readid : str) -> list:
3941 continue
4042 try :
4143 _ , _ , start , end , _ , base , motif , * _ = line .strip ().split (',' )
44+ segments .append ((int (start ), int (end ), base , motif ))
4245 except ValueError : # empty line in file
4346 pass
44- segments .append ((int (start ), int (end ), base , motif ))
4547 return segments
4648
47- def readUncalled4 (file : str , readid : str , kmerSize : int = 5 ) -> list :
49+ def readUncalled4 (file : str , readid : str ) -> list :
4850 print ("Reading Uncalled4 output from " + file )
4951 segments = []
50- HALFKMERSIZE = kmerSize // 2
5152 with open (file , 'r' ) as f :
5253 next (f ) # skip header
5354 # seq.name seq.pos seq.strand aln.id seq.kmer aln.read_id dtw.start dtw.length dtw.current dtw.current_sd
@@ -57,7 +58,7 @@ def readUncalled4(file : str, readid : str, kmerSize : int = 5) -> list:
5758 continue
5859 if start == "*" :
5960 continue
60- segments .append ((int (start ), int (start ) + int (length ), kmer [HALFKMERSIZE ], kmer ))
61+ segments .append ((int (start ), int (start ) + int (length ), kmer [len ( kmer ) // 2 ], kmer ))
6162 return segments
6263
6364def readF5CResquiggle (file : str , readid : str , read : str , kmerSize : int = 5 ) -> list :
@@ -73,13 +74,12 @@ def readF5CResquiggle(file : str, readid : str, read : str, kmerSize : int = 5)
7374 _ , kmer_idx , start , end = line .strip ().split ('\t ' )
7475 if start == '.' or end == '.' :
7576 continue
76- segments .append ((int (start ), int (end ), read [int (kmer_idx ) + HALFKMERSIZE ], read [int (kmer_idx ) + kmerSize ]))
77+ segments .append ((int (start ), int (end ), read [int (kmer_idx ) + HALFKMERSIZE ], read [int (kmer_idx ): int ( kmer_idx ) + kmerSize ]))
7778 return segments
7879
79- def readF5CEventalign (file : str , summary : str , readid : str , kmerSize : int = 5 ) -> list :
80+ def readF5CEventalign (file : str , summary : str , readid : str ) -> list :
8081 print ("Reading f5c Eventalign output from " + file )
8182 readNum = None
82- HALFKMERSIZE = kmerSize // 2
8383 with open (summary , 'r' ) as f :
8484 # read_index read_name fast5_path model_name strand num_events num_steps num_skips num_stays total_duration shift scale drift var
8585 for line in f :
@@ -94,10 +94,10 @@ def readF5CEventalign(file: str, summary : str, readid : str, kmerSize : int = 5
9494 next (f ) # skip header
9595 # contig position reference_kmer read_index strand event_index event_level_mean event_stdv event_length model_kmer model_mean model_stdv standardized_level start_idx end_idx
9696 for line in f :
97- _ , _ , _ , rNum , _ , _ , _ , _ , _ , modelKmer , _ , _ , _ , start , end = line .strip ().split ('\t ' )
97+ _ , _ , _ , rNum , _ , _ , _ , _ , _ , kmer , _ , _ , _ , start , end = line .strip ().split ('\t ' )
9898 if not rNum == readNum :
9999 continue
100- segments .append ((int (start ), int (end ), modelKmer [ HALFKMERSIZE ], modelKmer ))
100+ segments .append ((int (start ), int (end ), kmer [ len ( kmer ) // 2 ], kmer ))
101101 return segments
102102
103103def readDorado (file : str , readid : str ) -> list :
@@ -113,28 +113,22 @@ def readDorado(file : str, readid : str) -> list:
113113 segments .append ((int (start ), int (end ), base , motif ))
114114 return segments
115115
116- def readBasecalls (file : str , readid : str ) -> str :
117- import pysam
118- with pysam . AlignmentFile (file , "rb" , check_sq = False ) as bamfile :
116+ def readBasecalls (file : str , readid : str ) -> str :
117+ from pysam import AlignmentFile
118+ with AlignmentFile (file , "rb" , check_sq = False ) as bamfile :
119119 for read in bamfile .fetch (until_eof = True ):
120- if not readid == read .query_name :
121- continue
122- return read . query_sequence
120+ if readid == read .query_name :
121+ return read . query_sequence
122+ return None
123123
124124def main () -> None :
125125 basecolors = {
126- 'A' :'#377eb8' ,
127- 'a' :'#377eb8' ,
128- 'C' :'#ff7f00' ,
129- 'c' :'#ff7f00' ,
130- 'G' :'#4daf4a' ,
131- 'g' :'#4daf4a' ,
132- 'T' :'#f781bf' ,
133- 't' :'#f781bf' ,
134- 'U' :'#f781bf' ,
135- 'u' :'#f781bf' ,
136- 'N' :'#ffffff' ,
137- 'n' :'#ffffff' ,
126+ 'A' :'#377eb8' , 'a' :'#377eb8' ,
127+ 'C' :'#ff7f00' , 'c' :'#ff7f00' ,
128+ 'G' :'#4daf4a' , 'g' :'#4daf4a' ,
129+ 'T' :'#f781bf' , 't' :'#f781bf' ,
130+ 'U' :'#f781bf' , 'u' :'#f781bf' ,
131+ 'N' :'#ffffff' , 'n' :'#ffffff' ,
138132 }
139133
140134 args = parse ()
@@ -144,7 +138,7 @@ def main() -> None:
144138 tools = {
145139 "Dynamont" : np .array (readDynamont (args .dynamont , args .readid )),
146140 "Uncalled4" : np .array (readUncalled4 (args .uncalled4 , args .readid )),
147- "f5c Resquiggle" : np .array (readF5CResquiggle (args .f5cresquiggle , args .readid , read , args .k // 2 )),
141+ "f5c Resquiggle" : np .array (readF5CResquiggle (args .f5cresquiggle , args .readid , read , args .k )),
148142 "f5c Eventalign" : np .array (readF5CEventalign (args .f5ceventalign , splitext (args .f5ceventalign )[0 ] + '.sum' , args .readid )),
149143 # "Tombo" : readTombo(args.tombo, args.readid),
150144 "Dorado" : np .array (readDorado (args .dorado , args .readid )),
@@ -159,20 +153,16 @@ def main() -> None:
159153 fig .supxlabel ("Rel. Time (Sequencing Data Points)" )
160154
161155 for i , (tool , segments ) in enumerate (tools .items ()):
162- segments = np .unique (segments , axis = 1 )
163- # segments = segments[np.argsort(segments[:, 0].astype(int))]
156+ segments = np .unique (segments , axis = 0 )
164157 ax [i ].plot (signal , linewidth = 1.0 , c = 'black' )
165158 ax [i ].set_xlim ((0 , len (signal )))
166- # ax[i].vlines(segments[:, 0], ymin=min(signal), ymax=max(signal), linewidth=0.5, linestyle='--', colors='red')
167159 ax [i ].set_title (tool )
168- # ax[i].legend()
169160 ax [i ].set_xticks (np .arange (0 , len (signal ), 1000 ))
170161
171162 for s in segments :
172163 start = int (s [0 ])
173164 end = int (s [1 ])
174165 base = str (s [2 ])
175- # ax[i].text(start + (end - start)/2 - 6, -3.5, base, fontdict={'size' : 6, 'color':'black'}) # rotation=90
176166 ax [i ].vlines ([start , end ], ymin = min (signal ), ymax = max (signal ), colors = basecolors [base ], linestyles = '--' , linewidth = 0.7 )
177167 ax [i ].add_patch (Rectangle ((start , min (signal )), end - start , max (signal )- min (signal ), alpha = 0.4 , edgecolor = basecolors [base ], facecolor = basecolors [base ]))
178168
@@ -182,32 +172,35 @@ def main() -> None:
182172 plt .savefig (join (outpath , args .readid + "_tool_segmentation.png" ), dpi = 300 )
183173 plt .close ()
184174
185- # fig, ax = plt.subplots(nrows = 5, figsize=(20,10), dpi=300)
186- # fig.suptitle(f"Segmentation of {args.readid} by Different Tools", fontsize=20)
187- # fig.supylabel("Current (pA)", fontsize=18)
188- # fig.supxlabel("Rel. Time (Sequencing Data Points)", fontsize=18)
189-
190- # for i, (tool, segments) in enumerate(tools.items()):
191- # segments = np.unique(segments, axis=1)
192- # # segments = segments[np.argsort(segments[:, 0].astype(int))]
193- # ax[i].plot(signal, linewidth=1.0, c='black')
194- # # ax[i].vlines(segments[:, 0], ymin=min(signal), ymax=max(signal), linewidth=0.5, linestyle='--', colors='red')
195- # ax[i].set_title(tool, fontsize=18)
196- # # ax[i].legend()
197-
198- # for s in segments:
199- # start = int(s[0])
200- # end = int(s[1])
201- # base = str(s[2])
202- # # ax[i].text(start + (end - start)/2 - 6, -3.5, base, fontdict={'size' : 6, 'color':'black'}) # rotation=90
203- # ax[i].vlines([start], ymin=min(signal), ymax=max(signal), colors=basecolors[base], linestyles='--', linewidth=0.7)
204- # ax[i].add_patch(Rectangle((start, min(signal)), end - start, max(signal)-min(signal), alpha=0.4, edgecolor=basecolors[base], facecolor=basecolors[base]))
205-
206- # ax[i].set_xticks(np.arange(0, len(signal), 500))
207- # ax[i].set_xlim((19000, 22000))
208-
209- # plt.tight_layout()
210- # plt.savefig(join(outpath, args.readid + "_tool_segmentation_region.svg"), dpi=300)
175+ print ("Plotted: " + args .readid + "_tool_segmentation.svg" )
176+
177+ fig , ax = plt .subplots (nrows = 5 , figsize = (10 ,10 ), dpi = 300 )
178+ fig .suptitle (f"Segmentation of { args .readid } by Different Tools" , fontsize = 20 )
179+ fig .supylabel ("Current (pA)" , fontsize = 18 )
180+ fig .supxlabel ("Rel. Time (Sequencing Data Points)" , fontsize = 18 )
181+
182+ for i , (tool , segments ) in enumerate (tools .items ()):
183+ segments = np .unique (segments , axis = 0 )
184+ ax [i ].plot (signal , linewidth = 1.0 , c = 'black' )
185+ ax [i ].set_title (tool , fontsize = 18 )
186+
187+ for s in segments :
188+ start = int (s [0 ])
189+ end = int (s [1 ])
190+ base = str (s [2 ])
191+ ax [i ].vlines ([start ], ymin = min (signal ), ymax = max (signal ), colors = basecolors [base ], linestyles = '--' , linewidth = 0.7 )
192+ ax [i ].add_patch (Rectangle ((start , min (signal )), end - start , max (signal )- min (signal ), alpha = 0.4 , edgecolor = basecolors [base ], facecolor = basecolors [base ]))
193+
194+ ax [i ].set_xticks (np .arange (0 , len (signal ), 500 ))
195+ # ax[i].set_xlim((19000, 22000)) #! for c28
196+ ax [i ].set_xlim ((7400 , 8000 ))
197+
198+ plt .tight_layout ()
199+ plt .savefig (join (outpath , args .readid + "_tool_segmentation_region.svg" ), dpi = 300 )
200+ plt .savefig (join (outpath , args .readid + "_tool_segmentation_region.pdf" ), dpi = 300 )
201+ plt .savefig (join (outpath , args .readid + "_tool_segmentation_region.png" ), dpi = 300 )
202+
203+ print ("Plotted: " + args .readid + "_tool_segmentation_region.svg" )
211204
212205if __name__ == '__main__' :
213206 main ()
0 commit comments