Skip to content

Commit 9e5321b

Browse files
committed
bug fixes
1 parent 8362a32 commit 9e5321b

File tree

1 file changed

+52
-59
lines changed

1 file changed

+52
-59
lines changed

src/python/plot/plotToolSegments.py

Lines changed: 52 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, Namespace
88
import matplotlib.pyplot as plt
9+
import matplotlib
10+
matplotlib.use('Agg')
911
import read5_ont as r5
1012
from os.path import splitext, join
1113
import numpy as np
@@ -39,15 +41,14 @@ def readDynamont(file: str, readid : str) -> list:
3941
continue
4042
try:
4143
_, _, start, end, _, base, motif, *_ = line.strip().split(',')
44+
segments.append((int(start), int(end), base, motif))
4245
except ValueError: # empty line in file
4346
pass
44-
segments.append((int(start), int(end), base, motif))
4547
return segments
4648

47-
def readUncalled4(file : str, readid : str, kmerSize : int = 5) -> list:
49+
def readUncalled4(file : str, readid : str) -> list:
4850
print("Reading Uncalled4 output from " + file)
4951
segments = []
50-
HALFKMERSIZE = kmerSize // 2
5152
with open(file, 'r') as f:
5253
next(f) # skip header
5354
# seq.name seq.pos seq.strand aln.id seq.kmer aln.read_id dtw.start dtw.length dtw.current dtw.current_sd
@@ -57,7 +58,7 @@ def readUncalled4(file : str, readid : str, kmerSize : int = 5) -> list:
5758
continue
5859
if start == "*":
5960
continue
60-
segments.append((int(start), int(start) + int(length), kmer[HALFKMERSIZE], kmer))
61+
segments.append((int(start), int(start) + int(length), kmer[len(kmer) // 2], kmer))
6162
return segments
6263

6364
def readF5CResquiggle(file : str, readid : str, read : str, kmerSize : int = 5) -> list:
@@ -73,13 +74,12 @@ def readF5CResquiggle(file : str, readid : str, read : str, kmerSize : int = 5)
7374
_, kmer_idx, start, end = line.strip().split('\t')
7475
if start == '.' or end == '.':
7576
continue
76-
segments.append((int(start), int(end), read[int(kmer_idx) + HALFKMERSIZE], read[int(kmer_idx) + kmerSize]))
77+
segments.append((int(start), int(end), read[int(kmer_idx) + HALFKMERSIZE], read[int(kmer_idx): int(kmer_idx) + kmerSize]))
7778
return segments
7879

79-
def readF5CEventalign(file: str, summary : str, readid : str, kmerSize : int = 5) -> list:
80+
def readF5CEventalign(file: str, summary : str, readid : str) -> list:
8081
print("Reading f5c Eventalign output from " + file)
8182
readNum = None
82-
HALFKMERSIZE = kmerSize // 2
8383
with open(summary, 'r') as f:
8484
# read_index read_name fast5_path model_name strand num_events num_steps num_skips num_stays total_duration shift scale drift var
8585
for line in f:
@@ -94,10 +94,10 @@ def readF5CEventalign(file: str, summary : str, readid : str, kmerSize : int = 5
9494
next(f) # skip header
9595
# contig position reference_kmer read_index strand event_index event_level_mean event_stdv event_length model_kmer model_mean model_stdv standardized_level start_idx end_idx
9696
for line in f:
97-
_, _, _, rNum, _, _, _, _, _, modelKmer, _, _, _, start, end = line.strip().split('\t')
97+
_, _, _, rNum, _, _, _, _, _, kmer, _, _, _, start, end = line.strip().split('\t')
9898
if not rNum == readNum:
9999
continue
100-
segments.append((int(start), int(end), modelKmer[HALFKMERSIZE], modelKmer))
100+
segments.append((int(start), int(end), kmer[len(kmer) // 2], kmer))
101101
return segments
102102

103103
def readDorado(file : str, readid : str) -> list:
@@ -113,28 +113,22 @@ def readDorado(file : str, readid : str) -> list:
113113
segments.append((int(start), int(end), base, motif))
114114
return segments
115115

116-
def readBasecalls(file : str, readid : str) -> str:
117-
import pysam
118-
with pysam.AlignmentFile(file, "rb", check_sq=False) as bamfile:
116+
def readBasecalls(file: str, readid: str) -> str:
117+
from pysam import AlignmentFile
118+
with AlignmentFile(file, "rb", check_sq=False) as bamfile:
119119
for read in bamfile.fetch(until_eof=True):
120-
if not readid == read.query_name:
121-
continue
122-
return read.query_sequence
120+
if readid == read.query_name:
121+
return read.query_sequence
122+
return None
123123

124124
def main() -> None:
125125
basecolors = {
126-
'A':'#377eb8',
127-
'a':'#377eb8',
128-
'C':'#ff7f00',
129-
'c':'#ff7f00',
130-
'G':'#4daf4a',
131-
'g':'#4daf4a',
132-
'T':'#f781bf',
133-
't':'#f781bf',
134-
'U':'#f781bf',
135-
'u':'#f781bf',
136-
'N':'#ffffff',
137-
'n':'#ffffff',
126+
'A':'#377eb8', 'a':'#377eb8',
127+
'C':'#ff7f00', 'c':'#ff7f00',
128+
'G':'#4daf4a', 'g':'#4daf4a',
129+
'T':'#f781bf', 't':'#f781bf',
130+
'U':'#f781bf', 'u':'#f781bf',
131+
'N':'#ffffff', 'n':'#ffffff',
138132
}
139133

140134
args = parse()
@@ -144,7 +138,7 @@ def main() -> None:
144138
tools = {
145139
"Dynamont" : np.array(readDynamont(args.dynamont, args.readid)),
146140
"Uncalled4" : np.array(readUncalled4(args.uncalled4, args.readid)),
147-
"f5c Resquiggle" : np.array(readF5CResquiggle(args.f5cresquiggle, args.readid, read, args.k // 2)),
141+
"f5c Resquiggle" : np.array(readF5CResquiggle(args.f5cresquiggle, args.readid, read, args.k)),
148142
"f5c Eventalign" : np.array(readF5CEventalign(args.f5ceventalign, splitext(args.f5ceventalign)[0] + '.sum', args.readid)),
149143
# "Tombo" : readTombo(args.tombo, args.readid),
150144
"Dorado" : np.array(readDorado(args.dorado, args.readid)),
@@ -159,20 +153,16 @@ def main() -> None:
159153
fig.supxlabel("Rel. Time (Sequencing Data Points)")
160154

161155
for i, (tool, segments) in enumerate(tools.items()):
162-
segments = np.unique(segments, axis=1)
163-
# segments = segments[np.argsort(segments[:, 0].astype(int))]
156+
segments = np.unique(segments, axis=0)
164157
ax[i].plot(signal, linewidth=1.0, c='black')
165158
ax[i].set_xlim((0, len(signal)))
166-
# ax[i].vlines(segments[:, 0], ymin=min(signal), ymax=max(signal), linewidth=0.5, linestyle='--', colors='red')
167159
ax[i].set_title(tool)
168-
# ax[i].legend()
169160
ax[i].set_xticks(np.arange(0, len(signal), 1000))
170161

171162
for s in segments:
172163
start = int(s[0])
173164
end = int(s[1])
174165
base = str(s[2])
175-
# ax[i].text(start + (end - start)/2 - 6, -3.5, base, fontdict={'size' : 6, 'color':'black'}) # rotation=90
176166
ax[i].vlines([start, end], ymin=min(signal), ymax=max(signal), colors=basecolors[base], linestyles='--', linewidth=0.7)
177167
ax[i].add_patch(Rectangle((start, min(signal)), end - start, max(signal)-min(signal), alpha=0.4, edgecolor=basecolors[base], facecolor=basecolors[base]))
178168

@@ -182,32 +172,35 @@ def main() -> None:
182172
plt.savefig(join(outpath, args.readid + "_tool_segmentation.png"), dpi=300)
183173
plt.close()
184174

185-
# fig, ax = plt.subplots(nrows = 5, figsize=(20,10), dpi=300)
186-
# fig.suptitle(f"Segmentation of {args.readid} by Different Tools", fontsize=20)
187-
# fig.supylabel("Current (pA)", fontsize=18)
188-
# fig.supxlabel("Rel. Time (Sequencing Data Points)", fontsize=18)
189-
190-
# for i, (tool, segments) in enumerate(tools.items()):
191-
# segments = np.unique(segments, axis=1)
192-
# # segments = segments[np.argsort(segments[:, 0].astype(int))]
193-
# ax[i].plot(signal, linewidth=1.0, c='black')
194-
# # ax[i].vlines(segments[:, 0], ymin=min(signal), ymax=max(signal), linewidth=0.5, linestyle='--', colors='red')
195-
# ax[i].set_title(tool, fontsize=18)
196-
# # ax[i].legend()
197-
198-
# for s in segments:
199-
# start = int(s[0])
200-
# end = int(s[1])
201-
# base = str(s[2])
202-
# # ax[i].text(start + (end - start)/2 - 6, -3.5, base, fontdict={'size' : 6, 'color':'black'}) # rotation=90
203-
# ax[i].vlines([start], ymin=min(signal), ymax=max(signal), colors=basecolors[base], linestyles='--', linewidth=0.7)
204-
# ax[i].add_patch(Rectangle((start, min(signal)), end - start, max(signal)-min(signal), alpha=0.4, edgecolor=basecolors[base], facecolor=basecolors[base]))
205-
206-
# ax[i].set_xticks(np.arange(0, len(signal), 500))
207-
# ax[i].set_xlim((19000, 22000))
208-
209-
# plt.tight_layout()
210-
# plt.savefig(join(outpath, args.readid + "_tool_segmentation_region.svg"), dpi=300)
175+
print("Plotted: " + args.readid + "_tool_segmentation.svg")
176+
177+
fig, ax = plt.subplots(nrows = 5, figsize=(10,10), dpi=300)
178+
fig.suptitle(f"Segmentation of {args.readid} by Different Tools", fontsize=20)
179+
fig.supylabel("Current (pA)", fontsize=18)
180+
fig.supxlabel("Rel. Time (Sequencing Data Points)", fontsize=18)
181+
182+
for i, (tool, segments) in enumerate(tools.items()):
183+
segments = np.unique(segments, axis=0)
184+
ax[i].plot(signal, linewidth=1.0, c='black')
185+
ax[i].set_title(tool, fontsize=18)
186+
187+
for s in segments:
188+
start = int(s[0])
189+
end = int(s[1])
190+
base = str(s[2])
191+
ax[i].vlines([start], ymin=min(signal), ymax=max(signal), colors=basecolors[base], linestyles='--', linewidth=0.7)
192+
ax[i].add_patch(Rectangle((start, min(signal)), end - start, max(signal)-min(signal), alpha=0.4, edgecolor=basecolors[base], facecolor=basecolors[base]))
193+
194+
ax[i].set_xticks(np.arange(0, len(signal), 500))
195+
# ax[i].set_xlim((19000, 22000)) #! for c28
196+
ax[i].set_xlim((7400, 8000))
197+
198+
plt.tight_layout()
199+
plt.savefig(join(outpath, args.readid + "_tool_segmentation_region.svg"), dpi=300)
200+
plt.savefig(join(outpath, args.readid + "_tool_segmentation_region.pdf"), dpi=300)
201+
plt.savefig(join(outpath, args.readid + "_tool_segmentation_region.png"), dpi=300)
202+
203+
print("Plotted: " + args.readid + "_tool_segmentation_region.svg")
211204

212205
if __name__ == '__main__':
213206
main()

0 commit comments

Comments
 (0)