Skip to content

Commit 349e3a6

Browse files
committed
hardcoded segmentation probability plot for manuscript
1 parent 86fed6a commit 349e3a6

File tree

1 file changed

+38
-21
lines changed

1 file changed

+38
-21
lines changed

src/python/plot/plotToolSegments.py

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ def getDynamontProbs() -> np.ndarray:
6262
signal = (signal - shift) / scale
6363
hampelFilter(signal)
6464
start = 2050
65-
end = start + 9769
66-
script = "dynamont-NT-banded"
65+
end = 9769
66+
script = "dynamont-NT"
6767
kmerSize = 9
6868
pore = "rna_rp4"
6969
PARAMS = {
@@ -72,14 +72,22 @@ def getDynamontProbs() -> np.ndarray:
7272
'r' : pore, # pore type
7373
't' : 4,
7474
}
75-
segments, borderProbs = feedSegmentation(signal[start:end], read, script, start, kmerSize, "rna" in pore, PARAMS)
76-
# safety check
77-
print(segments[:10])
75+
segments, borderProbs, heatmap = feedSegmentation(signal[start:end], read, script, start, kmerSize, "rna" in pore, PARAMS)
7876
# convert log probabilities to probabilities
7977
borderProbs = np.exp(borderProbs)
80-
# print(type(borderProbs), borderProbs.shape, borderProbs)
8178
borderProbs = np.concat((np.zeros(start), borderProbs))
82-
return borderProbs
79+
80+
# heatmap has dimensionality TxN
81+
nt2idx = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
82+
acgt = np.zeros((end - start, 4), dtype=np.float32)
83+
print(acgt.shape, heatmap.shape)
84+
for t in range(0, heatmap.shape[0] - 1): # iterate signal
85+
for n in range(0, heatmap.shape[1] - 1 - kmerSize//2): # iterate read
86+
acgt[t, nt2idx[read[n + kmerSize//2]]] += heatmap[t + 1, n + 1]
87+
88+
acgt = np.concat((np.zeros((start, 4)), acgt))
89+
90+
return borderProbs, acgt
8391

8492

8593
def readUncalled4(file : str, readid : str) -> list:
@@ -202,13 +210,18 @@ def main() -> None:
202210
ax[i].vlines([start, end], ymin=min(signal), ymax=max(signal), colors=basecolors[base], linestyles='--', linewidth=0.7)
203211
ax[i].add_patch(Rectangle((start, min(signal)), end - start, max(signal)-min(signal), alpha=0.4, edgecolor=basecolors[base], facecolor=basecolors[base]))
204212

205-
# if tool == "Dynamont":
206-
# prob_ax = ax[i].twinx()
207-
# borderProbs = getDynamontProbs()
208-
# prob_ax.plot(np.arange(len(borderProbs)), borderProbs, c='red', linewidth=0.5, linestyle='-')
209-
# prob_ax.set_ylabel("Border Probabilities", fontsize=10)
210-
# prob_ax.set_ylim((-0.1, 5))
211-
# prob_ax.set_yticks([0, 1])
213+
if tool == "Dynamont":
214+
prob_ax = ax[i].twinx()
215+
borderProbs, acgt_probs = getDynamontProbs()
216+
# prob_ax.plot(np.arange(len(borderProbs)), borderProbs, c='red', linewidth=0.5, linestyle='-')
217+
prob_ax.plot(acgt_probs[:, 0], c=basecolors['A'], linewidth=1, linestyle='-', label="A probability")
218+
prob_ax.plot(acgt_probs[:, 1], c=basecolors['C'], linewidth=1, linestyle='-', label="C probability")
219+
prob_ax.plot(acgt_probs[:, 2], c=basecolors['G'], linewidth=1, linestyle='-', label="G probability")
220+
prob_ax.plot(acgt_probs[:, 3], c=basecolors['T'], linewidth=1, linestyle='-', label="T probability")
221+
prob_ax.set_ylabel("Nucleotide Probabilities", fontsize=10)
222+
prob_ax.set_ylim((-0.1, 7))
223+
prob_ax.set_yticks([0, 1])
224+
prob_ax.legend(loc='upper right', fontsize=10)
212225

213226
plt.tight_layout()
214227
plt.savefig(join(outpath, args.readid + "_tool_segmentation.svg"), dpi=300)
@@ -235,13 +248,17 @@ def main() -> None:
235248
ax[i].vlines([start], ymin=min(signal), ymax=max(signal), colors=basecolors[base], linestyles='--', linewidth=0.7)
236249
ax[i].add_patch(Rectangle((start, min(signal)), end - start, max(signal)-min(signal), alpha=0.4, edgecolor=basecolors[base], facecolor=basecolors[base]))
237250

238-
# if tool == "Dynamont":
239-
# prob_ax = ax[i].twinx()
240-
# # borderProbs = getDynamontProbs()
241-
# prob_ax.plot(np.arange(len(borderProbs)), borderProbs, c='red', linewidth=1, linestyle='-')
242-
# prob_ax.set_ylabel("Border Probabilities", fontsize=10)
243-
# prob_ax.set_ylim((-0.1, 5))
244-
# prob_ax.set_yticks([0, 1])
251+
if tool == "Dynamont":
252+
prob_ax = ax[i].twinx()
253+
# prob_ax.plot(np.arange(len(borderProbs)), borderProbs, c='red', linewidth=0.5, linestyle='-')
254+
prob_ax.plot(acgt_probs[:, 0], c=basecolors['A'], linewidth=1, linestyle='-', label="A probability")
255+
prob_ax.plot(acgt_probs[:, 1], c=basecolors['C'], linewidth=1, linestyle='-', label="C probability")
256+
prob_ax.plot(acgt_probs[:, 2], c=basecolors['G'], linewidth=1, linestyle='-', label="G probability")
257+
prob_ax.plot(acgt_probs[:, 3], c=basecolors['T'], linewidth=1, linestyle='-', label="T probability")
258+
prob_ax.set_ylabel("Nucleotide Probabilities", fontsize=10)
259+
prob_ax.set_ylim((-0.1, 7))
260+
prob_ax.set_yticks([0, 1])
261+
prob_ax.legend(loc='upper right', fontsize=10)
245262

246263
ax[i].set_xticks(np.arange(0, len(signal), 500))
247264
# ax[i].set_xlim((19000, 22000)) #! for c28

0 commit comments

Comments
 (0)