Skip to content

Commit 33e5007

Browse files
committed
add notebooks to do predictions and interpretations on custom sequences input as fasta files
1 parent 1517572 commit 33e5007

File tree

4 files changed

+753
-0
lines changed

4 files changed

+753
-0
lines changed

chrombpnet/evaluation/custom_sequences/custom_sequence_interpret.ipynb

Lines changed: 371 additions & 0 deletions
Large diffs are not rendered by default.

chrombpnet/evaluation/custom_sequences/custom_sequence_pred.ipynb

Lines changed: 218 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
>seq1
2+
AATCTCCAGGTTATTTAAAGTAGTTATAGGAGCAGAGAACAAGCACCTTTATCAAAATCTGGTCCTATGTGCCTTGCTTTACCAAATACCTGATTTTTCTGGAGGGTGTTCCTGTAATTCACAACTGTAGACACATGGGCAAAATTAGGATTTTTAAGAATAAATACATTTCTATTTTTTTGGTTGTTTCAACATTAGCTCTTCAAATTCATTAACAAAATTAAAATAGGTATATTACAAAAGCATAAACATTTGTGAACAGTACTTAAATAAATTGTGATACTATTGCTCCATCATTGAACTTTTTGAAACTTTAACAATTGTATAAAACTGTCAGTTTGTTGTTTCATTTGTAATTACAAAATAATTTAAAAACTTTTTAAAATAATTTGGATCCTGACTTTGTCTATATCTGTATTTCATTTGTTTAGAAAGATTCTTTTGGGTTTGATAATGTAATTTGTATATTTAAATTTTTTATGGACATAATTCAAAGGAATGTATAAATTGGTCTTTTGTTAAATGGCTTTTTAATTGATAAACTTCTCTTGTCATTTTTTGGTATCCAGCTATTACCTATTTAATAGATTTATTGAAATAGATTATTTTCATAAAGAACTCTATACAAATCTTTTTCTATATTTCCTTATTTTCCTATTTACCTGTGTCTATGACCTAACCTATGAATTAGTCTTCTCTCTTTATATATCAAAAATGAATTACTGATCTTTTTCTCTGGCTCTGTAGTATCTCTATCACTGTCACATGTGATCTTTCTTCCTTTTCTCTAGCCCATATTCTAGCATGAAATACTGGGTTGGCCAGGTGCAGTGGCTCATTCCTGTAATCCCAACACTTTAGGAAGCCAAGGCAGAAGAATCGCTTGAGCCCATGAATTTGAGGCCAGCCTGGGCAACATAGCCTTGTTTCTACAAAAAATCTTAAAAATAAAATTAGCCAATATGTGGGCATGCACCTGTGGGGCCAGCTACTCAGGAGGCTGAGGCAGAAGGATTGCTTGAGCCCAGCAATTTGATGCTGCAGTAAACCATGATGACACTACTGGACTCTAGCCTGAGTGACAGTGAGACTCTGTCTCAAAAAACAAACAAAACAAAAACTGAAACAACAAAAAAAGACTGGGTTTATTTAAGCTAGTTAGAATTTATCTTTCTATATGTTAATAACAGCCTAACAGATTTTTTGTTTTAAATATCTCTAGGCTAGCCTCAAGGTTAAGTAATTATAGAAGTTTGGTATGTATTTTCTTCATAATTTGAATATAATTGCTTCCATTGTGACTGTCAATTGAATGCATGGAGATCAATTGTGATAATATACAGGATTTTAGTCCTATCTCTACTGCTGAAGTAACCTTACACAAAATACTTTGTAAAAAAATCACTAAAGTGCCAGCATTTTTAAAGTGTATATTTTTCTTTGGCAACCTCTCATGAAAAGCACTAACTAAAAATATTTAATAATCTTTTTTGTATTACAGTGCTTCTTTTGTTGGAAATATATCACAATCCTCAAGTTCCACTGCTATGCAAAAGTATCTTAGAATCTGAATCTTATAGATAATACTACCTTTTTTTTTTTTTTTTTTTTTTTTTTGAGATGGTGTCTCGCTCTGTCACCTAGGCTGCAGTGGTGTGACCTCACTGCAACCTCTGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCTAGTAACTGGGATTACAGGCCATGCCACCACGCCCAACTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCCGCCTGCCTCGGCCTCCCAAAGTGCTGGGTTTACAGGCGTGAGCCGCTGCGCCCGGCCAATACTGCCTTTCAAAAGAGACTGATAGTGACCAGCATTAGTAATCATACTGGTGGGGTTTTTTGGGGTGTTTTTCTGTTTTGTTTTTTGTTTTTTGTTTTTGAGACAAGGT
CTTCCTCCCGTTGCCTAGGCTGGAGTGCGGTGACACAATCTTGGCTCACTGCAGCCTTGATAGAGCAAGCTCAAGTGATCCTCCCAGGAGCCTCGGCCCCCAAGCAGCTGGGAA
3+
>seq2
4+
ATCACAATCCTCAAGTTCCACTGCTATGCAAAAGTATCTTAGAATCTGAATCTTATAGATAATACTACCTTTTTTTTTTTTTTTTTTTTTTTTTTGAGATGGTGTCTCGCTCTGTCACCTAGGCTGCAGTGGTGTGACCTCACTGCAACCTCTGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCCTAGTAACTGGGATTACAGGCCATGCCACCACGCCCAACTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCCGCCTGCCTCGGCCTCCCAAAGTGCTGGGTTTACAGGCGTGAGCCGCTGCGCCCGGCCAATACTGCCTTTCAAAAGAGACTGATAGTGACCAGCATTAGTAATCATACTGGTGGGGTTTTTTGGGGTGTTTTTCTGTTTTGTTTTTTGTTTTTTGTTTTTGAGACAAGGTCTTCCTCCCGTTGCCTAGGCTGGAGTGCGGTGACACAATCTTGGCTCACTGCAGCCTTGATAGAGCAAGCTCAAGTGATCCTCCCAGGAGCCTCGGCCCCCAAGCAGCTGGGAATACAGGTGCGCGCCACCATGCCCAGCTGATTTTTGTATTTTTTTGTAGAGATGGGGTTTTGCCATGTTGCCCGGATTGGTCTCAAACTCCTGAGCTTCAGTGATCTGCCTGCCTTGGCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCGACCACACCCAGCCCATATTGGTCTTTCTTACTGTTCTTAAAAAGAGAATTCCTTTAAGGCAGGACCGATTACATATACACTCTAGAAAAGAAAATAGCAAGGAAGAAATAAATTGCCTTCAATTACCAAAGATTTGAGCTTCTGCTATGGCTGAGAGTGTTTTGGTCATTGCAAATTCAGGGGTTTCCCAAGTTCACCCTCAGTTCTGGCTAGAAAGAGAAAACTTACTAAAAGCTATTATACTCACAGTCATATTTATTACAGAGAAAGGAAATACAAATTAAAACCAGCCAAAGGAAGTGACACATAAAACAGAGTCTAGGAGTGGTCCAAACTTGAGGCTTTCGGTGTCCTTTTCTTGTAGCGTCATGGAAGGTGTTATCTACTCCTGACCACAATGTTTGACAGTACACACATAGTATTGCCATTCAGGGAAGCTCACCTAAGCTTTGGTGTCCAGATTTTTATTGAGAGAGGCTCTATTAGTTGGCATGGTTGGTTGATTTTTTTGCCCATGTAGTTGTCCTCTCTTTCCAACATCTGCCCCTCCCTGGAGATCTGGTTGACATCAAGACTTCAGGGCCTCACCATAGGTTATCTCGTTAGCATAAACTGTCAAGTGTTGTCTAAGGAACCCACAATGAATAATAAAGACATTCCTATCAGTGAGAACTCCCAAAGACTTACACCAGAACTTTCTTTGGATGGGCCAAATTTCTTACTACACAAAGACCATTCATCTCTATACACTTCCTTCTGAATTGATGAGGATGATACAAGCAACGACAATTCTTCTTTTCAGAGACTTTTAATTTTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCACTCAGGCTGGAGTTCAGCGGCACGATCTCTGCTCACTGCAACCTCCGCCTCCTGGGTTTGAGCAATTCTCCTACCTCAGCCTCCCAAGTAGCTGGGATTACTGGTGCTCACCACCACGCCCAACTAATTTTTGTATTTTTGGTAGAGACGGGTTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCTGCCGGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCATCACACCCAGCCCAGAGACTTTTAATTTATACATTGTCATTTTTAATTTTAGAATATCTGTCAAATGATTCTGAACATAACATGAATCTAGTGTGGAAAAATGTTTATAATCAGATATTGTGTTAAGAACATATATATATATATAGAGAGAGAG
AGAGCATAGTATTGTCATTTAGTTTTCACTTCTTATGTTTAATGACAACTTTATAAATGCTGGACTATTTTAAACTACAAATTTAAAACATGGTTTATAAACCTTTTACCTGTA
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import matplotlib
2+
import matplotlib.pyplot as plt
3+
import numpy as np
4+
5+
6+
def ic_scale(pwm, background):
    """Scale every row of a position weight matrix by a per-row score.

    For each row the score is
    ``sum(pwm * log2((pwm + 0.001) / 1.004) - background * log2(background))``
    and the returned matrix is ``pwm`` multiplied row-wise by that score.

    Args:
        pwm: 2-D array-like; the last axis must have the same length as
            ``background``.
        background: 1-D array-like of background frequencies. Entries must
            be strictly positive for the logarithm to be finite.

    Returns:
        numpy.ndarray with the same shape as ``pwm``.
    """
    pwm = np.asarray(pwm)
    background = np.asarray(background)

    # 0.001 is a pseudocount that keeps log() finite for zero entries;
    # 1.004 presumably renormalises a 4-letter row (1 + 4 * 0.001) -- confirm
    # before reusing this with a different alphabet size.
    smoothed = (pwm + 0.001) / 1.004

    row_term = (np.log(smoothed) / np.log(2)) * pwm
    background_term = (np.log(background) * background / np.log(2))[None, :]
    ic = row_term - background_term

    return pwm * (np.sum(ic, axis=1)[:, None])
11+
12+
13+
def plot_a(ax, base, left_edge, height, color):
    """Draw the letter "A" on ``ax`` as three filled polygons.

    The glyph is defined in a unit square, stretched vertically by
    ``height`` and translated so that its lower-left corner sits at
    ``(left_edge, base)``. A negative ``height`` flips the glyph below
    ``base``.

    Args:
        ax: Object exposing ``add_patch`` (a matplotlib Axes).
        base: y coordinate of the glyph's baseline.
        left_edge: x coordinate of the glyph's left side.
        height: Vertical scale factor applied to the unit glyph.
        color: Face and edge colour of the polygons.
    """
    unit_outlines = (
        # left leg
        np.array([
            [0.0, 0.0],
            [0.5, 1.0],
            [0.5, 0.8],
            [0.2, 0.0],
        ]),
        # right leg
        np.array([
            [1.0, 0.0],
            [0.5, 1.0],
            [0.5, 0.8],
            [0.8, 0.0],
        ]),
        # crossbar
        np.array([
            [0.225, 0.45],
            [0.775, 0.45],
            [0.85, 0.3],
            [0.15, 0.3],
        ]),
    )

    # Loop-invariant transform: scale y by `height`, then shift to position.
    scale = np.array([1, height])[None, :]
    offset = np.array([left_edge, base])[None, :]

    for outline in unit_outlines:
        ax.add_patch(matplotlib.patches.Polygon(
            scale * outline + offset,
            facecolor=color, edgecolor=color))
38+
39+
40+
def plot_c(ax, base, left_edge, height, color):
    """Draw the letter "C" on ``ax`` from one coloured and two white patches.

    A coloured ellipse 1.3 wide is drawn first, a smaller white ellipse
    hollows it out, and a white rectangle covering x in
    ``[left_edge + 1, left_edge + 2]`` cuts the opening on the right.

    NOTE: the white patches paint over anything already drawn underneath
    them (including the slot to the right) and only look correct on a
    white background.

    Args:
        ax: Object exposing ``add_patch`` (a matplotlib Axes).
        base: y coordinate of the glyph's baseline.
        left_edge: x coordinate of the glyph's left side.
        height: Height of the glyph.
        color: Face and edge colour of the outer ellipse.
    """
    centre = [left_edge + 0.65, base + 0.5 * height]

    # Solid outer ellipse.
    ax.add_patch(matplotlib.patches.Ellipse(
        xy=centre, width=1.3, height=height,
        facecolor=color, edgecolor=color))
    # White inner ellipse turns the disc into a ring.
    ax.add_patch(matplotlib.patches.Ellipse(
        xy=centre, width=0.7 * 1.3, height=0.7 * height,
        facecolor='white', edgecolor='white'))
    # White rectangle removes the right-hand side of the ring.
    ax.add_patch(matplotlib.patches.Rectangle(
        xy=[left_edge + 1, base], width=1.0, height=height,
        facecolor='white', edgecolor='white', fill=True))
47+
48+
49+
def plot_g(ax, base, left_edge, height, color):
    """Draw the letter "G" on ``ax``: a "C" plus a vertical and a horizontal bar.

    The first three patches are exactly those of ``plot_c`` (coloured
    ellipse, white inner ellipse, white cut-out rectangle), so that
    function is reused; two coloured rectangles are then added on top.

    Args:
        ax: Object exposing ``add_patch`` (a matplotlib Axes).
        base: y coordinate of the glyph's baseline.
        left_edge: x coordinate of the glyph's left side.
        height: Height of the glyph.
        color: Face and edge colour of the coloured patches.
    """
    # Shared "C" body -- same patches, same order as before.
    plot_c(ax, base, left_edge, height, color)

    # Vertical stroke on the right-hand side of the opening.
    ax.add_patch(matplotlib.patches.Rectangle(
        xy=[left_edge + 0.825, base + 0.085 * height],
        width=0.174, height=0.415 * height,
        facecolor=color, edgecolor=color, fill=True))
    # Horizontal stroke reaching into the middle of the glyph.
    ax.add_patch(matplotlib.patches.Rectangle(
        xy=[left_edge + 0.625, base + 0.35 * height],
        width=0.374, height=0.15 * height,
        facecolor=color, edgecolor=color, fill=True))
60+
61+
62+
def plot_t(ax, base, left_edge, height, color):
    """Draw the letter "T" on ``ax`` as a vertical stem plus a top bar.

    Args:
        ax: Object exposing ``add_patch`` (a matplotlib Axes).
        base: y coordinate of the glyph's baseline.
        left_edge: x coordinate of the glyph's left side.
        height: Height of the glyph.
        color: Face and edge colour of both rectangles.
    """
    # Stem: 0.2 wide, centred in the unit-wide slot, full height.
    stem = matplotlib.patches.Rectangle(
        xy=[left_edge + 0.4, base],
        width=0.2, height=height,
        facecolor=color, edgecolor=color, fill=True)
    # Bar: full slot width, occupying the top 20% of the height.
    bar = matplotlib.patches.Rectangle(
        xy=[left_edge, base + 0.8 * height],
        width=1.0, height=0.2 * height,
        facecolor=color, edgecolor=color, fill=True)

    ax.add_patch(stem)
    ax.add_patch(bar)
67+
68+
# Column index -> colour used for that letter (columns are ordered A, C, G, T,
# matching default_plot_funcs below).
default_colors = {0: 'green', 1: 'blue', 2: 'orange', 3: 'red'}
# Column index -> function that draws that letter.
default_plot_funcs = {0: plot_a, 1: plot_c, 2: plot_g, 3: plot_t}
70+
def plot_weights_given_ax(ax, array,
                          height_padding_factor,
                          length_padding,
                          subticks_frequency,
                          highlight,
                          colors=default_colors,
                          plot_funcs=default_plot_funcs,
                          ylabel="",
                          ylim=None):
    """Draw a sequence logo of per-position letter weights onto ``ax``.

    At each position the four values are stacked by increasing magnitude:
    positive values upwards from zero, non-positive values downwards.

    Args:
        ax: matplotlib Axes to draw on.
        array: numpy array of shape (length, 4) or (4, length); a 3-D array
            is squeezed first. A (4, length) array is transposed unless it
            is exactly 4x4.
        height_padding_factor: Fraction of the largest absolute y extent
            added as padding above and below.
        length_padding: Padding added to both ends of the x axis.
        subticks_frequency: Spacing of the x ticks.
        highlight: Mapping ``color -> iterable of (start_pos, end_pos)``;
            an unfilled rectangle is drawn around each span. ``None`` is
            treated as "no highlights".
        colors: Mapping column index -> letter colour.
        plot_funcs: Mapping column index -> letter drawing function called
            as ``f(ax=, base=, left_edge=, height=, color=)``.
        ylabel: Label for the y axis.
        ylim: Optional ``(min, max)`` overriding the data-derived y range
            (padding is still applied); requires ``min <= 0 <= max``.

    Raises:
        AssertionError: If the array is not (length, 4) after normalisation,
            a highlight span lies outside the sequence, or ``ylim`` does
            not straddle zero.
    """
    if highlight is None:
        highlight = {}

    if len(array.shape) == 3:
        array = np.squeeze(array)
    assert len(array.shape) == 2, array.shape
    if array.shape[0] == 4 and array.shape[1] != 4:
        array = array.transpose(1, 0)
    assert array.shape[1] == 4

    num_positions = array.shape[0]
    max_pos_height = 0.0
    min_neg_height = 0.0
    heights_at_positions = []
    depths_at_positions = []

    for position, weights in enumerate(array):
        # Sort from smallest to highest magnitude so the largest letters
        # end up furthest from the zero line.
        acgt_vals = sorted(enumerate(weights), key=lambda pair: abs(pair[1]))
        positive_height_so_far = 0.0
        negative_height_so_far = 0.0
        for letter_idx, value in acgt_vals:
            plot_func = plot_funcs[letter_idx]
            color = colors[letter_idx]
            if value > 0:
                height_so_far = positive_height_so_far
                positive_height_so_far += value
            else:
                height_so_far = negative_height_so_far
                negative_height_so_far += value
            plot_func(ax=ax, base=height_so_far, left_edge=position,
                      height=value, color=color)
        max_pos_height = max(max_pos_height, positive_height_so_far)
        min_neg_height = min(min_neg_height, negative_height_so_far)
        heights_at_positions.append(positive_height_so_far)
        depths_at_positions.append(negative_height_so_far)

    # Now highlight any desired positions; the key of the highlight dict
    # is the colour of the box.
    for color, spans in highlight.items():
        for start_pos, end_pos in spans:
            assert start_pos >= 0.0 and end_pos <= num_positions
            min_depth = np.min(depths_at_positions[start_pos:end_pos])
            max_height = np.max(heights_at_positions[start_pos:end_pos])
            ax.add_patch(
                matplotlib.patches.Rectangle(
                    xy=[start_pos, min_depth],
                    width=end_pos - start_pos,
                    height=max_height - min_depth,
                    edgecolor=color, fill=False))

    ax.set_xlim(-length_padding, num_positions + length_padding)
    ax.xaxis.set_ticks(np.arange(0.0, num_positions + 1, subticks_frequency))

    if ylim is not None:
        # Use user-specified y-axis limits.
        min_neg_height, max_pos_height = ylim
        assert min_neg_height <= 0
        assert max_pos_height >= 0

    height_padding = max(abs(min_neg_height) * height_padding_factor,
                         abs(max_pos_height) * height_padding_factor)
    ax.set_ylim(min_neg_height - height_padding,
                max_pos_height + height_padding)
    ax.set_ylabel(ylabel)
    ax.yaxis.label.set_fontsize(15)
136+
137+
138+
def plot_weights(array,
                 figsize=(20, 2),
                 height_padding_factor=0.2,
                 length_padding=1.0,
                 subticks_frequency=1.0,
                 colors=default_colors,
                 plot_funcs=default_plot_funcs,
                 highlight=None,
                 ylabel="",
                 ylim=None,
                 dpi=300):
    """Create a new figure and draw a sequence logo of ``array`` on it.

    Thin wrapper that builds a single-axes figure and forwards everything
    else to ``plot_weights_given_ax``.

    Args:
        array: Per-position letter weights; see ``plot_weights_given_ax``.
        figsize: Figure size passed to ``plt.figure``.
        height_padding_factor: Forwarded to ``plot_weights_given_ax``.
        length_padding: Forwarded to ``plot_weights_given_ax``.
        subticks_frequency: Forwarded to ``plot_weights_given_ax``.
        colors: Mapping column index -> letter colour.
        plot_funcs: Mapping column index -> letter drawing function.
        highlight: Mapping ``color -> iterable of (start_pos, end_pos)``.
            Defaults to no highlights.
        ylabel: Label for the y axis.
        ylim: Optional ``(min, max)`` y-axis limits.
        dpi: Figure resolution passed to ``plt.figure``.

    Returns:
        None. The figure is neither shown nor returned; it remains
        pyplot's current figure, so the caller can follow up with
        ``plt.show()`` or ``plt.savefig(...)``.
    """
    # A literal {} default would be one dict shared by every call; use a
    # None sentinel and hand the callee a fresh dict instead.
    if highlight is None:
        highlight = {}

    fig = plt.figure(figsize=figsize, dpi=dpi)
    ax = fig.add_subplot(111)
    plot_weights_given_ax(ax=ax, array=array,
                          height_padding_factor=height_padding_factor,
                          length_padding=length_padding,
                          subticks_frequency=subticks_frequency,
                          colors=colors,
                          plot_funcs=plot_funcs,
                          highlight=highlight,
                          ylabel=ylabel,
                          ylim=ylim)

0 commit comments

Comments
 (0)