Skip to content

Commit 781916b

Browse files
🚧 added files moved + python package files
1 parent 63ee342 commit 781916b

22 files changed

+12121
-0
lines changed

‎MANIFEST.in‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
include LICENSE
2+
include README.md

‎docs/README.md‎

Whitespace-only changes.

‎docs/index.md‎

Whitespace-only changes.
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "41895583",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"# import libraries\n",
11+
"\n",
12+
"import numpy as np\n",
13+
"import matplotlib.pyplot as plt\n",
14+
"import plotly.graph_objects as go\n",
15+
"from matplotlib.patches import Rectangle"
16+
]
17+
},
18+
{
19+
"cell_type": "raw",
20+
"id": "00339492",
21+
"metadata": {
22+
"vscode": {
23+
"languageId": "raw"
24+
}
25+
},
26+
"source": [
27+
"seq = \"QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALITSGHMTHYEDSVKGRFTISRDNAKEVLYLQMNSLKPEDTAVYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDIDYKDDDDKGAAHHHHHH\"\n",
28+
"\n",
29+
"cdrs = {\n",
30+
" \"CDR1\": seq[30:35],\n",
31+
" \"CDR2\": seq[49:66], \n",
32+
" \"CDR3\": seq[98:115], \n",
33+
"}\n",
34+
"\n",
35+
"for cdr, seq_part in cdrs.items():\n",
36+
" print(f\"{cdr}: {seq_part}\")\n",
37+
"\n",
38+
"# Styling\n",
39+
"font_family = \"Courier New\"\n",
40+
"highlight_colors = {\n",
41+
" \"CDR1\": \"#f4a261\",\n",
42+
" \"CDR2\": \"#2a9d8f\",\n",
43+
" \"CDR3\": \"#457b9d\"\n",
44+
"}"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"id": "1785f8e1",
51+
"metadata": {},
52+
"outputs": [],
53+
"source": [
54+
"def plot_psm_depth_with_cdrs(\n",
55+
" protein_sequence,\n",
56+
" mapped_psms,\n",
57+
" cdrs,\n",
58+
" highlight_colors,\n",
59+
" output_file=\"fig_depth_psms_cdrs.svg\",\n",
60+
" title=\"PSM depth across the protein sequence with CDR regions\",\n",
61+
"):\n",
62+
" \"\"\"\n",
63+
" Plot PSM depth along a protein sequence with CDR regions highlighted.\n",
64+
"\n",
65+
" Args:\n",
66+
" protein_sequence (str): Normalized protein sequence.\n",
67+
"        mapped_psms (iterable of pairs): Each item should be a pair whose second element is a 4-tuple (start, end, ..., ...); the first element is ignored.\n",
68+
" cdrs (dict): Dictionary of CDR labels and their (start, end) positions.\n",
69+
" highlight_colors (dict): CDR labels mapped to their highlight colors.\n",
70+
" output_file (str): Path to save the SVG file.\n",
71+
" title (str): Plot title.\n",
72+
" \"\"\"\n",
73+
" depth = np.zeros(len(protein_sequence), dtype=int)\n",
74+
" for _, (start, end, _, _) in mapped_psms:\n",
75+
" depth[start:end] += 1\n",
76+
"\n",
77+
" fig = go.Figure()\n",
78+
" fig.add_trace(\n",
79+
" go.Scatter(\n",
80+
" x=list(range(len(protein_sequence))),\n",
81+
" y=depth,\n",
82+
" mode=\"lines\",\n",
83+
" line=dict(color=\"steelblue\", width=2),\n",
84+
" fill=\"tozeroy\",\n",
85+
" fillcolor=\"rgba(70, 130, 180, 0.2)\",\n",
86+
" name=\"PSM Depth\",\n",
87+
" )\n",
88+
" )\n",
89+
"\n",
90+
" for label, (start, end) in cdrs.items():\n",
91+
" fig.add_shape(\n",
92+
" type=\"rect\",\n",
93+
" x0=start,\n",
94+
" x1=end,\n",
95+
" y0=0,\n",
96+
" y1=max(depth),\n",
97+
" fillcolor=highlight_colors.get(label, \"gray\"),\n",
98+
" opacity=0.3,\n",
99+
" line=dict(width=0),\n",
100+
" layer=\"below\",\n",
101+
" )\n",
102+
" fig.add_annotation(\n",
103+
" x=(start + end) / 2,\n",
104+
" y=max(depth) + 8,\n",
105+
" text=label,\n",
106+
" showarrow=False,\n",
107+
" font=dict(size=14, color=\"black\"),\n",
108+
" xanchor=\"center\",\n",
109+
" )\n",
110+
"\n",
111+
" fig.update_layout(\n",
112+
" title=title,\n",
113+
" xaxis=dict(\n",
114+
" title=\"Amino acid position\",\n",
115+
" tickmode=\"linear\",\n",
116+
" dtick=10,\n",
117+
" showline=True,\n",
118+
" linecolor=\"black\",\n",
119+
" linewidth=2,\n",
120+
" showgrid=False,\n",
121+
" ),\n",
122+
" yaxis=dict(\n",
123+
" title=\"Depth (Number of matching PSMs per position)\",\n",
124+
" showline=True,\n",
125+
" linecolor=\"black\",\n",
126+
" linewidth=2,\n",
127+
" showgrid=False,\n",
128+
" ),\n",
129+
" template=\"plotly_white\",\n",
130+
" height=450,\n",
131+
" width=1000,\n",
132+
" margin=dict(t=60),\n",
133+
" showlegend=False,\n",
134+
" )\n",
135+
"\n",
136+
" fig.show()\n",
137+
" fig.write_image(output_file, format=\"svg\", scale=2)"
138+
]
139+
},
140+
{
141+
"cell_type": "markdown",
142+
"id": "3347a030",
143+
"metadata": {},
144+
"source": [
145+
"To run the previous function, you first need to create `mapped_psms` and `protein_norm`, which you can find in the main pipeline."
146+
]
147+
},
148+
{
149+
"cell_type": "code",
150+
"execution_count": null,
151+
"id": "857db62a",
152+
"metadata": {},
153+
"outputs": [],
154+
"source": [
155+
"cdrs = {\"CDR1\": (31, 35), \"CDR2\": (50, 66), \"CDR3\": (99, 115)}\n",
156+
"highlight_colors = {\"CDR1\": \"orange\", \"CDR2\": \"lightgreen\", \"CDR3\": \"deepskyblue\"}\n",
157+
"\n",
158+
"plot_psm_depth_with_cdrs(\n",
159+
" protein_sequence=protein_norm,\n",
160+
" mapped_psms=mapped_psms,\n",
161+
" cdrs=cdrs,\n",
162+
" highlight_colors=highlight_colors,\n",
163+
" output_file=\"fig_4C_depth_psms_cdrs.svg\",\n",
164+
")"
165+
]
166+
},
167+
{
168+
"cell_type": "code",
169+
"execution_count": null,
170+
"id": "43d014e1",
171+
"metadata": {},
172+
"outputs": [],
173+
"source": [
174+
"def plot_cdr_scaffolds(\n",
175+
" reference_seq,\n",
176+
" scaffold_info,\n",
177+
" cdrs,\n",
178+
" highlight_colors,\n",
179+
" font_family=\"monospace\",\n",
180+
" font_size=25,\n",
181+
" letter_spacing=1.5,\n",
182+
" save_path=None,\n",
183+
"):\n",
184+
" \"\"\"\n",
185+
" Plot reference sequence and full scaffold sequences aligned, highlighting CDR regions,\n",
186+
" with adjustable letter spacing for sequence letters.\n",
187+
"\n",
188+
" \"\"\"\n",
189+
"\n",
190+
" def chars_equal(a, b):\n",
191+
" return (a in [\"L\", \"I\"] and b in [\"L\", \"I\"]) or a == b\n",
192+
"\n",
193+
" def find_alignment_offset(ref, seq):\n",
194+
" for pos in range(len(ref) - len(seq) + 1):\n",
195+
" if all(chars_equal(ref[pos + i], seq[i]) for i in range(len(seq))):\n",
196+
" return pos\n",
197+
" return 0\n",
198+
"\n",
199+
" fig_width = max(10, len(reference_seq) * letter_spacing / 8)\n",
200+
" fig, ax = plt.subplots(figsize=(fig_width, 6))\n",
201+
" ax.axis(\"off\")\n",
202+
"\n",
203+
" for name, (start, end) in cdrs.items():\n",
204+
" x0 = (start - 1) * letter_spacing + 1\n",
205+
" width = (end - start + 1) * letter_spacing\n",
206+
" ax.add_patch(\n",
207+
" Rectangle((x0, 4.5), width, 0.8, color=highlight_colors[name], alpha=0.3)\n",
208+
" )\n",
209+
" ax.text(\n",
210+
" x0 + width / 2,\n",
211+
" 5.5,\n",
212+
" name,\n",
213+
" ha=\"center\",\n",
214+
" va=\"bottom\",\n",
215+
" fontsize=font_size,\n",
216+
" fontweight=\"bold\",\n",
217+
" color=highlight_colors[name],\n",
218+
" )\n",
219+
"\n",
220+
" ax.text(-5, 5, \"Reference\", fontfamily=font_family, fontsize=font_size, ha=\"right\")\n",
221+
" for i, aa in enumerate(reference_seq):\n",
222+
" x = i * letter_spacing + 1\n",
223+
" ax.text(x, 5, aa, fontfamily=font_family, fontsize=font_size, color=\"black\")\n",
224+
"\n",
225+
" y_positions = [4, 3, 2]\n",
226+
" for idx, (scaf_name, seq) in enumerate(scaffold_info):\n",
227+
" cdr_name = f\"CDR{idx+1}\"\n",
228+
" color = highlight_colors[cdr_name]\n",
229+
" offset = find_alignment_offset(reference_seq, seq)\n",
230+
"\n",
231+
" start, end = cdrs[cdr_name]\n",
232+
" rel_start = max(start - 1 - offset, 0)\n",
233+
" rel_end = min(end - 1 - offset, len(seq) - 1)\n",
234+
"\n",
235+
" ax.text(\n",
236+
" -5,\n",
237+
" y_positions[idx],\n",
238+
" f\"{cdr_name} ({scaf_name})\",\n",
239+
" fontfamily=font_family,\n",
240+
" fontsize=font_size,\n",
241+
" ha=\"right\",\n",
242+
" )\n",
243+
"\n",
244+
" for i, aa in enumerate(seq):\n",
245+
" x = (offset + i) * letter_spacing + 1\n",
246+
" ax.text(\n",
247+
" x,\n",
248+
" y_positions[idx],\n",
249+
" aa,\n",
250+
" fontfamily=font_family,\n",
251+
" fontsize=font_size,\n",
252+
" color=\"black\",\n",
253+
" )\n",
254+
"\n",
255+
" if rel_end >= rel_start:\n",
256+
" x0 = (offset + rel_start) * letter_spacing + 1\n",
257+
" width = (rel_end - rel_start + 1) * letter_spacing\n",
258+
" ax.add_patch(\n",
259+
" Rectangle(\n",
260+
" (x0, y_positions[idx] - 0.2), width, 0.8, color=color, alpha=0.3\n",
261+
" )\n",
262+
" )\n",
263+
"\n",
264+
" ax.set_xlim(0, len(reference_seq) * letter_spacing + 5)\n",
265+
" ax.set_ylim(1.5, 6)\n",
266+
"\n",
267+
" plt.tight_layout()\n",
268+
" if save_path:\n",
269+
" plt.savefig(save_path, format=\"svg\", dpi=600, bbox_inches=\"tight\")\n",
270+
" plt.show()"
271+
]
272+
},
273+
{
274+
"cell_type": "code",
275+
"execution_count": null,
276+
"id": "849fc33e",
277+
"metadata": {},
278+
"outputs": [],
279+
"source": [
280+
"reference_seq = \"QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALITSGHMTHYEDSVKGRFTISRDNAKEVLYLQMNSLKPEDTAVYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDIDYKDDDDKGAAHHHHHH\"\n",
281+
"\n",
282+
"cdrs = {\"CDR1\": (31, 35), \"CDR2\": (50, 66), \"CDR3\": (99, 115)}\n",
283+
"highlight_colors = {\"CDR1\": \"orange\", \"CDR2\": \"lightgreen\", \"CDR3\": \"deepskyblue\"}\n",
284+
"scaffold_info = [\n",
285+
" (\n",
286+
" \"scaffold_5\",\n",
287+
" \"QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALLTSGHMTHYEDSVKGRFY\",\n",
288+
" ),\n",
289+
" (\"scaffold_10\", \"SYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDLDYKDDDDKGAAH\"),\n",
290+
"]\n",
291+
"\n",
292+
"plot_cdr_scaffolds(\n",
293+
" reference_seq,\n",
294+
" scaffold_info,\n",
295+
" cdrs,\n",
296+
" highlight_colors,\n",
297+
" font_size=20,\n",
298+
" letter_spacing=3, # increased spacing\n",
299+
" save_path=\"cdrs_scaffolds.svg\",\n",
300+
")"
301+
]
302+
}
303+
],
304+
"metadata": {
305+
"language_info": {
306+
"name": "python"
307+
}
308+
},
309+
"nbformat": 4,
310+
"nbformat_minor": 5
311+
}

0 commit comments

Comments
 (0)