|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": null, |
| 6 | + "id": "41895583", |
| 7 | + "metadata": {}, |
| 8 | + "outputs": [], |
| 9 | + "source": [ |
| 10 | + "# import libraries\n", |
| 11 | + "\n", |
| 12 | + "import numpy as np\n", |
| 13 | + "import matplotlib.pyplot as plt\n", |
| 14 | + "import plotly.graph_objects as go\n", |
| 15 | + "from matplotlib.patches import Rectangle" |
| 16 | + ] |
| 17 | + }, |
| 18 | + { |
| 19 | + "cell_type": "raw", |
| 20 | + "id": "00339492", |
| 21 | + "metadata": { |
| 22 | + "vscode": { |
| 23 | + "languageId": "raw" |
| 24 | + } |
| 25 | + }, |
| 26 | + "source": [ |
| 27 | + "seq = \"QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALITSGHMTHYEDSVKGRFTISRDNAKEVLYLQMNSLKPEDTAVYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDIDYKDDDDKGAAHHHHHH\"\n", |
| 28 | + "\n", |
| 29 | + "cdrs = {\n", |
| 30 | + " \"CDR1\": seq[30:35],\n", |
| 31 | + " \"CDR2\": seq[49:66], \n", |
| 32 | + " \"CDR3\": seq[98:115], \n", |
| 33 | + "}\n", |
| 34 | + "\n", |
| 35 | + "for cdr, seq_part in cdrs.items():\n", |
| 36 | + " print(f\"{cdr}: {seq_part}\")\n", |
| 37 | + "\n", |
| 38 | + "# Styling\n", |
| 39 | + "font_family = \"Courier New\"\n", |
| 40 | + "highlight_colors = {\n", |
| 41 | + " \"CDR1\": \"#f4a261\",\n", |
| 42 | + " \"CDR2\": \"#2a9d8f\",\n", |
| 43 | + " \"CDR3\": \"#457b9d\"\n", |
| 44 | + "}" |
| 45 | + ] |
| 46 | + }, |
| 47 | + { |
| 48 | + "cell_type": "code", |
| 49 | + "execution_count": null, |
| 50 | + "id": "1785f8e1", |
| 51 | + "metadata": {}, |
| 52 | + "outputs": [], |
| 53 | + "source": [ |
| 54 | + "def plot_psm_depth_with_cdrs(\n", |
| 55 | + " protein_sequence,\n", |
| 56 | + " mapped_psms,\n", |
| 57 | + " cdrs,\n", |
| 58 | + " highlight_colors,\n", |
| 59 | + " output_file=\"fig_depth_psms_cdrs.svg\",\n", |
| 60 | + " title=\"PSM depth across the protein sequence with CDR regions\",\n", |
| 61 | + "):\n", |
| 62 | + " \"\"\"\n", |
| 63 | + " Plot PSM depth along a protein sequence with CDR regions highlighted.\n", |
| 64 | + "\n", |
| 65 | + " Args:\n", |
| 66 | + " protein_sequence (str): Normalized protein sequence.\n", |
| 67 | + " mapped_psms (list of tuples): Each tuple should contain (start, end, ..., ...).\n", |
| 68 | + " cdrs (dict): Dictionary of CDR labels and their (start, end) positions.\n", |
| 69 | + " highlight_colors (dict): CDR labels mapped to their highlight colors.\n", |
| 70 | + " output_file (str): Path to save the SVG file.\n", |
| 71 | + " title (str): Plot title.\n", |
| 72 | + " \"\"\"\n", |
| 73 | + " depth = np.zeros(len(protein_sequence), dtype=int)\n", |
| 74 | + " for _, (start, end, _, _) in mapped_psms:\n", |
| 75 | + " depth[start:end] += 1\n", |
| 76 | + "\n", |
| 77 | + " fig = go.Figure()\n", |
| 78 | + " fig.add_trace(\n", |
| 79 | + " go.Scatter(\n", |
| 80 | + " x=list(range(len(protein_sequence))),\n", |
| 81 | + " y=depth,\n", |
| 82 | + " mode=\"lines\",\n", |
| 83 | + " line=dict(color=\"steelblue\", width=2),\n", |
| 84 | + " fill=\"tozeroy\",\n", |
| 85 | + " fillcolor=\"rgba(70, 130, 180, 0.2)\",\n", |
| 86 | + " name=\"PSM Depth\",\n", |
| 87 | + " )\n", |
| 88 | + " )\n", |
| 89 | + "\n", |
| 90 | + " for label, (start, end) in cdrs.items():\n", |
| 91 | + " fig.add_shape(\n", |
| 92 | + " type=\"rect\",\n", |
| 93 | + " x0=start,\n", |
| 94 | + " x1=end,\n", |
| 95 | + " y0=0,\n", |
| 96 | + " y1=max(depth),\n", |
| 97 | + " fillcolor=highlight_colors.get(label, \"gray\"),\n", |
| 98 | + " opacity=0.3,\n", |
| 99 | + " line=dict(width=0),\n", |
| 100 | + " layer=\"below\",\n", |
| 101 | + " )\n", |
| 102 | + " fig.add_annotation(\n", |
| 103 | + " x=(start + end) / 2,\n", |
| 104 | + " y=max(depth) + 8,\n", |
| 105 | + " text=label,\n", |
| 106 | + " showarrow=False,\n", |
| 107 | + " font=dict(size=14, color=\"black\"),\n", |
| 108 | + " xanchor=\"center\",\n", |
| 109 | + " )\n", |
| 110 | + "\n", |
| 111 | + " fig.update_layout(\n", |
| 112 | + " title=title,\n", |
| 113 | + " xaxis=dict(\n", |
| 114 | + " title=\"Amino acid position\",\n", |
| 115 | + " tickmode=\"linear\",\n", |
| 116 | + " dtick=10,\n", |
| 117 | + " showline=True,\n", |
| 118 | + " linecolor=\"black\",\n", |
| 119 | + " linewidth=2,\n", |
| 120 | + " showgrid=False,\n", |
| 121 | + " ),\n", |
| 122 | + " yaxis=dict(\n", |
| 123 | + " title=\"Depth (Number of matching PSMs per position)\",\n", |
| 124 | + " showline=True,\n", |
| 125 | + " linecolor=\"black\",\n", |
| 126 | + " linewidth=2,\n", |
| 127 | + " showgrid=False,\n", |
| 128 | + " ),\n", |
| 129 | + " template=\"plotly_white\",\n", |
| 130 | + " height=450,\n", |
| 131 | + " width=1000,\n", |
| 132 | + " margin=dict(t=60),\n", |
| 133 | + " showlegend=False,\n", |
| 134 | + " )\n", |
| 135 | + "\n", |
| 136 | + " fig.show()\n", |
| 137 | + " fig.write_image(output_file, format=\"svg\", scale=2)" |
| 138 | + ] |
| 139 | + }, |
| 140 | + { |
| 141 | + "cell_type": "markdown", |
| 142 | + "id": "3347a030", |
| 143 | + "metadata": {}, |
| 144 | + "source": [ |
| 145 | + "To run the previous function, you first need `mapped_psms` and `protein_norm`, which are created in the main pipeline." |
| 146 | + ] |
| 147 | + }, |
| 148 | + { |
| 149 | + "cell_type": "code", |
| 150 | + "execution_count": null, |
| 151 | + "id": "857db62a", |
| 152 | + "metadata": {}, |
| 153 | + "outputs": [], |
| 154 | + "source": [ |
| 155 | + "cdrs = {\"CDR1\": (31, 35), \"CDR2\": (50, 66), \"CDR3\": (99, 115)}\n", |
| 156 | + "highlight_colors = {\"CDR1\": \"orange\", \"CDR2\": \"lightgreen\", \"CDR3\": \"deepskyblue\"}\n", |
| 157 | + "\n", |
| 158 | + "plot_psm_depth_with_cdrs(\n", |
| 159 | + " protein_sequence=protein_norm,\n", |
| 160 | + " mapped_psms=mapped_psms,\n", |
| 161 | + " cdrs=cdrs,\n", |
| 162 | + " highlight_colors=highlight_colors,\n", |
| 163 | + " output_file=\"fig_4C_depth_psms_cdrs.svg\",\n", |
| 164 | + ")" |
| 165 | + ] |
| 166 | + }, |
| 167 | + { |
| 168 | + "cell_type": "code", |
| 169 | + "execution_count": null, |
| 170 | + "id": "43d014e1", |
| 171 | + "metadata": {}, |
| 172 | + "outputs": [], |
| 173 | + "source": [ |
| 174 | + "def plot_cdr_scaffolds(\n", |
| 175 | + " reference_seq,\n", |
| 176 | + " scaffold_info,\n", |
| 177 | + " cdrs,\n", |
| 178 | + " highlight_colors,\n", |
| 179 | + " font_family=\"monospace\",\n", |
| 180 | + " font_size=25,\n", |
| 181 | + " letter_spacing=1.5,\n", |
| 182 | + " save_path=None,\n", |
| 183 | + "):\n", |
| 184 | + " \"\"\"\n", |
| 185 | + " Plot reference sequence and full scaffold sequences aligned, highlighting CDR regions,\n", |
| 186 | + " with adjustable letter spacing for sequence letters.\n", |
| 187 | + "\n", |
| 188 | + " \"\"\"\n", |
| 189 | + "\n", |
| 190 | + " def chars_equal(a, b):\n", |
| 191 | + " return (a in [\"L\", \"I\"] and b in [\"L\", \"I\"]) or a == b\n", |
| 192 | + "\n", |
| 193 | + " def find_alignment_offset(ref, seq):\n", |
| 194 | + " for pos in range(len(ref) - len(seq) + 1):\n", |
| 195 | + " if all(chars_equal(ref[pos + i], seq[i]) for i in range(len(seq))):\n", |
| 196 | + " return pos\n", |
| 197 | + " return 0\n", |
| 198 | + "\n", |
| 199 | + " fig_width = max(10, len(reference_seq) * letter_spacing / 8)\n", |
| 200 | + " fig, ax = plt.subplots(figsize=(fig_width, 6))\n", |
| 201 | + " ax.axis(\"off\")\n", |
| 202 | + "\n", |
| 203 | + " for name, (start, end) in cdrs.items():\n", |
| 204 | + " x0 = (start - 1) * letter_spacing + 1\n", |
| 205 | + " width = (end - start + 1) * letter_spacing\n", |
| 206 | + " ax.add_patch(\n", |
| 207 | + " Rectangle((x0, 4.5), width, 0.8, color=highlight_colors[name], alpha=0.3)\n", |
| 208 | + " )\n", |
| 209 | + " ax.text(\n", |
| 210 | + " x0 + width / 2,\n", |
| 211 | + " 5.5,\n", |
| 212 | + " name,\n", |
| 213 | + " ha=\"center\",\n", |
| 214 | + " va=\"bottom\",\n", |
| 215 | + " fontsize=font_size,\n", |
| 216 | + " fontweight=\"bold\",\n", |
| 217 | + " color=highlight_colors[name],\n", |
| 218 | + " )\n", |
| 219 | + "\n", |
| 220 | + " ax.text(-5, 5, \"Reference\", fontfamily=font_family, fontsize=font_size, ha=\"right\")\n", |
| 221 | + " for i, aa in enumerate(reference_seq):\n", |
| 222 | + " x = i * letter_spacing + 1\n", |
| 223 | + " ax.text(x, 5, aa, fontfamily=font_family, fontsize=font_size, color=\"black\")\n", |
| 224 | + "\n", |
| 225 | + " y_positions = [4, 3, 2]\n", |
| 226 | + " for idx, (scaf_name, seq) in enumerate(scaffold_info):\n", |
| 227 | + " cdr_name = f\"CDR{idx+1}\"\n", |
| 228 | + " color = highlight_colors[cdr_name]\n", |
| 229 | + " offset = find_alignment_offset(reference_seq, seq)\n", |
| 230 | + "\n", |
| 231 | + " start, end = cdrs[cdr_name]\n", |
| 232 | + " rel_start = max(start - 1 - offset, 0)\n", |
| 233 | + " rel_end = min(end - 1 - offset, len(seq) - 1)\n", |
| 234 | + "\n", |
| 235 | + " ax.text(\n", |
| 236 | + " -5,\n", |
| 237 | + " y_positions[idx],\n", |
| 238 | + " f\"{cdr_name} ({scaf_name})\",\n", |
| 239 | + " fontfamily=font_family,\n", |
| 240 | + " fontsize=font_size,\n", |
| 241 | + " ha=\"right\",\n", |
| 242 | + " )\n", |
| 243 | + "\n", |
| 244 | + " for i, aa in enumerate(seq):\n", |
| 245 | + " x = (offset + i) * letter_spacing + 1\n", |
| 246 | + " ax.text(\n", |
| 247 | + " x,\n", |
| 248 | + " y_positions[idx],\n", |
| 249 | + " aa,\n", |
| 250 | + " fontfamily=font_family,\n", |
| 251 | + " fontsize=font_size,\n", |
| 252 | + " color=\"black\",\n", |
| 253 | + " )\n", |
| 254 | + "\n", |
| 255 | + " if rel_end >= rel_start:\n", |
| 256 | + " x0 = (offset + rel_start) * letter_spacing + 1\n", |
| 257 | + " width = (rel_end - rel_start + 1) * letter_spacing\n", |
| 258 | + " ax.add_patch(\n", |
| 259 | + " Rectangle(\n", |
| 260 | + " (x0, y_positions[idx] - 0.2), width, 0.8, color=color, alpha=0.3\n", |
| 261 | + " )\n", |
| 262 | + " )\n", |
| 263 | + "\n", |
| 264 | + " ax.set_xlim(0, len(reference_seq) * letter_spacing + 5)\n", |
| 265 | + " ax.set_ylim(1.5, 6)\n", |
| 266 | + "\n", |
| 267 | + " plt.tight_layout()\n", |
| 268 | + " if save_path:\n", |
| 269 | + " plt.savefig(save_path, format=\"svg\", dpi=600, bbox_inches=\"tight\")\n", |
| 270 | + " plt.show()" |
| 271 | + ] |
| 272 | + }, |
| 273 | + { |
| 274 | + "cell_type": "code", |
| 275 | + "execution_count": null, |
| 276 | + "id": "849fc33e", |
| 277 | + "metadata": {}, |
| 278 | + "outputs": [], |
| 279 | + "source": [ |
| 280 | + "reference_seq = \"QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALITSGHMTHYEDSVKGRFTISRDNAKEVLYLQMNSLKPEDTAVYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDIDYKDDDDKGAAHHHHHH\"\n", |
| 281 | + "\n", |
| 282 | + "cdrs = {\"CDR1\": (31, 35), \"CDR2\": (50, 66), \"CDR3\": (99, 115)}\n", |
| 283 | + "highlight_colors = {\"CDR1\": \"orange\", \"CDR2\": \"lightgreen\", \"CDR3\": \"deepskyblue\"}\n", |
| 284 | + "scaffold_info = [\n", |
| 285 | + " (\n", |
| 286 | + " \"scaffold_5\",\n", |
| 287 | + " \"QVQLQESGGGLVQPGGSLRLSCAASGSASSMYTLAWYRQAPGKQRELVALLTSGHMTHYEDSVKGRFY\",\n", |
| 288 | + " ),\n", |
| 289 | + " (\"scaffold_10\", \"SYFCNLHRLTSSDDDGRTWGQGTQVTVSSAAADYKDHDGDYKDHDLDYKDDDDKGAAH\"),\n", |
| 290 | + "]\n", |
| 291 | + "\n", |
| 292 | + "plot_cdr_scaffolds(\n", |
| 293 | + " reference_seq,\n", |
| 294 | + " scaffold_info,\n", |
| 295 | + " cdrs,\n", |
| 296 | + " highlight_colors,\n", |
| 297 | + " font_size=20,\n", |
| 298 | + " letter_spacing=3, # increased spacing\n", |
| 299 | + " save_path=\"cdrs_scaffolds.svg\",\n", |
| 300 | + ")" |
| 301 | + ] |
| 302 | + } |
| 303 | + ], |
| 304 | + "metadata": { |
| 305 | + "language_info": { |
| 306 | + "name": "python" |
| 307 | + } |
| 308 | + }, |
| 309 | + "nbformat": 4, |
| 310 | + "nbformat_minor": 5 |
| 311 | +} |
0 commit comments