|
46 | 46 | { |
47 | 47 | "cell_type": "code", |
48 | 48 | "execution_count": 2, |
49 | | - "id": "10454f90", |
50 | | - "metadata": {}, |
51 | | - "outputs": [], |
52 | | - "source": [ |
53 | | - "#mave_dat = pd.read_csv(\"analysis_files/mave_dat.csv\", index_col=[0])\n", |
54 | | - "#mave_dat = mave_dat.iloc[:-5]\n", |
55 | | - "#mave_dat.head()" |
56 | | - ] |
57 | | - }, |
58 | | - { |
59 | | - "cell_type": "code", |
60 | | - "execution_count": 3, |
61 | 49 | "id": "6d6db2d5", |
62 | 50 | "metadata": {}, |
63 | 51 | "outputs": [ |
|
162 | 150 | "4 dna SUMO1 Protein coding P63165 Homo sapiens " |
163 | 151 | ] |
164 | 152 | }, |
165 | | - "execution_count": 3, |
| 153 | + "execution_count": 2, |
166 | 154 | "metadata": {}, |
167 | 155 | "output_type": "execute_result" |
168 | 156 | } |
169 | 157 | ], |
170 | 158 | "source": [ |
171 | | - "mave_dat = pd.read_csv(\"mave_metadata_20241114.csv\", index_col=0)\n", |
| 159 | + "mave_dat = pd.read_csv(\"analysis_files/mave_metadata_20241114.csv\", index_col=0)\n", |
172 | 160 | "mave_dat.head()" |
173 | 161 | ] |
174 | 162 | }, |
|
182 | 170 | }, |
183 | 171 | { |
184 | 172 | "cell_type": "code", |
185 | | - "execution_count": 4, |
| 173 | + "execution_count": 3, |
186 | 174 | "id": "fe764f5e", |
187 | 175 | "metadata": {}, |
188 | 176 | "outputs": [ |
|
192 | 180 | "'The number of score sets with DNA target sequences is: 482'" |
193 | 181 | ] |
194 | 182 | }, |
195 | | - "execution_count": 4, |
| 183 | + "execution_count": 3, |
196 | 184 | "metadata": {}, |
197 | 185 | "output_type": "execute_result" |
198 | 186 | } |
|
212 | 200 | }, |
213 | 201 | { |
214 | 202 | "cell_type": "code", |
215 | | - "execution_count": 5, |
| 203 | + "execution_count": 4, |
216 | 204 | "id": "ed5a617f", |
217 | 205 | "metadata": {}, |
218 | 206 | "outputs": [ |
|
222 | 210 | "'The number of score sets with protein target sequences is: 582'" |
223 | 211 | ] |
224 | 212 | }, |
225 | | - "execution_count": 5, |
| 213 | + "execution_count": 4, |
226 | 214 | "metadata": {}, |
227 | 215 | "output_type": "execute_result" |
228 | 216 | } |
|
242 | 230 | }, |
243 | 231 | { |
244 | 232 | "cell_type": "code", |
245 | | - "execution_count": 6, |
| 233 | + "execution_count": 5, |
246 | 234 | "id": "b7108a34", |
247 | 235 | "metadata": {}, |
248 | 236 | "outputs": [ |
|
252 | 240 | "'The number of protein coding score sets: 1023'" |
253 | 241 | ] |
254 | 242 | }, |
255 | | - "execution_count": 6, |
| 243 | + "execution_count": 5, |
256 | 244 | "metadata": {}, |
257 | 245 | "output_type": "execute_result" |
258 | 246 | } |
|
272 | 260 | }, |
273 | 261 | { |
274 | 262 | "cell_type": "code", |
275 | | - "execution_count": 7, |
| 263 | + "execution_count": 6, |
276 | 264 | "id": "82659bbd", |
277 | 265 | "metadata": {}, |
278 | 266 | "outputs": [ |
|
282 | 270 | "'The number of regulatory/other noncoding score sets: 41'" |
283 | 271 | ] |
284 | 272 | }, |
285 | | - "execution_count": 7, |
| 273 | + "execution_count": 6, |
286 | 274 | "metadata": {}, |
287 | 275 | "output_type": "execute_result" |
288 | 276 | } |
|
302 | 290 | }, |
303 | 291 | { |
304 | 292 | "cell_type": "code", |
305 | | - "execution_count": 8, |
| 293 | + "execution_count": 7, |
306 | 294 | "id": "f1d53a77", |
307 | 295 | "metadata": {}, |
308 | 296 | "outputs": [ |
|
312 | 300 | "'897 score sets have UniProt IDs'" |
313 | 301 | ] |
314 | 302 | }, |
315 | | - "execution_count": 8, |
| 303 | + "execution_count": 7, |
316 | 304 | "metadata": {}, |
317 | 305 | "output_type": "execute_result" |
318 | 306 | } |
|
331 | 319 | }, |
332 | 320 | { |
333 | 321 | "cell_type": "code", |
334 | | - "execution_count": 9, |
| 322 | + "execution_count": 8, |
335 | 323 | "id": "5eeeaca3", |
336 | 324 | "metadata": {}, |
337 | 325 | "outputs": [ |
|
341 | 329 | "'The number of score sets with protein target sequences is: 582'" |
342 | 330 | ] |
343 | 331 | }, |
344 | | - "execution_count": 9, |
| 332 | + "execution_count": 8, |
345 | 333 | "metadata": {}, |
346 | 334 | "output_type": "execute_result" |
347 | 335 | } |
|
360 | 348 | }, |
361 | 349 | { |
362 | 350 | "cell_type": "code", |
363 | | - "execution_count": 10, |
| 351 | + "execution_count": 9, |
364 | 352 | "id": "7c51d97b", |
365 | 353 | "metadata": {}, |
366 | 354 | "outputs": [ |
|
370 | 358 | "'The number of coding score sets with DNA target sequences is: 441'" |
371 | 359 | ] |
372 | 360 | }, |
373 | | - "execution_count": 10, |
| 361 | + "execution_count": 9, |
374 | 362 | "metadata": {}, |
375 | 363 | "output_type": "execute_result" |
376 | 364 | } |
|
389 | 377 | }, |
390 | 378 | { |
391 | 379 | "cell_type": "code", |
392 | | - "execution_count": 11, |
| 380 | + "execution_count": 10, |
393 | 381 | "id": "48e9db0f", |
394 | 382 | "metadata": {}, |
395 | 383 | "outputs": [ |
|
399 | 387 | "'The number of regulatory/other noncoding score sets with DNA target sequences is: 41'" |
400 | 388 | ] |
401 | 389 | }, |
402 | | - "execution_count": 11, |
| 390 | + "execution_count": 10, |
403 | 391 | "metadata": {}, |
404 | 392 | "output_type": "execute_result" |
405 | 393 | } |
|
418 | 406 | }, |
419 | 407 | { |
420 | 408 | "cell_type": "code", |
421 | | - "execution_count": 12, |
| 409 | + "execution_count": 11, |
422 | 410 | "id": "efa5b7e3", |
423 | 411 | "metadata": {}, |
424 | 412 | "outputs": [ |
|
428 | 416 | "'The average length of protein target sequences is: 105.39003436426117 amino acids'" |
429 | 417 | ] |
430 | 418 | }, |
431 | | - "execution_count": 12, |
| 419 | + "execution_count": 11, |
432 | 420 | "metadata": {}, |
433 | 421 | "output_type": "execute_result" |
434 | 422 | } |
|
449 | 437 | }, |
450 | 438 | { |
451 | 439 | "cell_type": "code", |
452 | | - "execution_count": 13, |
| 440 | + "execution_count": 12, |
453 | 441 | "id": "94061952", |
454 | 442 | "metadata": {}, |
455 | 443 | "outputs": [ |
|
459 | 447 | "'The average length of protein coding DNA target sequences is: 635.2448979591836 nucleotides'" |
460 | 448 | ] |
461 | 449 | }, |
462 | | - "execution_count": 13, |
| 450 | + "execution_count": 12, |
463 | 451 | "metadata": {}, |
464 | 452 | "output_type": "execute_result" |
465 | 453 | } |
|
480 | 468 | }, |
481 | 469 | { |
482 | 470 | "cell_type": "code", |
483 | | - "execution_count": 14, |
| 471 | + "execution_count": 13, |
484 | 472 | "id": "be20cb3b", |
485 | 473 | "metadata": {}, |
486 | 474 | "outputs": [ |
|
490 | 478 | "'The average length of regulatory/other noncoding DNA target sequences is: 353.2682926829268 nucleotides'" |
491 | 479 | ] |
492 | 480 | }, |
493 | | - "execution_count": 14, |
| 481 | + "execution_count": 13, |
494 | 482 | "metadata": {}, |
495 | 483 | "output_type": "execute_result" |
496 | 484 | } |
|
511 | 499 | }, |
512 | 500 | { |
513 | 501 | "cell_type": "code", |
514 | | - "execution_count": 15, |
| 502 | + "execution_count": 14, |
515 | 503 | "id": "86195c62", |
516 | 504 | "metadata": {}, |
517 | 505 | "outputs": [ |
|
521 | 509 | "'The number of protein coding score sets with gene symbols/aliases is: 348'" |
522 | 510 | ] |
523 | 511 | }, |
524 | | - "execution_count": 15, |
| 512 | + "execution_count": 14, |
525 | 513 | "metadata": {}, |
526 | 514 | "output_type": "execute_result" |
527 | 515 | } |
|
542 | 530 | }, |
543 | 531 | { |
544 | 532 | "cell_type": "code", |
545 | | - "execution_count": 16, |
| 533 | + "execution_count": 15, |
546 | 534 | "id": "101ea727", |
547 | 535 | "metadata": {}, |
548 | 536 | "outputs": [ |
|
552 | 540 | "'The number of protein coding score sets with descriptive targets is: 675'" |
553 | 541 | ] |
554 | 542 | }, |
555 | | - "execution_count": 16, |
| 543 | + "execution_count": 15, |
556 | 544 | "metadata": {}, |
557 | 545 | "output_type": "execute_result" |
558 | 546 | } |
|
573 | 561 | }, |
574 | 562 | { |
575 | 563 | "cell_type": "code", |
576 | | - "execution_count": 17, |
| 564 | + "execution_count": 16, |
577 | 565 | "id": "afdf49aa", |
578 | 566 | "metadata": {}, |
579 | 567 | "outputs": [ |
|
583 | 571 | "'The number of regulatory/other noncoding score sets with descriptive targets is: 41'" |
584 | 572 | ] |
585 | 573 | }, |
586 | | - "execution_count": 17, |
| 574 | + "execution_count": 16, |
587 | 575 | "metadata": {}, |
588 | 576 | "output_type": "execute_result" |
589 | 577 | } |
|
604 | 592 | }, |
605 | 593 | { |
606 | 594 | "cell_type": "code", |
607 | | - "execution_count": 18, |
| 595 | + "execution_count": 17, |
608 | 596 | "id": "3b3dc007", |
609 | 597 | "metadata": {}, |
610 | 598 | "outputs": [ |
|
614 | 602 | "'The number of unique gene symbols across examined score set targets is: 526'" |
615 | 603 | ] |
616 | 604 | }, |
617 | | - "execution_count": 18, |
| 605 | + "execution_count": 17, |
618 | 606 | "metadata": {}, |
619 | 607 | "output_type": "execute_result" |
620 | 608 | } |
|
642 | 630 | }, |
643 | 631 | { |
644 | 632 | "cell_type": "code", |
645 | | - "execution_count": 19, |
| 633 | + "execution_count": 18, |
646 | 634 | "id": "4ef61ec4", |
647 | 635 | "metadata": {}, |
648 | 636 | "outputs": [ |
|
1177 | 1165 | " 'human'}" |
1178 | 1166 | ] |
1179 | 1167 | }, |
1180 | | - "execution_count": 19, |
| 1168 | + "execution_count": 18, |
1181 | 1169 | "metadata": {}, |
1182 | 1170 | "output_type": "execute_result" |
1183 | 1171 | } |
|
1196 | 1184 | }, |
1197 | 1185 | { |
1198 | 1186 | "cell_type": "code", |
1199 | | - "execution_count": null, |
| 1187 | + "execution_count": 19, |
1200 | 1188 | "id": "0b188ac0", |
1201 | 1189 | "metadata": {}, |
1202 | 1190 | "outputs": [ |
|
1267 | 1255 | " </tr>\n", |
1268 | 1256 | " <tr>\n", |
1269 | 1257 | " <th>Unique Targets</th>\n", |
1270 | | - " <td>71.00</td>\n", |
| 1258 | + " <td>526.00</td>\n", |
1271 | 1259 | " </tr>\n", |
1272 | 1260 | " </tbody>\n", |
1273 | 1261 | "</table>\n", |
|
1285 | 1273 | "Protein Coding Score Sets with Gene Symbols/Ali... 348.00\n", |
1286 | 1274 | "Protein Coding Score Sets with Descriptive Targets 675.00\n", |
1287 | 1275 | "Regulatory/Other Noncoding Score Sets with Desc... 41.00\n", |
1288 | | - "Unique Targets 71.00" |
| 1276 | + "Unique Targets 526.00" |
1289 | 1277 | ] |
1290 | 1278 | }, |
1291 | | - "execution_count": 20, |
| 1279 | + "execution_count": 19, |
1292 | 1280 | "metadata": {}, |
1293 | 1281 | "output_type": "execute_result" |
1294 | 1282 | } |
|
0 commit comments