Skip to content

Commit 21da479

Browse files
committed
Rerun scoreset breakdown notebook
1 parent 7ba9ba9 commit 21da479

File tree

1 file changed

+38
-50
lines changed

1 file changed

+38
-50
lines changed

notebooks/analysis/mavedb_scoreset_breakdown.ipynb

Lines changed: 38 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,6 @@
4646
{
4747
"cell_type": "code",
4848
"execution_count": 2,
49-
"id": "10454f90",
50-
"metadata": {},
51-
"outputs": [],
52-
"source": [
53-
"#mave_dat = pd.read_csv(\"analysis_files/mave_dat.csv\", index_col=[0])\n",
54-
"#mave_dat = mave_dat.iloc[:-5]\n",
55-
"#mave_dat.head()"
56-
]
57-
},
58-
{
59-
"cell_type": "code",
60-
"execution_count": 3,
6149
"id": "6d6db2d5",
6250
"metadata": {},
6351
"outputs": [
@@ -162,13 +150,13 @@
162150
"4 dna SUMO1 Protein coding P63165 Homo sapiens "
163151
]
164152
},
165-
"execution_count": 3,
153+
"execution_count": 2,
166154
"metadata": {},
167155
"output_type": "execute_result"
168156
}
169157
],
170158
"source": [
171-
"mave_dat = pd.read_csv(\"mave_metadata_20241114.csv\", index_col=0)\n",
159+
"mave_dat = pd.read_csv(\"analysis_files/mave_metadata_20241114.csv\", index_col=0)\n",
172160
"mave_dat.head()"
173161
]
174162
},
@@ -182,7 +170,7 @@
182170
},
183171
{
184172
"cell_type": "code",
185-
"execution_count": 4,
173+
"execution_count": 3,
186174
"id": "fe764f5e",
187175
"metadata": {},
188176
"outputs": [
@@ -192,7 +180,7 @@
192180
"'The number of score sets with DNA target sequences is: 482'"
193181
]
194182
},
195-
"execution_count": 4,
183+
"execution_count": 3,
196184
"metadata": {},
197185
"output_type": "execute_result"
198186
}
@@ -212,7 +200,7 @@
212200
},
213201
{
214202
"cell_type": "code",
215-
"execution_count": 5,
203+
"execution_count": 4,
216204
"id": "ed5a617f",
217205
"metadata": {},
218206
"outputs": [
@@ -222,7 +210,7 @@
222210
"'The number of score sets with protein target sequences is: 582'"
223211
]
224212
},
225-
"execution_count": 5,
213+
"execution_count": 4,
226214
"metadata": {},
227215
"output_type": "execute_result"
228216
}
@@ -242,7 +230,7 @@
242230
},
243231
{
244232
"cell_type": "code",
245-
"execution_count": 6,
233+
"execution_count": 5,
246234
"id": "b7108a34",
247235
"metadata": {},
248236
"outputs": [
@@ -252,7 +240,7 @@
252240
"'The number of protein coding score sets: 1023'"
253241
]
254242
},
255-
"execution_count": 6,
243+
"execution_count": 5,
256244
"metadata": {},
257245
"output_type": "execute_result"
258246
}
@@ -272,7 +260,7 @@
272260
},
273261
{
274262
"cell_type": "code",
275-
"execution_count": 7,
263+
"execution_count": 6,
276264
"id": "82659bbd",
277265
"metadata": {},
278266
"outputs": [
@@ -282,7 +270,7 @@
282270
"'The number of regulatory/other noncoding score sets: 41'"
283271
]
284272
},
285-
"execution_count": 7,
273+
"execution_count": 6,
286274
"metadata": {},
287275
"output_type": "execute_result"
288276
}
@@ -302,7 +290,7 @@
302290
},
303291
{
304292
"cell_type": "code",
305-
"execution_count": 8,
293+
"execution_count": 7,
306294
"id": "f1d53a77",
307295
"metadata": {},
308296
"outputs": [
@@ -312,7 +300,7 @@
312300
"'897 score sets have UniProt IDs'"
313301
]
314302
},
315-
"execution_count": 8,
303+
"execution_count": 7,
316304
"metadata": {},
317305
"output_type": "execute_result"
318306
}
@@ -331,7 +319,7 @@
331319
},
332320
{
333321
"cell_type": "code",
334-
"execution_count": 9,
322+
"execution_count": 8,
335323
"id": "5eeeaca3",
336324
"metadata": {},
337325
"outputs": [
@@ -341,7 +329,7 @@
341329
"'The number of score sets with protein target sequences is: 582'"
342330
]
343331
},
344-
"execution_count": 9,
332+
"execution_count": 8,
345333
"metadata": {},
346334
"output_type": "execute_result"
347335
}
@@ -360,7 +348,7 @@
360348
},
361349
{
362350
"cell_type": "code",
363-
"execution_count": 10,
351+
"execution_count": 9,
364352
"id": "7c51d97b",
365353
"metadata": {},
366354
"outputs": [
@@ -370,7 +358,7 @@
370358
"'The number of coding score sets with DNA target sequences is: 441'"
371359
]
372360
},
373-
"execution_count": 10,
361+
"execution_count": 9,
374362
"metadata": {},
375363
"output_type": "execute_result"
376364
}
@@ -389,7 +377,7 @@
389377
},
390378
{
391379
"cell_type": "code",
392-
"execution_count": 11,
380+
"execution_count": 10,
393381
"id": "48e9db0f",
394382
"metadata": {},
395383
"outputs": [
@@ -399,7 +387,7 @@
399387
"'The number of regulatory/other noncoding score sets with DNA target sequences is: 41'"
400388
]
401389
},
402-
"execution_count": 11,
390+
"execution_count": 10,
403391
"metadata": {},
404392
"output_type": "execute_result"
405393
}
@@ -418,7 +406,7 @@
418406
},
419407
{
420408
"cell_type": "code",
421-
"execution_count": 12,
409+
"execution_count": 11,
422410
"id": "efa5b7e3",
423411
"metadata": {},
424412
"outputs": [
@@ -428,7 +416,7 @@
428416
"'The average length of protein target sequences is: 105.39003436426117 amino acids'"
429417
]
430418
},
431-
"execution_count": 12,
419+
"execution_count": 11,
432420
"metadata": {},
433421
"output_type": "execute_result"
434422
}
@@ -449,7 +437,7 @@
449437
},
450438
{
451439
"cell_type": "code",
452-
"execution_count": 13,
440+
"execution_count": 12,
453441
"id": "94061952",
454442
"metadata": {},
455443
"outputs": [
@@ -459,7 +447,7 @@
459447
"'The average length of protein coding DNA target sequences is: 635.2448979591836 nucleotides'"
460448
]
461449
},
462-
"execution_count": 13,
450+
"execution_count": 12,
463451
"metadata": {},
464452
"output_type": "execute_result"
465453
}
@@ -480,7 +468,7 @@
480468
},
481469
{
482470
"cell_type": "code",
483-
"execution_count": 14,
471+
"execution_count": 13,
484472
"id": "be20cb3b",
485473
"metadata": {},
486474
"outputs": [
@@ -490,7 +478,7 @@
490478
"'The average length of regulatory/other noncoding DNA target sequences is: 353.2682926829268 nucleotides'"
491479
]
492480
},
493-
"execution_count": 14,
481+
"execution_count": 13,
494482
"metadata": {},
495483
"output_type": "execute_result"
496484
}
@@ -511,7 +499,7 @@
511499
},
512500
{
513501
"cell_type": "code",
514-
"execution_count": 15,
502+
"execution_count": 14,
515503
"id": "86195c62",
516504
"metadata": {},
517505
"outputs": [
@@ -521,7 +509,7 @@
521509
"'The number of protein coding score sets with gene symbols/aliases is: 348'"
522510
]
523511
},
524-
"execution_count": 15,
512+
"execution_count": 14,
525513
"metadata": {},
526514
"output_type": "execute_result"
527515
}
@@ -542,7 +530,7 @@
542530
},
543531
{
544532
"cell_type": "code",
545-
"execution_count": 16,
533+
"execution_count": 15,
546534
"id": "101ea727",
547535
"metadata": {},
548536
"outputs": [
@@ -552,7 +540,7 @@
552540
"'The number of protein coding score sets with descriptive targets is: 675'"
553541
]
554542
},
555-
"execution_count": 16,
543+
"execution_count": 15,
556544
"metadata": {},
557545
"output_type": "execute_result"
558546
}
@@ -573,7 +561,7 @@
573561
},
574562
{
575563
"cell_type": "code",
576-
"execution_count": 17,
564+
"execution_count": 16,
577565
"id": "afdf49aa",
578566
"metadata": {},
579567
"outputs": [
@@ -583,7 +571,7 @@
583571
"'The number of regulatory/other noncoding score sets with descriptive targets is: 41'"
584572
]
585573
},
586-
"execution_count": 17,
574+
"execution_count": 16,
587575
"metadata": {},
588576
"output_type": "execute_result"
589577
}
@@ -604,7 +592,7 @@
604592
},
605593
{
606594
"cell_type": "code",
607-
"execution_count": 18,
595+
"execution_count": 17,
608596
"id": "3b3dc007",
609597
"metadata": {},
610598
"outputs": [
@@ -614,7 +602,7 @@
614602
"'The number of unique gene symbols across examined score set targets is: 526'"
615603
]
616604
},
617-
"execution_count": 18,
605+
"execution_count": 17,
618606
"metadata": {},
619607
"output_type": "execute_result"
620608
}
@@ -642,7 +630,7 @@
642630
},
643631
{
644632
"cell_type": "code",
645-
"execution_count": 19,
633+
"execution_count": 18,
646634
"id": "4ef61ec4",
647635
"metadata": {},
648636
"outputs": [
@@ -1177,7 +1165,7 @@
11771165
" 'human'}"
11781166
]
11791167
},
1180-
"execution_count": 19,
1168+
"execution_count": 18,
11811169
"metadata": {},
11821170
"output_type": "execute_result"
11831171
}
@@ -1196,7 +1184,7 @@
11961184
},
11971185
{
11981186
"cell_type": "code",
1199-
"execution_count": null,
1187+
"execution_count": 19,
12001188
"id": "0b188ac0",
12011189
"metadata": {},
12021190
"outputs": [
@@ -1267,7 +1255,7 @@
12671255
" </tr>\n",
12681256
" <tr>\n",
12691257
" <th>Unique Targets</th>\n",
1270-
" <td>71.00</td>\n",
1258+
" <td>526.00</td>\n",
12711259
" </tr>\n",
12721260
" </tbody>\n",
12731261
"</table>\n",
@@ -1285,10 +1273,10 @@
12851273
"Protein Coding Score Sets with Gene Symbols/Ali... 348.00\n",
12861274
"Protein Coding Score Sets with Descriptive Targets 675.00\n",
12871275
"Regulatory/Other Noncoding Score Sets with Desc... 41.00\n",
1288-
"Unique Targets 71.00"
1276+
"Unique Targets 526.00"
12891277
]
12901278
},
1291-
"execution_count": 20,
1279+
"execution_count": 19,
12921280
"metadata": {},
12931281
"output_type": "execute_result"
12941282
}

0 commit comments

Comments
 (0)