Skip to content

Commit 6ebfcac

Browse files
committed
Update plotting
1 parent 11c1953 commit 6ebfcac

File tree

4 files changed

+70
-48
lines changed

4 files changed

+70
-48
lines changed
2.22 MB
Loading
210 KB
Loading

scripts/ProteinGym_runs/results/dca_esm_and_hybrid_opt_results_clean.csv

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,48 @@ No.,Dataset,N_Variants,N_Max_Muts,Untrained_Performance_DCA,Untrained_Performanc
109109
118,S22A1_HUMAN_Yee_2023_abundance,9803,1,0.5783901778318246,0.6049457579599515,0.6265223214824237,0.5827395139508005,0.6325708009489824,0.6378514151439854,0.5657640807536206,0.6308098142704518,0.6459163580319655,0.6334508915409808,0.7103271508280004,0.7154079417238408,9803,9703,9603,8803,5032
110110
119,S22A1_HUMAN_Yee_2023_activity,10094,1,0.5427328121851095,0.5559350277107161,0.563158678641977,0.5432530125807854,0.5870974556591995,0.6114640727988875,0.5511329675329357,0.6063675579244358,0.6006432373043168,0.6011629865892636,0.6539786983369106,0.6591219477477889,10094,9994,9894,9094,5130
111111
120,SBI_STAAM_Tsuboyama_2023_2JVG,1025,1,0.47470827802722126,0.21294576254348396,0.6816863232777064,0.5377788988571398,0.443964729828563,0.7822287081898315,0.5227508790754878,0.6193069804052069,0.801038448140637,nan,nan,nan,1025,925,825,nan,153
112+
121,SC6A4_HUMAN_Young_2021,11576,1,0.5457407276755958,0.5216892626232456,0.4842819640132234,0.545829231995369,0.5676597020594244,0.5811443075078228,0.5455502688159861,0.5632868683142125,0.5853427609784542,0.5754170431612782,0.6196542734035645,0.6150262537405341,11576,11476,11376,10576,6254
113+
122,SCIN_STAAR_Tsuboyama_2023_2QFF,1212,1,0.1060686633210642,0.38118875190472196,0.5699742691658471,0.12248628823649155,0.45144422107227034,0.7130757530755111,0.383485599128105,0.5904395122247938,0.8360862510634989,0.56251713967182,0.777641336437256,0.881865457252142,1212,1112,1012,212,450
114+
124,SERC_HUMAN_Xie_2023,1914,1,0.5198799640868612,0.5028905622752997,0.458774440194722,0.4648799157974788,0.518281504423488,0.5552394867619885,0.525628762529798,0.5678990222521046,0.5664379890185185,0.5841845589343698,0.6369700412939021,0.626924895903814,1914,1814,1714,914,1606
115+
125,SHOC2_HUMAN_Kwon_2022,10972,1,0.31547221417409194,0.3735314705751871,0.34777805145899826,0.1816484353020659,0.3832629304420021,0.3536330852600139,0.3092701103178369,0.39525028830134706,0.39781122978504524,0.3458903055791938,0.4496095814663275,0.40647595428589506,10972,10872,10772,9972,5598
116+
126,SOX30_HUMAN_Tsuboyama_2023_7JJK,1010,1,0.2064985080477759,0.34196559666222814,0.3848601590299928,0.4370072617193959,0.5366871383041059,0.7074102980918471,0.6016285906565677,0.6155125545203876,0.8402197771305179,nan,nan,nan,1010,910,810,nan,148
117+
129,SQSTM_MOUSE_Tsuboyama_2023_2RRU,707,1,0.48236419220983223,0.3372898200362942,0.6676043208790352,0.593717105145836,0.5703482886252531,0.8296700578537867,0.5951916037081594,0.7502768205774814,0.869242757874154,nan,nan,nan,707,607,507,nan,117
118+
130,SRC_HUMAN_Ahler_2019,3372,1,0.43074014270269706,0.5597716563081357,0.6201767878255111,0.5072312971878398,0.4765162811653047,0.6490329306524109,0.5052707544810872,0.5657544587053533,0.6276279789952638,0.709674829332756,0.7295956022587283,0.7362608550073382,3372,3272,3172,2372,2635
119+
131,SRC_HUMAN_Chakraborty_2023_binding-DAS_25uM,3637,1,0.3794169212720729,0.478703663566242,0.53533446748227,0.4575641017056756,0.5294129869660518,0.553483003400323,0.48949067658315,0.5160886905390629,0.5796611736820453,0.6225928232347172,0.6622353395167132,0.6550171035090475,3637,3537,3437,2637,2721
120+
132,SRC_HUMAN_Nguyen_2022,3366,1,0.3618707030863949,0.4798505335457928,0.5254314761272385,0.3391142579473501,0.491799055676241,0.5235127559222374,0.46431020857445504,0.5079228292791076,0.5318524405907179,0.6133815800916715,0.6410457682283388,0.6491172449218118,3366,3266,3166,2366,2641
121+
133,SUMO1_HUMAN_Weile_2017,1700,1,0.4202750508550765,0.49361802873003224,0.4723518014278685,0.43724456921909843,0.5177957403498986,0.5163841636266264,0.45654978850846695,0.5422520916675964,0.5347861470160654,0.5447731254975344,0.6382164744942628,0.5556527724692149,1700,1600,1500,700,603
122+
134,SYUA_HUMAN_Newberry_2020,2497,1,0.06380835807125626,0.17603273053052568,0.4018318715232637,0.030676656991472102,0.3050278133960828,0.49545195406449777,0.06346800312936872,0.4532941030753343,0.5600952895932856,0.18983259079915304,0.5854554526704274,0.599141462064945,2497,2397,2297,1497,860
123+
135,TADBP_HUMAN_Bolognesi_2019,1196,1,0.051321621475514424,0.22025446138770352,0.5367168779058517,0.0857028145909641,0.6081560394553069,0.6114516078018452,0.11280991964013713,0.7178273331831784,0.6975466813347956,0.1378744557284951,0.7404696826016625,0.804663356947621,1196,1096,996,196,1421
124+
136,TAT_HV1BR_Fernandes_2016,1577,1,0.3778608402253199,0.321464799463336,0.37025198507793133,0.40504907652898087,0.41961274689396205,0.4315035568654124,0.432394646165624,0.4673820198114287,0.4673449878970876,0.583098604263967,0.57669001614572,0.5984546879716307,1577,1477,1377,577,537
125+
137,TPK1_HUMAN_Weile_2017,3181,1,0.19048332019038014,0.29174985326293595,0.24565349047855753,0.18792778983901204,0.26849401681494883,0.24593966308907309,0.050319820469659306,0.2650814575960433,0.2405747477445055,0.21937357206932365,0.34221008769705663,0.2881486872881473,3181,3081,2981,2181,1276
126+
138,TPMT_HUMAN_Matreyek_2018,3648,1,0.5144356265119117,0.5030000509839055,0.543490866434038,0.5119386943869986,0.5265029949006056,0.5491286357471789,0.50636880029725,0.5227626684920824,0.57623660913325,0.5442451840109759,0.5738735583131437,0.5850763512211299,3648,3548,3448,2648,1356
127+
139,TPOR_HUMAN_Bridgford_2020,562,1,0.3628498746675898,0.23155185338433887,0.49738434282008415,0.3899250555641096,0.45055850246463186,0.5227216901668628,0.4622027633064958,0.5183642432244521,0.5265056390119173,nan,nan,nan,562,462,362,nan,720
128+
140,TRPC_SACS2_Chan_2017,1519,1,0.5259352519003024,0.58025330104515,0.6045552018958693,0.5665013137201305,0.6002054138555706,0.6324002755335841,0.5925246437987312,0.5852334034559911,0.6667168090935982,0.6683046447890254,0.758573367626568,0.7485554443878845,1519,1419,1319,519,1028
129+
141,TRPC_THEMA_Chan_2017,1519,1,0.3740257810945499,0.4977474949168521,0.4760985190928834,0.43344020636345426,0.5067866531329004,0.572774302730931,0.477583364998662,0.5698155118806562,0.6017810914462008,0.6556047736356596,0.7145215794153164,0.7157408689020401,1519,1419,1319,519,1048
130+
142,UBC9_HUMAN_Weile_2017,2563,1,0.4300148269635904,0.4672397690856626,0.4074945982291688,0.4538201744915946,0.48744018183917515,0.5005487986178968,0.4642209370779951,0.5900692318410755,0.5000454288949262,0.5733204683681983,0.6755730429452588,0.6348134531501661,2563,2463,2363,1563,940
131+
144,VG08_BPP22_Tsuboyama_2023_2GP8,723,1,0.41455950504987665,0.4640389775011293,0.647044858378144,0.5601430117868146,0.7071481235943086,0.7843313790873313,0.5826960537798762,0.7726361240342204,0.8255805467575497,nan,nan,nan,723,623,523,nan,118
132+
145,VKOR1_HUMAN_Chiasson_2020_abundance,2695,1,0.3878902388296152,0.4500894437991905,0.6095929890243181,0.3892330385891188,0.3827545084680324,0.6403368708884017,0.4252912331180519,0.5334878683649218,0.653849078237902,0.47336396891598265,0.7150159981930743,0.7634233723061346,2695,2595,2495,1695,1037
133+
146,VKOR1_HUMAN_Chiasson_2020_activity,697,1,0.3814983564109183,0.3930320000762038,0.29834682165448223,0.36828972897652545,0.3922345508946637,0.3690537608386781,0.3719667862837802,0.4009431068974133,0.4005387658857601,nan,nan,nan,697,597,497,nan,241
134+
147,VRPI_BPT7_Tsuboyama_2023_2WNM,1047,1,0.2010948147385621,0.3010372195302911,0.6157236927867243,0.4478820211953952,0.5849022533217165,0.7287910725626284,0.6500710173103692,0.7216362276164965,0.804935291646543,nan,nan,nan,1047,947,847,nan,149
135+
148,A4_HUMAN_Seuma_2022,14811,2,0.4141012646279537,0.42087615110193743,0.45143198591573047,0.41598127582424654,0.5807718088392064,0.594625991419763,0.4562524307018595,0.5224128406832542,0.6244713890372375,0.6555180669214519,0.7771214702557847,0.7173040203063673,14811,14711,14611,13811,9231
136+
149,AMFR_HUMAN_Tsuboyama_2023_4G3O,2972,2,0.34047220661690736,-0.11288367180484951,0.4800191369946087,0.5228947837614569,0.39038742596230835,0.7369811158094607,0.5386910075262479,0.5638465577981644,0.8258553356127101,0.5495839696577909,0.8117831005744485,0.8380616080351925,2972,2872,2772,1972,558
137+
150,BBC1_YEAST_Tsuboyama_2023_1TG0,2069,2,0.33271413877243944,0.4857526297646051,0.5509563279344588,0.5009162821716936,0.4849365219567278,0.6162984725546327,0.5560236067914881,0.6703113879177816,0.7209845622264189,0.6344007275041881,0.8252398171949942,0.7043885676026146,2069,1969,1869,1069,520
138+
151,BCHB_CHLTE_Tsuboyama_2023_2KRU,1572,2,0.45268111678150086,0.33357648075222357,0.5686553042047826,0.669345818083031,0.6874792090591195,0.7686933884493037,0.6704952015597885,0.6548760689576898,0.7538389467370891,0.7750176818696303,0.8801819653416314,0.9306419458719548,1572,1472,1372,572,434
139+
152,CAPSD_AAV2S_Sinai_2021,42328,28,0.43420949107497625,-0.08218679529654699,0.29384415430364613,0.4360588699637501,0.3969804320355039,0.4992230036608686,0.5400977067445235,0.5268906619182812,0.41422726596005827,0.6806210299552098,0.7567637255694606,0.7723000496729749,42328,42228,42128,41328,20149
140+
153,CATR_CHLRE_Tsuboyama_2023_2AMI,1903,2,0.5311438784466329,0.628458329846203,0.735100753593251,0.6198173494036866,0.6828705492399618,0.8300650657947163,0.6495453318438739,0.8039718288207066,0.8469737998972817,0.709880863392241,0.9262422578375454,0.8199844581402064,1903,1803,1703,903,530
141+
154,CBPA2_HUMAN_Tsuboyama_2023_1O6X,2068,2,0.7176743655459781,0.07054025019326544,0.8612942051783469,0.7438676700233379,0.7748394688916098,0.887675531490518,0.771535450959457,0.7873357938484106,0.9041276288488629,0.8414504511728774,0.9179101726762151,0.8896213597916778,2068,1968,1868,1068,548
142+
155,CBX4_HUMAN_Tsuboyama_2023_2K28,2282,2,0.610252060405259,0.6210470808563223,0.6081920888099694,0.6436012416143375,0.6514258870835755,0.7805585204701242,0.6870418496960732,0.7391401425881441,0.7963705256381465,0.7184846470788941,0.8882731597549787,0.8258175338978712,2282,2182,2082,1282,486
143+
156,CSN4_MOUSE_Tsuboyama_2023_1UFM,3295,2,0.48659447009400725,0.5956441575221731,0.7462975060252841,0.502036631900777,0.6395561170702402,0.8590120387629596,0.5433795142062805,0.7780176121982608,0.8749640933938232,0.6360672066015975,0.9233412288724518,0.9073448598185953,3295,3195,3095,2295,700
144+
157,CUE1_YEAST_Tsuboyama_2023_2MYX,1580,2,0.46026780213862545,0.29855429261138827,0.6295050905247971,0.5088233501189253,0.5363767829816213,0.7938870559717721,0.583166216490856,0.6466962211446955,0.8212147541149409,0.6981932590653837,0.8455887345565516,0.8942496548894986,1580,1480,1380,580,419
145+
158,D7PM05_CLYGR_Somermeyer_2022,24515,23,0.625539418548384,0.1099313474100974,0.5393639221742506,0.6013727288289953,nan,0.5884560450422001,0.6539696940363787,0.6516432430021644,0.6721457214196221,0.7127534845372306,0.7394345034114357,nan,24515,24415,24315,23515,4628
146+
159,DLG4_HUMAN_Faure_2021,6976,2,0.43356405327473974,0.6209999362508375,0.738120082385577,0.5714567964399738,0.6933388749607303,0.7744813074129586,0.6349196915517866,0.7306025997740245,0.7648126464471569,0.7350077311902942,0.8118488773248407,0.790469594463036,6976,6876,6776,5976,5307
147+
160,DNJA1_HUMAN_Tsuboyama_2023_2LO1,2264,2,0.7809281393936109,0.7563329777924263,0.8395669039272611,0.7952424868782203,0.8293827963869247,0.8990315063707124,0.823396043145748,0.8431831118776323,0.8931515052194627,0.8705396629083446,0.9452994725350512,0.9143212684444415,2264,2164,2064,1264,563
148+
161,DOCK1_MOUSE_Tsuboyama_2023_2M0Y,2915,2,0.4868281482314094,0.30042678133544815,0.5633147559494596,0.5742909578720252,0.5220801804898201,0.716635771003406,0.6348800711748903,0.6537343725910945,0.7112221589676033,0.6681654815709037,0.8089757285829563,0.738926110195409,2915,2815,2715,1915,639
149+
162,EPHB2_HUMAN_Tsuboyama_2023_1F0M,1960,2,0.768894826394754,0.7676204208166675,0.8512498843962328,0.788578042913721,0.8075990914517256,0.8572958758626966,0.7873879296202516,0.8420298947451057,0.8984260683545839,0.8118956316436496,0.9052034747971601,0.9086576564210681,1960,1860,1760,960,525
150+
163,F7YBW8_MESOW_Ding_2023,7922,10,0.5314533450217397,0.34925924494915067,0.6758085876551873,0.6599259157196231,0.5997058741235249,0.7513136872750791,0.6818895131597839,0.7174401829879349,0.7427065235543165,0.7362585294121639,0.7642590484358062,0.7555521863157263,7922,7822,7722,6922,1329
151+
164,F7YBW8_MESOW_Aakre_2015,9192,4,0.40423469006146223,0.1350482226848431,0.13466118499332658,0.404660845745217,0.4053799723416281,0.3587571369684241,0.4817443545606126,0.4039694203320132,0.44663749332293123,0.47988656727026047,0.5238386889524845,0.49333826713642914,9192,9092,8992,8192,1484
152+
165,FECA_ECOLI_Tsuboyama_2023_2D1U,1886,2,0.40408514501427906,0.3139309947296232,0.5738858975054831,0.4523259703428921,0.5052927011009388,0.6656143272779307,0.4769453507671686,0.5682842582146705,0.7580245116989018,0.6098073159278943,0.7656754659582231,nan,1886,1786,1686,886,346
153+
166,GCN4_YEAST_Staller_2018,2638,44,0.25011546899669806,-0.006027620813706041,0.22764968209696385,0.24358636362901392,0.241174939429571,nan,0.2544759868327667,0.36879030431219906,nan,0.2859991050918842,0.5125215331300973,nan,2638,2538,2438,1638,684
154+
167,GFP_AEQVI_Sarkisyan_2016,51714,15,0.6406366653494072,0.1336688728267403,0.6860965519817945,0.6422492276272843,0.6495590297325259,nan,0.6486786880849276,0.6360629034990605,nan,0.7463828216993244,0.7711781697848346,nan,51714,51614,51514,50714,8310
155+
168,GRB2_HUMAN_Faure_2021,63366,2,0.5258434005381363,0.5367412810228084,0.7216670654700138,0.5839763004949682,0.6852622116666458,0.697364904206813,0.6746840807821936,0.7103753714289707,0.7316071269759475,0.708765178129165,0.7742172713240596,0.7938850307943259,63366,63266,63166,62366,13344
156+
169,HECD1_HUMAN_Tsuboyama_2023_3DKM,5586,2,0.28623326119763287,0.2150000228393089,0.2406307470397028,0.5991137006846494,0.6520916880669738,0.7123845116845012,0.671240119874008,0.6713008248409833,0.715769538708621,0.6959516975099861,0.7840773924211748,0.7706682813598381,5586,5486,5386,4586,986

scripts/ProteinGym_runs/run_performance_tests_proteingym_hybrid_dca_llm.py

Lines changed: 25 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,7 @@ def plot_csv_data(csv, plot_name):
453453
plt.legend()
454454
plt.tight_layout()
455455
plt.ylim(0.0, 1.0)
456-
plt.ylabel(r'|Spearman $\rho$|')
456+
plt.ylabel(r'Spearman $\rho$')
457457
adjust_text(train_test_size_texts, expand=(1.2, 2))
458458
plt.savefig(os.path.join(os.path.dirname(__file__), f'{plot_name}.png'), dpi=300)
459459
print('Saved file as ' + os.path.join(os.path.dirname(__file__), f'{plot_name}.png') + '.')
@@ -482,55 +482,32 @@ def plot_csv_data(csv, plot_name):
482482
'tab:green','limegreen', 'mediumseagreen', 'turquoise',
483483
'tab:red', 'indianred', 'red', 'darkred']
484484
)
485-
plt.ylim(-0.09, 1.09)
486-
plt.ylabel(r'|Spearmanr $\rho$|')
485+
plt.ylabel(r'Spearmanr $\rho$')
487486
sns.swarmplot(df_, color='black')
488-
print(df.columns)
489-
dset_ns_y_test = [
490-
df['N_Y_test'].to_list(),
491-
df['N_Y_test_100'].to_list(),
492-
df['N_Y_test_200'].to_list(),
493-
df['N_Y_test_1000'].to_list(),
494-
df['N_Y_test'].to_list(),
495-
df['N_Y_test_100'].to_list(),
496-
df['N_Y_test_200'].to_list(),
497-
df['N_Y_test_1000'].to_list(),
498-
df['N_Y_test'].to_list(),
499-
df['N_Y_test_100'].to_list(),
500-
df['N_Y_test_200'].to_list(),
501-
df['N_Y_test_1000'].to_list()
487+
dset_perfs = [
488+
dset_dca_perfs,
489+
dset_hybrid_perfs_dca_100,
490+
dset_hybrid_perfs_dca_200,
491+
dset_hybrid_perfs_dca_1000,
492+
dset_esm_perfs,
493+
dset_hybrid_perfs_dca_esm_100,
494+
dset_hybrid_perfs_dca_esm_200,
495+
dset_hybrid_perfs_dca_esm_1000,
496+
dset_prosst_perfs,
497+
dset_hybrid_perfs_dca_prosst_100,
498+
dset_hybrid_perfs_dca_prosst_200,
499+
dset_hybrid_perfs_dca_prosst_1000
502500
]
503-
for n in range(0, len(dset_ns_y_test)):
501+
for n in range(0, len(dset_perfs)):
504502
plt.text(
505503
n + 0.15, -0.075,
506-
[
507-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_dca_perfs):.2f}',
508-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_100):.2f}',
509-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_200):.2f}',
510-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_1000):.2f}',
511-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_esm_perfs):.2f}',
512-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_esm_100):.2f}',
513-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_esm_200):.2f}',
514-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_esm_1000):.2f}',
515-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_prosst_perfs):.2f}',
516-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_prosst_100):.2f}',
517-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_prosst_200):.2f}',
518-
r'$\overline{|\rho|}=$' + f'{np.nanmean(dset_hybrid_perfs_dca_prosst_1000):.2f}'
519-
][n]
520-
)
521-
plt.text(
522-
n + 0.15, -0.05,
523-
r'$\overline{N_{Y_\mathrm{test}}}=$' + f'{int(np.nanmean(np.array(dset_ns_y_test)[n]))}'
504+
r'$\overline{\rho}=$' + f'{np.nanmean(dset_perfs[n]):.2f}\n'
505+
+ r'$N_\mathrm{Datasets}=$' + f'{np.count_nonzero(~np.isnan(np.array(dset_perfs)[n]))}'
524506
)
525-
plt.text(
526-
n + 0.15, -0.025,
527-
r'$N_\mathrm{Datasets}=$' + f'{np.count_nonzero(~np.isnan(np.array(dset_ns_y_test)[n]))}'
528-
)
529-
print(f'\n{np.shape(dset_ns_y_test)[1]} datasets tested with N_Test\'s at N_Train\'s =\n'
530-
f' 0 100 200 1000 0 100 200 1000 0 100 200 1000\n'
531-
f'{np.nan_to_num(dset_ns_y_test).T.astype("int")}\n')
532507
plot.set_xticks(range(len(plot.get_xticklabels())))
533508
plot.set_xticklabels(plot.get_xticklabels(), rotation=45, horizontalalignment='right')
509+
plt.ylim(-0.09, 1.09)
510+
plt.margins(0.05)
534511
plt.tight_layout()
535512
plt.savefig(os.path.join(os.path.dirname(__file__), f'{plot_name}_violin.png'), dpi=300)
536513
print('Saved file as ' + os.path.join(os.path.dirname(__file__), f'{plot_name}_violin.png') + '.')
@@ -585,11 +562,11 @@ def plot_csv_data(csv, plot_name):
585562
start_i = 0
586563
already_tested_is = []
587564

588-
compute_performances(
589-
mut_data=combined_mut_data,
590-
start_i=start_i,
591-
already_tested_is=already_tested_is
592-
)
565+
#compute_performances(
566+
# mut_data=combined_mut_data,
567+
# start_i=start_i,
568+
# already_tested_is=already_tested_is
569+
#)
593570

594571
with open(out_results_csv, 'r') as fh:
595572
lines = fh.readlines()

0 commit comments

Comments
 (0)