11, win_rate , standard_error , n_wins , n_wins_base , n_draws , n_total , discrete_win_rate , mode , avg_length , length_controlled_winrate , lc_standard_error
2- Shopee-SlimMoA-v1 , 75.6142865980535 , 1.27062740591947 , 621 , 184 , 0 , 805 , 77.1428571428572 , community , 1994 , 77.4515432873834 , 0.430175221492396
3- blendaxai-gm-l6-vo31 , 69.11033492869565 , 1.3280735654354865 , 562 , 242 , 1 , 805 , 69.87577639751554 , community , 1809 , 76.91981221023656 , 0.5725365663132986
2+ Shopee-SlimMoA-v1 , 75.61428659805350 , 1.2706274059194700 , 621 , 184 , 0 , 805 , 77.14285714285720 , community , 1994 , 77.4515432873834 , 0.43017522149239600
3+ blendaxai-gm-l6-vo31 , 69.11033492869565 , 1.3280735654354863 , 562 , 242 , 1 , 805 , 69.87577639751554 , community , 1809 , 76.91981221023656 , 0.5725365663132986
44gemma-2-9b-it-WPO-HB , 77.82503168985093 , 1.2355857177790277 , 640 , 163 , 2 , 805 , 79.62732919254658 , community , 2285 , 76.72506842726064 , 0.4242603928637889
55blendaxai-gm-l3-v35 , 73.41035740244067 , 1.254951147343878 , 607 , 196 , 2 , 805 , 75.527950310559 , community , 2186 , 73.37270365010379 , 0.6163911450738288
66gemma-2-9b-it-SimPO , 65.86422561532919 , 1.423459922555078 , 540 , 264 , 1 , 805 , 67.14285714285714 , community , 1833 , 72.3508446939842 , 0.5167873784867067
7- model_hf_model_args_pretrained=mlfoundations-dev__gemma-simpo-reproduction , 67.35102937013792 , 1.4210070002869848 , 557 , 247 , 1 , 805 , 69.25465838509317 , community , 1950 , 71.18995900084634 , 0.5756949353655318
87openpipe-moa-gpt-4-turbo-v1 , 63.15493451236265 , 1.422980098799326 , 515 , 283 , 7 , 805 , 64.40993788819875 , community , 1856 , 68.37866250336802 , 0.7309418614587613
98gemma-2-9b-it-DPO , 65.35922380122982 , 1.402802336467638 , 536 , 268 , 1 , 805 , 66.64596273291924 , community , 2016 , 67.6620382198043 , 0.6605613085864308
109Together-MoA , 59.8688062333292 , 1.434305604543079 , 490 , 314 , 1 , 805 , 60.93167701863354 , community , 1825 , 65.37996976852163 , 0.7392392836781445
@@ -23,7 +22,7 @@ gpt4_1106_preview_verbose,64.30360147101865,1.3348590089025316,525,268,12,805,65
2322gpt-4o-mini-2024-07-18 , 44.65413862507926 , 1.4572395578449813 , 350 , 451 , 4 , 805 , 43.72670807453416 , minimal , 1861 , 50.727144855901976 , 0.8284734951761676
2423Storm-7B , 50.26886905528583 , 1.4728176780737183 , 397 , 408 , 0 , 805 , 49.31677018633541 , community , 2045 , 50.45110959343775 ,
2524gpt4_1106_preview , 50.0 , 0.0 , 0 , 0 , 805 , 805 , 50.0 , minimal , 2049 , 50.0 ,
26- REBEL-Llama-3-8B-Instruct-Armo , 48.43655307668638 , 1.480341435123528 , 394 , 410 , 1 , 805 , 49.00621118012423 , community , 1965 , 49.31429353685712 , 0.7061879308002301
25+ REBEL-Llama-3-8B-Instruct-Armo , 48.43655307668638 , 1.480341435123528 , 394 , 410 , 1 , 805 , 49.006211180124225 , community , 1965 , 49.314293536857114 , 0.7061879308002301
2726Infinity-Instruct-7M-Gen-Llama3_1-70B , 37.46327383827497 , 1.4734130373862548 , 299 , 501 , 5 , 805 , 37.453416149068325 , community , 1654 , 46.10043331712677 , 0.822439983375277
2827Llama-3-Instruct-8B-SimPO-ExPO , 40.63285400856655 , 1.4439449942168028 , 325 , 479 , 1 , 805 , 40.43478260869565 , community , 1765 , 45.78021783946177 ,
2928Llama-3-Instruct-8B-SimPO , 40.52977498461182 , 1.422574464675002 , 319 , 485 , 1 , 805 , 39.68944099378882 , community , 1825 , 44.65131348921881 , 0.8800655791760451
@@ -209,5 +208,4 @@ oasst-sft-pythia-12b,1.790114083180124,0.3985580883049341,13,790,2,805,1.7391304
209208guanaco-13b , 3.469596859739131 , 0.5518606725700214 , 22 , 780 , 3 , 805 , 2.919254658385093 , verified , 1774 , 3.003787329611614 ,
210209guanaco-7b , 2.880002266173913 , 0.5202924149314048 , 21 , 783 , 1 , 805 , 2.670807453416149 , verified , 1364 , 2.871116813131697 ,
211210Qwen1.5-1.8B-Chat , 3.70555681579365 , 0.5811750995496215 , 27 , 774 , 3 , 804 , 3.544776119402985 , verified , 2673 , 2.588498849185137 ,
212- baichuan-13b-chat , 1.9921455615279504 , 0.4176985079331233 , 14 , 790 , 1 , 805 , 1.8012422360248446 , community , 1727 , 2.062170253598568 ,
213- model_hf_model_args_pretrained=mlfoundations-dev__gemma-oh-preferences , 0.005260368511326853 , 0.0018774672393365112 , 0 , 805 , 0 , 805 , 0.0 , community , 196 , 0.010252829751292214 , 0.0007495965900756891
211+ baichuan-13b-chat , 1.9921455615279504 , 0.4176985079331233 , 14 , 790 , 1 , 805 , 1.8012422360248446 , community , 1727 , 2.062170253598568 ,
0 commit comments