@@ -552,88 +552,88 @@ <h1 id="scicode-leaderboard">SciCode Leaderboard</h1>
552552< tr >
553553< th > Models</ th >
554554< th > Main Problem Resolve Rate</ th >
555- < th > < span style ="background- color:lightgrey "> Subproblem</ span > </ th >
555+ < th > < span style ="color:grey "> Subproblem</ span > </ th >
556556</ tr >
557557</ thead >
558558< tbody >
559559< tr >
560560< td > 🥇 OpenAI o1-preview</ td >
561- < td > < div align ="center "> 7.7</ div > </ td >
562- < td > < div align ="center " style ="background- color:lightgrey "> 28.5</ div > </ td >
561+ < td > < div align ="center "> < strong > 7.7</ strong > </ div > </ td >
562+ < td > < div align ="center " style ="color:grey "> 28.5</ div > </ td >
563563</ tr >
564564< tr >
565565< td > 🥈 Claude3.5-Sonnet</ td >
566- < td > < div align ="center "> 4.6</ div > </ td >
567- < td > < div align ="center " style ="background- color:lightgrey "> 26.0</ div > </ td >
566+ < td > < div align ="center "> < strong > 4.6</ strong > </ div > </ td >
567+ < td > < div align ="center " style ="color:grey "> 26.0</ div > </ td >
568568</ tr >
569569< tr >
570570< td > 🥉 Claude3.5-Sonnet (new)</ td >
571- < td > < div align ="center "> 4.6</ div > </ td >
572- < td > < div align ="center " style ="background- color:lightgrey "> 25.3</ div > </ td >
571+ < td > < div align ="center "> < strong > 4.6</ strong > </ div > </ td >
572+ < td > < div align ="center " style ="color:grey "> 25.3</ div > </ td >
573573</ tr >
574574< tr >
575575< td > Deepseek-Coder-v2</ td >
576- < td > < div align ="center "> 3.1</ div > </ td >
577- < td > < div align ="center " style ="background- color:lightgrey "> 21.2</ div > </ td >
576+ < td > < div align ="center "> < strong > 3.1</ strong > </ div > </ td >
577+ < td > < div align ="center " style ="color:grey "> 21.2</ div > </ td >
578578</ tr >
579579< tr >
580580< td > GPT-4o</ td >
581- < td > < div align ="center "> 1.5</ div > </ td >
582- < td > < div align ="center " style ="background- color:lightgrey "> 25.0</ div > </ td >
581+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
582+ < td > < div align ="center " style ="color:grey "> 25.0</ div > </ td >
583583</ tr >
584584< tr >
585585< td > GPT-4-Turbo</ td >
586- < td > < div align ="center "> 1.5</ div > </ td >
587- < td > < div align ="center " style ="background- color:lightgrey "> 22.9</ div > </ td >
586+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
587+ < td > < div align ="center " style ="color:grey "> 22.9</ div > </ td >
588588</ tr >
589589< tr >
590590< td > OpenAI o1-mini</ td >
591- < td > < div align ="center "> 1.5</ div > </ td >
592- < td > < div align ="center " style ="background- color:lightgrey "> 22.2</ div > </ td >
591+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
592+ < td > < div align ="center " style ="color:grey "> 22.2</ div > </ td >
593593</ tr >
594594< tr >
595595< td > Gemini 1.5 Pro</ td >
596- < td > < div align ="center "> 1.5</ div > </ td >
597- < td > < div align ="center " style ="background- color:lightgrey "> 21.9</ div > </ td >
596+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
597+ < td > < div align ="center " style ="color:grey "> 21.9</ div > </ td >
598598</ tr >
599599< tr >
600600< td > Claude3-Opus</ td >
601- < td > < div align ="center "> 1.5</ div > </ td >
602- < td > < div align ="center " style ="background- color:lightgrey "> 21.5</ div > </ td >
601+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
602+ < td > < div align ="center " style ="color:grey "> 21.5</ div > </ td >
603603</ tr >
604604< tr >
605605< td > Llama-3.1-405B-Chat</ td >
606- < td > < div align ="center "> 1.5</ div > </ td >
607- < td > < div align ="center " style ="background- color:lightgrey "> 19.8</ div > </ td >
606+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
607+ < td > < div align ="center " style ="color:grey "> 19.8</ div > </ td >
608608</ tr >
609609< tr >
610610< td > Claude3-Sonnet</ td >
611- < td > < div align ="center "> 1.5</ div > </ td >
612- < td > < div align ="center " style ="background- color:lightgrey "> 17.0</ div > </ td >
611+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
612+ < td > < div align ="center " style ="color:grey "> 17.0</ div > </ td >
613613</ tr >
614614< tr >
615615< td > Qwen2-72B-Instruct</ td >
616- < td > < div align ="center "> 1.5</ div > </ td >
617- < td > < div align ="center " style ="background- color:lightgrey "> 17.0</ div > </ td >
616+ < td > < div align ="center "> < strong > 1.5</ strong > </ div > </ td >
617+ < td > < div align ="center " style ="color:grey "> 17.0</ div > </ td >
618618</ tr >
619619< tr >
620620< td > Llama-3.1-70B-Chat</ td >
621- < td > < div align ="center "> 0.0</ div > </ td >
622- < td > < div align ="center " style ="background- color:lightgrey "> 17.0</ div > </ td >
621+ < td > < div align ="center "> < strong > 0.0</ strong > </ div > </ td >
622+ < td > < div align ="center " style ="color:grey "> 17.0</ div > </ td >
623623</ tr >
624624< tr >
625625< td > Mixtral-8x22B-Instruct</ td >
626- < td > < div align ="center "> 0.0</ div > </ td >
627- < td > < div align ="center " style ="background- color:lightgrey "> 16.3</ div > </ td >
626+ < td > < div align ="center "> < strong > 0.0</ strong > </ div > </ td >
627+ < td > < div align ="center " style ="color:grey "> 16.3</ div > </ td >
628628</ tr >
629629< tr >
630630< td > Llama-3-70B-Chat</ td >
631- < td > < div align ="center "> 0.0</ div > </ td >
632- < td > < div align ="center " style ="background- color:lightgrey "> 14.6</ div > </ td >
631+ < td > < div align ="center "> < strong > 0.0</ strong > </ div > </ td >
632+ < td > < div align ="center " style ="color:grey "> 14.6</ div > </ td >
633633</ tr >
634634</ tbody >
635635</ table >
636- < p > Note: If the models tie in the Main Problem resolve rate, we will then compare the Subproblems.</ p >
636+ < p > < strong > Note: If the models tie in the Main Problem resolve rate, we will then compare the Subproblems.</ strong > </ p >
637637<!-- Once you've added the results to the submission repository,
638638 bring back the table here -->
639639<!-- include-markdown "leaderboard_table.md" -->
0 commit comments