@@ -343,93 +343,68 @@ <h3 id="limit" class="text-nowrap mt-5">Known limitations</h3>
 
 <script>
   const contextTable = document.getElementById("16k");
-  const files = [
-    "codellama_slash_CodeLlama-13b-Instruct-hf-SCORES.json",
-    "codellama_slash_CodeLlama-34b-Instruct-hf-SCORES.json",
-    "codellama_slash_CodeLlama-7b-Instruct-hf-SCORES.json",
-    "deepseek-ai_slash_deepseek-coder-33b-instruct-SCORES.json",
-    "deepseek-ai_slash_deepseek-coder-6.7b-instruct-SCORES.json",
-    "deepseek-ai_slash_DeepSeek-V2-Chat-SCORES.json",
-    "deepseek-ai_slash_DeepSeek-V2-Lite-Chat-SCORES.json",
-    "google_slash_codegemma-7b-it-SCORES.json",
-    "meta-llama_slash_Meta-Llama-3-70B-Instruct-SCORES.json",
-    "meta-llama_slash_Meta-Llama-3-8B-Instruct-SCORES.json",
-    "mistralai_slash_Mistral-7B-Instruct-v0.1-SCORES.json",
-    "mistralai_slash_Mistral-7B-Instruct-v0.2-SCORES.json",
-    "mistralai_slash_Mixtral-8x22B-Instruct-v0.1-SCORES.json",
-    "mistralai_slash_Mixtral-8x7B-Instruct-v0.1-SCORES.json",
-    "Qwen_slash_CodeQwen1.5-7B-Chat-SCORES.json",
-    "Qwen_slash_Qwen1.5-14B-Chat-SCORES.json",
-    "Qwen_slash_Qwen1.5-32B-Chat-SCORES.json",
-    "Qwen_slash_Qwen1.5-72B-Chat-SCORES.json",
-    "Qwen_slash_Qwen1.5-7B-Chat-SCORES.json",
-    "gpt-3.5-turbo-0125-SCORES.json",
-    "gpt-4-turbo-2024-04-09-SCORES.json",
-    "microsoft_slash_Phi-3-mini-128k-instruct-SCORES.json",
-    "ise-uiuc_slash_Magicoder-S-DS-6.7B-SCORES.json",
-    "claude-3-haiku-20240307-SCORES.json",
-    "gemini-1.5-pro-latest-SCORES.json",
-    "claude-3-opus-20240229-SCORES.json",
-    "claude-3-sonnet-20240229-SCORES.json",
-    "CohereForAI_slash_c4ai-command-r-plus-SCORES.json",
-    "gpt-4o-2024-05-13-SCORES.json",
-    "gemini-1.5-flash-latest-SCORES.json",
-  ];
   const linkMapping = new Map([]);
   const hfLinkPrefix = "https://huggingface.co/";
-  files.map((file) => {
-    modelId = file.slice(0, -12);
-
-    // if _slash_ in modelId, split by _slash_ and get the last one
-    if (modelId.includes("_slash_")) {
-      modelId = modelId.split("_slash_");
-      modelOrg = modelId[0];
-      modelId = modelId[1];
-      url = hfLinkPrefix + modelOrg + "/" + modelId;
-      linkMapping.set(modelId, url);
-    } else if (modelId.startsWith("gpt-4-")) {
-      linkMapping.set(
-        modelId,
-        "https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4",
-      );
-    } else if (modelId.startsWith("gpt-3.5-")) {
-      linkMapping.set(
-        modelId,
-        "https://platform.openai.com/docs/models/gpt-3-5-turbo",
-      );
-    } else if (modelId.startsWith("claude-3-")) {
-      linkMapping.set(
-        modelId,
-        "https://www.anthropic.com/news/claude-3-family",
-      );
-    }
-  });
-  const dataUrlPrefix = "results/repoqa/ntoken_16384/";
+  const dataUrlPrefix = "results/repoqa";
   const correctColor = "rgba(72, 200, 120",
     incorrectColor = "rgba(200, 53, 50";
 
   // Load data
   var data = [];
-  for (var i = 0; i < files.length; i++) {
-    var dataUrl = dataUrlPrefix + files[i];
-    var xhr = new XMLHttpRequest();
-    xhr.open("GET", dataUrl, false); // false makes the request synchronous
-    xhr.send();
-    if (xhr.status === 200) {
-      dataRow = JSON.parse(xhr.responseText);
-      dataRow = Object.keys(dataRow).map((key) => {
-        return {
-          Model: key.split("/").pop(),
-          ...dataRow[key],
-        };
-      });
-
-      data = data.concat(dataRow);
-    } else {
-      alert(
-        "Failed to load data from " + dataUrl + ". Please try again later.",
-      );
-    }
+  var dataUrl = dataUrlPrefix + "/COMBINED-RESULTS.json";
+  var xhr = new XMLHttpRequest();
+  xhr.open("GET", dataUrl, false); // false makes the request synchronous
+  xhr.send();
+
+  if (xhr.status === 200) {
+    var scores = JSON.parse(xhr.responseText);
+    var scoresMap = new Map(Object.entries(scores));
+    scoresMap.forEach((value, modelId) => {
+      var result = {
+        Model: modelId.split("/").pop(),
+        ...value,
+      };
+      data = data.concat(result);
+
+      if (modelId.includes("/")) {
+        modelId = modelId.split("/");
+        modelOrg = modelId[0];
+        modelId = modelId[1];
+        url = hfLinkPrefix + modelOrg + "/" + modelId;
+        linkMapping.set(modelId, url);
+      } else if (modelId.startsWith("gpt-4-")) {
+        linkMapping.set(
+          modelId,
+          "https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4",
+        );
+      } else if (modelId.startsWith("gpt-3.5-")) {
+        linkMapping.set(
+          modelId,
+          "https://platform.openai.com/docs/models/gpt-3-5-turbo",
+        );
+      } else if (modelId.startsWith("claude-3-")) {
+        linkMapping.set(
+          modelId,
+          "https://www.anthropic.com/news/claude-3-family",
+        );
+      } else if (modelId.startsWith("gemini-1.5-pro")) {
+        linkMapping.set(
+          modelId,
+          "https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/#sundar-note",
+        );
+      } else if (modelId.startsWith("gemini-1.5-flash")) {
+        linkMapping.set(
+          modelId,
+          "https://deepmind.google/technologies/gemini/flash/",
+        );
+      } else if (modelId.startsWith("gpt-4o-")) {
+        linkMapping.set(modelId, "https://openai.com/index/hello-gpt-4o/");
+      }
+    });
+  } else {
+    alert(
+      "Failed to load data from " + dataUrl + ". Please try again later.",
+    );
   }
 
   const globalData = data;
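
For context on the new loading path: the committed code above fetches a single combined score file synchronously with XMLHttpRequest and flattens it into table rows. The sketch below is not part of the commit; it shows an equivalent non-blocking version using fetch(), assuming (as the Object.entries(scores) call above implies) that results/repoqa/COMBINED-RESULTS.json maps full model IDs such as "Qwen/CodeQwen1.5-7B-Chat" to per-model score objects.

// Minimal sketch (not in the commit): async equivalent of the loader above.
// Assumes COMBINED-RESULTS.json has the shape { "<org>/<model>": { ...scores } }.
async function loadCombinedResults(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error("Failed to load " + url + ": " + response.status);
  }
  const scores = await response.json();
  // Flatten { modelId: {...} } into rows with a short "Model" name,
  // mirroring the forEach loop in the committed code.
  return Object.entries(scores).map(([modelId, value]) => ({
    Model: modelId.split("/").pop(),
    ...value,
  }));
}

// Hypothetical usage with the path from the diff:
// loadCombinedResults("results/repoqa/COMBINED-RESULTS.json")
//   .then((rows) => console.log(rows.length, "models loaded"));

Using fetch() would avoid the synchronous XHR (flagged as deprecated by browsers because it blocks the main thread while the file downloads), but it requires the table-rendering code to run after the promise resolves rather than immediately after the script tag.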