Skip to content

Commit 5b9abbd

Browse files
JialeTomTian and ganler authored
refactor: repoqa result json (#21)
* refactor:repoqa result json
* update scores
---------
Co-authored-by: ganler <[email protected]>
1 parent f088873 commit 5b9abbd

File tree

35 files changed

+2712
-138020
lines changed

35 files changed

+2712
-138020
lines changed

.pre-commit-config.yaml

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,14 @@
11
repos:
2+
- repo: https://github.com/pycqa/isort
3+
rev: 5.12.0
4+
hooks:
5+
- id: isort
6+
name: isort (python)
7+
args: ["--profile", "black"]
8+
- repo: https://github.com/psf/black
9+
rev: 22.6.0
10+
hooks:
11+
- id: black
212
- repo: https://github.com/pre-commit/pre-commit-hooks
313
rev: v4.5.0
414
hooks:

repoqa.html

Lines changed: 55 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -343,93 +343,68 @@ <h3 id="limit" class="text-nowrap mt-5">Known limitations</h3>
343343

344344
<script>
345345
const contextTable = document.getElementById("16k");
346-
const files = [
347-
"codellama_slash_CodeLlama-13b-Instruct-hf-SCORES.json",
348-
"codellama_slash_CodeLlama-34b-Instruct-hf-SCORES.json",
349-
"codellama_slash_CodeLlama-7b-Instruct-hf-SCORES.json",
350-
"deepseek-ai_slash_deepseek-coder-33b-instruct-SCORES.json",
351-
"deepseek-ai_slash_deepseek-coder-6.7b-instruct-SCORES.json",
352-
"deepseek-ai_slash_DeepSeek-V2-Chat-SCORES.json",
353-
"deepseek-ai_slash_DeepSeek-V2-Lite-Chat-SCORES.json",
354-
"google_slash_codegemma-7b-it-SCORES.json",
355-
"meta-llama_slash_Meta-Llama-3-70B-Instruct-SCORES.json",
356-
"meta-llama_slash_Meta-Llama-3-8B-Instruct-SCORES.json",
357-
"mistralai_slash_Mistral-7B-Instruct-v0.1-SCORES.json",
358-
"mistralai_slash_Mistral-7B-Instruct-v0.2-SCORES.json",
359-
"mistralai_slash_Mixtral-8x22B-Instruct-v0.1-SCORES.json",
360-
"mistralai_slash_Mixtral-8x7B-Instruct-v0.1-SCORES.json",
361-
"Qwen_slash_CodeQwen1.5-7B-Chat-SCORES.json",
362-
"Qwen_slash_Qwen1.5-14B-Chat-SCORES.json",
363-
"Qwen_slash_Qwen1.5-32B-Chat-SCORES.json",
364-
"Qwen_slash_Qwen1.5-72B-Chat-SCORES.json",
365-
"Qwen_slash_Qwen1.5-7B-Chat-SCORES.json",
366-
"gpt-3.5-turbo-0125-SCORES.json",
367-
"gpt-4-turbo-2024-04-09-SCORES.json",
368-
"microsoft_slash_Phi-3-mini-128k-instruct-SCORES.json",
369-
"ise-uiuc_slash_Magicoder-S-DS-6.7B-SCORES.json",
370-
"claude-3-haiku-20240307-SCORES.json",
371-
"gemini-1.5-pro-latest-SCORES.json",
372-
"claude-3-opus-20240229-SCORES.json",
373-
"claude-3-sonnet-20240229-SCORES.json",
374-
"CohereForAI_slash_c4ai-command-r-plus-SCORES.json",
375-
"gpt-4o-2024-05-13-SCORES.json",
376-
"gemini-1.5-flash-latest-SCORES.json",
377-
];
378346
const linkMapping = new Map([]);
379347
const hfLinkPrefix = "https://huggingface.co/";
380-
files.map((file) => {
381-
modelId = file.slice(0, -12);
382-
383-
// if _slash_ in modelId, split by _slash_ and get the last one
384-
if (modelId.includes("_slash_")) {
385-
modelId = modelId.split("_slash_");
386-
modelOrg = modelId[0];
387-
modelId = modelId[1];
388-
url = hfLinkPrefix + modelOrg + "/" + modelId;
389-
linkMapping.set(modelId, url);
390-
} else if (modelId.startsWith("gpt-4-")) {
391-
linkMapping.set(
392-
modelId,
393-
"https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4",
394-
);
395-
} else if (modelId.startsWith("gpt-3.5-")) {
396-
linkMapping.set(
397-
modelId,
398-
"https://platform.openai.com/docs/models/gpt-3-5-turbo",
399-
);
400-
} else if (modelId.startsWith("claude-3-")) {
401-
linkMapping.set(
402-
modelId,
403-
"https://www.anthropic.com/news/claude-3-family",
404-
);
405-
}
406-
});
407-
const dataUrlPrefix = "results/repoqa/ntoken_16384/";
348+
const dataUrlPrefix = "results/repoqa";
408349
const correctColor = "rgba(72, 200, 120",
409350
incorrectColor = "rgba(200, 53, 50";
410351

411352
// Load data
412353
var data = [];
413-
for (var i = 0; i < files.length; i++) {
414-
var dataUrl = dataUrlPrefix + files[i];
415-
var xhr = new XMLHttpRequest();
416-
xhr.open("GET", dataUrl, false); // false makes the request synchronous
417-
xhr.send();
418-
if (xhr.status === 200) {
419-
dataRow = JSON.parse(xhr.responseText);
420-
dataRow = Object.keys(dataRow).map((key) => {
421-
return {
422-
Model: key.split("/").pop(),
423-
...dataRow[key],
424-
};
425-
});
426-
427-
data = data.concat(dataRow);
428-
} else {
429-
alert(
430-
"Failed to load data from " + dataUrl + ". Please try again later.",
431-
);
432-
}
354+
var dataUrl = dataUrlPrefix + "/COMBINED-RESULTS.json";
355+
var xhr = new XMLHttpRequest();
356+
xhr.open("GET", dataUrl, false); // false makes the request synchronous
357+
xhr.send();
358+
359+
if (xhr.status === 200) {
360+
var scores = JSON.parse(xhr.responseText);
361+
var scoresMap = new Map(Object.entries(scores));
362+
scoresMap.forEach((value, modelId) => {
363+
var result = {
364+
Model: modelId.split("/").pop(),
365+
...value,
366+
};
367+
data = data.concat(result);
368+
369+
if (modelId.includes("/")) {
370+
modelId = modelId.split("/");
371+
modelOrg = modelId[0];
372+
modelId = modelId[1];
373+
url = hfLinkPrefix + modelOrg + "/" + modelId;
374+
linkMapping.set(modelId, url);
375+
} else if (modelId.startsWith("gpt-4-")) {
376+
linkMapping.set(
377+
modelId,
378+
"https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4",
379+
);
380+
} else if (modelId.startsWith("gpt-3.5-")) {
381+
linkMapping.set(
382+
modelId,
383+
"https://platform.openai.com/docs/models/gpt-3-5-turbo",
384+
);
385+
} else if (modelId.startsWith("claude-3-")) {
386+
linkMapping.set(
387+
modelId,
388+
"https://www.anthropic.com/news/claude-3-family",
389+
);
390+
} else if (modelId.startsWith("gemini-1.5-pro")) {
391+
linkMapping.set(
392+
modelId,
393+
"https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/#sundar-note",
394+
);
395+
} else if (modelId.startsWith("gemini-1.5-flash")) {
396+
linkMapping.set(
397+
modelId,
398+
"https://deepmind.google/technologies/gemini/flash/",
399+
);
400+
} else if (modelId.startsWith("gpt-4o-")) {
401+
linkMapping.set(modelId, "https://openai.com/index/hello-gpt-4o/");
402+
}
403+
});
404+
} else {
405+
alert(
406+
"Failed to load data from " + dataUrl + ". Please try again later.",
407+
);
433408
}
434409

435410
const globalData = data;

0 commit comments

Comments
 (0)