From fb7901486f63e1d2ac1d7fa37c048629dc8f4c7d Mon Sep 17 00:00:00 2001 From: Rui Vieira Date: Mon, 9 Jun 2025 10:38:28 +0100 Subject: [PATCH] feat(RHOAIENG-26883): Add ConfigMap with list of LMEval's tasks and descriptions --- config/base/kustomization.yaml | 1 + config/configmaps/kustomization.yaml | 3 + config/configmaps/trustyai-lmeval-tasks.yaml | 27217 +++++++++++++++++ 3 files changed, 27221 insertions(+) create mode 100644 config/configmaps/kustomization.yaml create mode 100644 config/configmaps/trustyai-lmeval-tasks.yaml diff --git a/config/base/kustomization.yaml b/config/base/kustomization.yaml index f50d41df3..354f0d22b 100644 --- a/config/base/kustomization.yaml +++ b/config/base/kustomization.yaml @@ -5,6 +5,7 @@ resources: - ../crd - ../rbac - ../manager + - ../configmaps commonLabels: app.kubernetes.io/part-of: trustyai diff --git a/config/configmaps/kustomization.yaml b/config/configmaps/kustomization.yaml new file mode 100644 index 000000000..67bf8d255 --- /dev/null +++ b/config/configmaps/kustomization.yaml @@ -0,0 +1,3 @@ +--- +resources: + - trustyai-lmeval-tasks.yaml diff --git a/config/configmaps/trustyai-lmeval-tasks.yaml b/config/configmaps/trustyai-lmeval-tasks.yaml new file mode 100644 index 000000000..1b0ee0f46 --- /dev/null +++ b/config/configmaps/trustyai-lmeval-tasks.yaml @@ -0,0 +1,27217 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: trustyai-lmeval-tasks + labels: + app.kubernetes.io/name: configmap + app.kubernetes.io/instance: trustyai-lmeval-tasks + app.kubernetes.io/component: trustyai + app.kubernetes.io/created-by: trustyai-service-operator + app.kubernetes.io/part-of: trustyai-service-operator + app.kubernetes.io/managed-by: kustomize +data: + tasks: | + { + "lm-evaluation-harness": [ + { + "name": "AraDiCE", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "aclue", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aexams", + "description": "Tasks in Arabic related to various academic exams covering a range of subjects." + }, + { + "name": "agieval", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_cn", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_nous", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "arabic_leaderboard_acva", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_exams", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_exams_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_arc_challenge", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_arc_challenge_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_arc_easy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_arc_easy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_boolq", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_boolq_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_copa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_copa_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_hellaswag", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_hellaswag_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_mmlu", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_mmlu_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_openbook_qa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_openbook_qa_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_piqa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_piqa_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_race", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_race_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_sciq", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_sciq_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_toxigen", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mt_toxigen_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_complete", + "description": "A full version of the tasks in the Open Arabic LLM Leaderboard, focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. 
Note that some of these tasks are machine-translated." + }, + { + "name": "arabic_leaderboard_light", + "description": "A light version of the tasks in the Open Arabic LLM Leaderboard (i.e., 10% samples of the test set in the original benchmarks), focusing on the evaluation of models that reflect the characteristics of Arabic language understanding and comprehension, culture, and heritage. Note that some of these tasks are machine-translated." + }, + { + "name": "arabicmmlu", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_humanities", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_language", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_other", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_social_science", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_stem", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "basque_bench", + "description": "Collection of tasks in Basque encompassing various evaluation areas." + }, + { + "name": "bbh", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_zeroshot", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "belebele", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "blimp", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "catalan_bench", + "description": "Collection of tasks in Catalan encompassing various evaluation areas." + }, + { + "name": "ceval-valid", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "cmmlu", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "csatqa", + "description": "Tasks related to SAT and other standardized testing questions for academic assessment." + }, + { + "name": "flan_held_in", + "description": "No description available" + }, + { + "name": "flan_held_out", + "description": "No description available" + }, + { + "name": "flores_ca", + "description": "No description available" + }, + { + "name": "flores_es", + "description": "No description available" + }, + { + "name": "flores_eu", + "description": "No description available" + }, + { + "name": "flores_gl", + "description": "No description available" + }, + { + "name": "flores_pt", + "description": "No description available" + }, + { + "name": "galician_bench", + "description": "Collection of tasks in Galician encompassing various evaluation areas." + }, + { + "name": "global_mmlu_ar", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_bn", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_de", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_en", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_es", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_fr", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fil_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ha_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_hi_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ig_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ja_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pt_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ru_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sn_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sr_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sw_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_hi", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_id", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_it", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ja", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ko", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_pt", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_sw", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_yo", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_zh", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "haerae", + "description": "Tasks focused on assessing detailed factual and historical knowledge." + }, + { + "name": "hendrycks_math", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." + }, + { + "name": "hrm8k", + "description": "No description available" + }, + { + "name": "hrm8k_en", + "description": "No description available" + }, + { + "name": "japanese_leaderboard", + "description": "Japanese language understanding tasks to benchmark model performance on various linguistic aspects." + }, + { + "name": "kmmlu_cot_hard", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_applied_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_humss", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_other", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_stem", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_applied_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_applied_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_humss", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_other", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_stem", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_humss", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_other", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_stem", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_applied_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_humss", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_other", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_hard_stem", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kobest", + "description": "A collection of tasks designed to evaluate understanding in Korean language." + }, + { + "name": "kormedmcqa", + "description": "Medical question answering tasks in Korean to test specialized domain knowledge." + }, + { + "name": "leaderboard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_gpqa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_instruction_following", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_musr", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "lingoly", + "description": "Challenging logical reasoning benchmark in low-resource languages with controls for memorization" + }, + { + "name": "med_concepts_qa", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_atc", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd10cm", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd10proc", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9cm", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9proc", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "mela", + "description": "No description available" + }, + { + "name": "metabench", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_secondary", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait."
+ }, + { + "name": "metabench_secondary_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "minerva_math", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." + }, + { + "name": "mmlu", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_humanities", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_llama_humanities", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_other", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_social_sciences", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_stem", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_other", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_pro", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_social_sciences", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_stem", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlusr", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_answer_only", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmmu_val", + "description": "No description available" + }, + { + "name": "mmmu_val_art_and_design", + "description": "No description available" + }, + { + "name": "mmmu_val_business", + "description": "No description available" + }, + { + "name": "mmmu_val_health_and_medicine", + "description": "No description available" + }, + { + "name": "mmmu_val_humanities_and_social_science", + "description": "No description available" + }, + { + "name": "mmmu_val_science", + "description": "No description available" + }, + { + "name": "mmmu_val_tech_and_engineering", + "description": "No description available" + }, + { + "name": "multimedqa", + "description": "Multiple choice question answering based on the United States Medical License Exams." + }, + { + "name": "openllm", + "description": "No description available" + }, + { + "name": "pawsx", + "description": "No description available" + }, + { + "name": "portuguese_bench", + "description": "Collection of tasks in European Portuguese encompassing various evaluation areas." 
+ }, + { + "name": "pythia", + "description": "No description available" + }, + { + "name": "score_non_greedy_robustness_agieval", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_non_greedy_robustness_math", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_option_order_robustness_agieval", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_prompt_robustness_agieval", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_prompt_robustness_math", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_robustness", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_robustness_agieval", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_robustness_math", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "spanish_bench", + "description": "Collection of tasks in Spanish encompassing various evaluation areas." + }, + { + "name": "t0_eval", + "description": "No description available" + }, + { + "name": "tinyBenchmarks", + "description": "Evaluation of large language models with fewer examples using tiny versions of popular benchmarks." 
+ }, + { + "name": "tmlu", + "description": "No description available" + }, + { + "name": "tmmluplus", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_STEM", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_humanities", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_other", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_social_sciences", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "wmdp", + "description": "A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions." + }, + { + "name": "xcopa", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xnli", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xstorycloze", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xwinograd", + "description": "Cross-lingual Winograd schema tasks for coreference resolution in multiple languages." + }, + { + "name": "Tag", + "description": "No description available" + }, + { + "name": "AraDiCE_ArabicMMLU_humanities_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_humanities_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_language_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_language_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_other_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_other_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_social-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_social-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_stem_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_stem_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "advanced_ai_risk", + "description": "No description available" + }, + { + "name": "ai2_arc", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "anli", + "description": "Adversarial natural language inference tasks designed to test model robustness." + }, + { + "name": "arabicmmlu_humanities_tasks", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_language_tasks", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "arabicmmlu_other_tasks", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_social_science_tasks", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_stem_tasks", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arc_challenge_mt", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_multilingual", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arithmetic", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "basque-glue", + "description": "General Language Understanding Evaluation benchmark to test broad language abilities." + }, + { + "name": "bertaqa", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bigbench_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_multiple_choice_a", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_multiple_choice_b", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "cabreu", + "description": "No description available" + }, + { + "name": "chain_of_thought", + "description": "No description available" + }, + { + "name": "copal_id", + "description": "Indonesian causal commonsense reasoning dataset that captures local nuances." + }, + { + "name": "crows_pairs", + "description": "Tasks designed to test model biases in various sociodemographic groups." 
+ }, + { + "name": "eus_exams_es", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "flores", + "description": "No description available" + }, + { + "name": "freebase", + "description": "No description available" + }, + { + "name": "french_bench", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_extra", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_gen", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_mc", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_perplexity", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "global_mmlu_full_am_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_he_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_it_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ro_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_so_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_humanities_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_other_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_social_sciences_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_stem_tasks", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "glue", + "description": "General Language Understanding Evaluation benchmark to test broad language abilities." + }, + { + "name": "gpqa", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpt3_translation_benchmarks", + "description": "Tasks focused on evaluating the language translation capabilities of models." + }, + { + "name": "headqa", + "description": "A high-level education-based question answering dataset to test specialized knowledge." + }, + { + "name": "hellaswag_multilingual", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hendrycks_ethics", + "description": "Tasks designed to evaluate the ethical reasoning capabilities of models." + }, + { + "name": "inverse_scaling_mc", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "iwslt2017", + "description": "No description available" + }, + { + "name": "kbl", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal", + "description": "Korean Benchmark for Legal Language Understanding." 
+ }, + { + "name": "kbl_bar_exam_em_public", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_knowledge_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_reasoning_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kmmlu_cot_hard_applied_science_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_humss_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_other_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_stem_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_applied_science_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_applied_science_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_humss_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_other_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_stem_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_humss_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_other_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_stem_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_applied_science_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_humss_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_other_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_stem_tasks", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "lambada", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_cloze", + "description": "Cloze-style LAMBADA dataset." + }, + { + "name": "lambada_multilingual", + "description": "Multilingual LAMBADA dataset. This is a legacy version of the multilingual dataset, and users should instead use `lambada_multilingual_stablelm`." + }, + { + "name": "llama", + "description": "No description available" + }, + { + "name": "m_mmlu", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "math_word_problems", + "description": "No description available" + }, + { + "name": "med_concepts_qa_atc_tasks", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept." 
+ }, + { + "name": "med_concepts_qa_icd10cm_tasks", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept." + }, + { + "name": "med_concepts_qa_icd10proc_tasks", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept." + }, + { + "name": "med_concepts_qa_icd9cm_tasks", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept." + }, + { + "name": "med_concepts_qa_icd9proc_tasks", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concept." + }, + { + "name": "metabench_arc_subset", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_gsm8k_subset", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_hellaswag_subset", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_mmlu_subset", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_truthfulqa_subset", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." 
+ }, + { + "name": "metabench_winogrande_subset", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "mgsm_cot_native", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mmlu_continuation_humanities", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_other", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_social_sciences", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_stem", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_humanities", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_other", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_social_sciences", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_stem", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_cot_zeroshot_humanities", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_other", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_social_sciences", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_stem", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_humanities", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_other", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_social_sciences", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_stem", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_humanities", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_other", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_loglikelihood_social_sciences", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_stem", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_humanities_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_humanities_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_humanities_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_other_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_social_sciences_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_stem_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_other_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_other_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_social_sciences_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_social_sciences_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_stem_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_stem_tasks", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlusr_answer_only_humanities_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_other_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_social_sciences_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_stem_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_humanities_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_other_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_social_sciences_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_stem_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_humanities_tasks", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_only_other_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_social_sciences_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_stem_tasks", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "multiple_choice", + "description": "No description available" + }, + { + "name": "paloma", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "persona", + "description": "No description available" + }, + { + "name": "phrases_es", + "description": "No description available" + }, + { + "name": "phrases_va", + "description": "No description available" + }, + { + "name": "polemo2", + "description": "Sentiment analysis and emotion detection tasks based on Polish language data." + }, + { + "name": "qa4mre", + "description": "Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning." + }, + { + "name": "qasper", + "description": "Question Answering dataset based on academic papers, testing in-depth scientific knowledge." + }, + { + "name": "score_robustness_mmlu_pro", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets(MMLU-Pro, Agi Eval and MATH)" + }, + { + "name": "self_consistency", + "description": "No description available" + }, + { + "name": "storycloze", + "description": "Tasks to predict story endings, focusing on narrative logic and coherence." + }, + { + "name": "super-glue-lm-eval-v1", + "description": "General Language Understanding Evaluation benchmark to test broad language abilities." 
+ }, + { + "name": "super-glue-lm-eval-v1-seq2seq", + "description": "General Language Understanding Evaluation benchmark to test broad language abilities." + }, + { + "name": "super-glue-t5-prompt", + "description": "General Language Understanding Evaluation benchmark to test broad language abilities." + }, + { + "name": "sycophancy", + "description": "No description available" + }, + { + "name": "tmlu_humanities_tasks", + "description": "No description available" + }, + { + "name": "tmlu_other_tasks", + "description": "No description available" + }, + { + "name": "tmlu_social_sciences_tasks", + "description": "No description available" + }, + { + "name": "tmlu_stem_tasks", + "description": "No description available" + }, + { + "name": "tmlu_taiwan_specific_tasks", + "description": "No description available" + }, + { + "name": "tmmluplus_STEM_tasks", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_humanities_tasks", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_other_tasks", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_social_sciences_tasks", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "translation", + "description": "Tasks focused on evaluating the language translation capabilities of models." + }, + { + "name": "truthfulqa", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_gl", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_multilingual", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." 
+ }, + { + "name": "turkishmmlu", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "unscramble", + "description": "Tasks involving the rearrangement of scrambled sentences to test syntactic understanding." + }, + { + "name": "wmt14", + "description": "No description available" + }, + { + "name": "wmt16", + "description": "No description available" + }, + { + "name": "xnli_eu_mt_native", + "description": "Cross-lingual Natural Language Inference tasks in Basque." + }, + { + "name": "xquad", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "Task", + "description": "No description available" + }, + { + "name": "20_newsgroups", + "description": "No description available" + }, + { + "name": "AraDiCE_ArabicMMLU_high_humanities_history_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_humanities_history_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_humanities_islamic-studies_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_humanities_islamic-studies_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_humanities_philosophy_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_high_humanities_philosophy_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_language_arabic-language_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_language_arabic-language_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_social-science_civics_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_social-science_civics_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_social-science_economics_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_social-science_economics_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_social-science_geography_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_social-science_geography_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_stem_biology_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_stem_biology_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_high_stem_computer-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_stem_computer-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_stem_physics_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_high_stem_physics_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_humanities_history_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_humanities_history_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_humanities_islamic-studies_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_humanities_islamic-studies_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_language_arabic-language_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_language_arabic-language_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_other_general-knowledge_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_middle_other_general-knowledge_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_civics_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_civics_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_economics_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_economics_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_geography_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_geography_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_social-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_social-science_social-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_stem_computer-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_stem_computer-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_middle_stem_natural-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_middle_stem_natural-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_humanities_islamic-studies_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_humanities_islamic-studies_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_language_arabic-language-general_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_language_arabic-language-general_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_language_arabic-language-grammar_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_language_arabic-language-grammar_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_other_driving-test_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_other_driving-test_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_na_other_general-knowledge_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_na_other_general-knowledge_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_humanities_history_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_humanities_history_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_humanities_islamic-studies_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_humanities_islamic-studies_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_language_arabic-language_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_language_arabic-language_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_other_general-knowledge_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_other_general-knowledge_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_social-science_geography_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_social-science_geography_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_primary_social-science_social-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_social-science_social-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_stem_computer-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_stem_computer-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_stem_math_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_stem_math_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_stem_natural-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_primary_stem_natural-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_prof_humanities_law_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_prof_humanities_law_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_other_management_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "AraDiCE_ArabicMMLU_univ_other_management_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_social-science_accounting_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_social-science_accounting_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_social-science_economics_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_social-science_economics_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_social-science_political-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_social-science_political-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_stem_computer-science_egy", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_ArabicMMLU_univ_stem_computer-science_lev", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "AraDiCE_boolq_egy", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_boolq_eng", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." 
+ }, + { + "name": "AraDiCE_boolq_lev", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_boolq_msa", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_egypt_cultural", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_jordan_cultural", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_lebanon_cultural", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_openbookqa_egy", + "description": "Open-book question answering tasks that require external knowledge and reasoning." + }, + { + "name": "AraDiCE_openbookqa_eng", + "description": "Open-book question answering tasks that require external knowledge and reasoning." + }, + { + "name": "AraDiCE_openbookqa_lev", + "description": "Open-book question answering tasks that require external knowledge and reasoning." + }, + { + "name": "AraDiCE_openbookqa_msa", + "description": "Open-book question answering tasks that require external knowledge and reasoning." + }, + { + "name": "AraDiCE_palestine_cultural", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_piqa_egy", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." 
+ }, + { + "name": "AraDiCE_piqa_eng", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_piqa_lev", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_piqa_msa", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_qatar_cultural", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_syria_cultural", + "description": "A collection of multiple tasks carefully designed to evaluate dialectal and cultural capabilities in large language models (LLMs)." + }, + { + "name": "AraDiCE_truthfulqa_mc1_egy", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "AraDiCE_truthfulqa_mc1_eng", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "AraDiCE_truthfulqa_mc1_lev", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "AraDiCE_truthfulqa_mc1_msa", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "AraDiCE_winogrande_egy", + "description": "A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge." + }, + { + "name": "AraDiCE_winogrande_eng", + "description": "A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge." 
+ }, + { + "name": "AraDiCE_winogrande_lev", + "description": "A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge." + }, + { + "name": "AraDiCE_winogrande_msa", + "description": "A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge." + }, + { + "name": "aclue_ancient_chinese_culture", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_ancient_literature", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_ancient_medical", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_ancient_phonetics", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_basic_ancient_chinese", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_couplet_prediction", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_homographic_character_resolution", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_named_entity_recognition", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_poetry_appreciate", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_poetry_context_prediction", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_poetry_quality_assessment", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." 
+ }, + { + "name": "aclue_poetry_sentiment_analysis", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_polysemy_resolution", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_reading_comprehension", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "aclue_sentence_segmentation", + "description": "Tasks focusing on ancient Chinese language understanding and cultural aspects." + }, + { + "name": "advanced_ai_risk_fewshot-coordinate-itself", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-coordinate-other-ais", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-coordinate-other-versions", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-corrigible-less-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-corrigible-more-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-corrigible-neutral-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-myopic-reward", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-one-box-tendency", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-power-seeking-inclination", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-self-awareness-general-ai", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-self-awareness-good-text-model", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-self-awareness-text-model", + "description": "No description available" + }, + { + "name": 
"advanced_ai_risk_fewshot-self-awareness-training-architecture", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "advanced_ai_risk_fewshot-self-awareness-training-web-gpt", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-survival-instinct", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_fewshot-wealth-seeking-inclination", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-coordinate-itself", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-coordinate-other-ais", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-coordinate-other-versions", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-corrigible-less-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-corrigible-more-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-corrigible-neutral-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-myopic-reward", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-one-box-tendency", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-power-seeking-inclination", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-self-awareness-general-ai", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-self-awareness-good-text-model", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-self-awareness-text-model", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-self-awareness-training-architecture", + "description": "Tasks involving complex reasoning over a diverse set of 
questions." + }, + { + "name": "advanced_ai_risk_human-self-awareness-web-gpt", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-survival-instinct", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_human-wealth-seeking-inclination", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-coordinate-itself", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-coordinate-other-ais", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-coordinate-other-versions", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-corrigible-less-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-corrigible-more-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-corrigible-neutral-HHH", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-myopic-reward", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-one-box-tendency", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-power-seeking-inclination", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-self-awareness-general-ai", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-self-awareness-good-text-model", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-self-awareness-text-model", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-self-awareness-training-architecture", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "advanced_ai_risk_lm-self-awareness-training-nn-architecture", + "description": "Tasks involving complex reasoning over a diverse set of questions." 
+ }, + { + "name": "advanced_ai_risk_lm-self-awareness-training-web-gpt", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-survival-instinct", + "description": "No description available" + }, + { + "name": "advanced_ai_risk_lm-wealth-seeking-inclination", + "description": "No description available" + }, + { + "name": "aexams_Biology", + "description": "Tasks in Arabic related to various academic exams covering a range of subjects." + }, + { + "name": "aexams_IslamicStudies", + "description": "Tasks in Arabic related to various academic exams covering a range of subjects." + }, + { + "name": "aexams_Physics", + "description": "Tasks in Arabic related to various academic exams covering a range of subjects." + }, + { + "name": "aexams_Science", + "description": "Tasks in Arabic related to various academic exams covering a range of subjects." + }, + { + "name": "aexams_Social", + "description": "Tasks in Arabic related to various academic exams covering a range of subjects." + }, + { + "name": "afrimgsm_direct_amh", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_eng", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_ewe", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_fra", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_hau", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_ibo", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_kin", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_lin", + "description": "Benchmark of multilingual grade-school math problems." 
+ }, + { + "name": "afrimgsm_direct_lug", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_orm", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_sna", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_sot", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_swa", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_twi", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_wol", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_xho", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_yor", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_direct_zul", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_amh", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_eng", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_ewe", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_fra", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_hau", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_ibo", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_kin", + "description": "Benchmark of multilingual grade-school math problems." 
+ }, + { + "name": "afrimgsm_en_cot_lin", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_lug", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_orm", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_sna", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_sot", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_swa", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_twi", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_wol", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_xho", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_yor", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_en_cot_zul", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_amh", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_eng", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_ewe", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_fra", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_hau", + "description": "Benchmark of multilingual grade-school math problems." 
+ }, + { + "name": "afrimgsm_translate_direct_ibo", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_kin", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_lin", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_lug", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_orm", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_sna", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_sot", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_swa", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_twi", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_wol", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_xho", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_yor", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimgsm_translate_direct_zul", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "afrimmlu_direct_amh", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_eng", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "afrimmlu_direct_ewe", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_fra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_hau", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_ibo", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_kin", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_lin", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_lug", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_orm", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_sna", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_sot", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_swa", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "afrimmlu_direct_twi", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_wol", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_xho", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_yor", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_direct_zul", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_amh", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_eng", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_ewe", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_fra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_hau", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "afrimmlu_translate_ibo", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_kin", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_lin", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_lug", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_orm", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_sna", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_sot", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_swa", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_twi", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_wol", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "afrimmlu_translate_xho", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_yor", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrimmlu_translate_zul", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "afrixnli_en_direct_amh", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_eng", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_ewe", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_fra", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_hau", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_ibo", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_kin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_lin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_lug", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "afrixnli_en_direct_orm", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_sna", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_sot", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_swa", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_twi", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_wol", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_xho", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_yor", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_en_direct_zul", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_amh", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_eng", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_ewe", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "afrixnli_manual_direct_fra", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_hau", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_ibo", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_kin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_lin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_lug", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_orm", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_sna", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_sot", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_swa", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_twi", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_wol", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "afrixnli_manual_direct_xho", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_yor", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_direct_zul", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_amh", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_ewe", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_fra", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_hau", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_ibo", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_kin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_lin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_lug", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_orm", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "afrixnli_manual_translate_sna", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_sot", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_swa", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_twi", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_wol", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_xho", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_yor", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_manual_translate_zul", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_amh", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_eng", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_ewe", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_fra", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "afrixnli_native_direct_hau", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_ibo", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_kin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_lin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_lug", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_orm", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_sna", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_sot", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_swa", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_twi", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_wol", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_xho", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "afrixnli_native_direct_yor", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_native_direct_zul", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_amh", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_ewe", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_fra", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_hau", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_ibo", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_kin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_lin", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_lug", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_orm", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_sna", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "afrixnli_translate_sot", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_swa", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_twi", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_wol", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_xho", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_yor", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "afrixnli_translate_zul", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "ag_news", + "description": "No description available" + }, + { + "name": "agieval_aqua_rat", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_biology", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_chemistry", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_chinese", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_english", + "description": "Tasks involving historical data or questions related to history and historical texts." 
+ }, + { + "name": "agieval_gaokao_geography", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_history", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_mathcloze", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_mathqa", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_gaokao_physics", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_jec_qa_ca", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_jec_qa_kd", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_logiqa_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_logiqa_zh", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_lsat_ar", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_lsat_lr", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_lsat_rc", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_math", + "description": "Tasks involving historical data or questions related to history and historical texts." 
+ }, + { + "name": "agieval_sat_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_sat_en_without_passage", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "agieval_sat_math", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "anagrams1", + "description": "No description available" + }, + { + "name": "anagrams2", + "description": "No description available" + }, + { + "name": "anli_r1", + "description": "Adversarial natural language inference tasks designed to test model robustness." + }, + { + "name": "anli_r2", + "description": "Adversarial natural language inference tasks designed to test model robustness." + }, + { + "name": "anli_r3", + "description": "Adversarial natural language inference tasks designed to test model robustness." + }, + { + "name": "arabic_exams", + "description": "No description available" + }, + { + "name": "arabic_exams_light", + "description": "No description available" + }, + { + "name": "arabic_leaderboard_acva_Algeria", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Algeria_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Ancient_Egypt", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Ancient_Egypt_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arab_Empire", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arab_Empire_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Architecture", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Architecture_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Art", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Art_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Astronomy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Astronomy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Calligraphy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Calligraphy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Ceremony", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Ceremony_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Clothing", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Clothing_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Culture", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Culture_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Food", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Food_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Funeral", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Funeral_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Geography", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Geography_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_History", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_History_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Language_Origin", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Language_Origin_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Literature", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Literature_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Math", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Math_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Medicine", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Medicine_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Music", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Music_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Ornament", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Ornament_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Philosophy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Philosophy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Physics_and_Chemistry", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Physics_and_Chemistry_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Wedding", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Arabic_Wedding_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Bahrain", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Bahrain_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Comoros", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Comoros_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Egypt_modern", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Egypt_modern_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromAncientEgypt", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromAncientEgypt_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromByzantium", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromByzantium_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromChina", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromChina_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromGreece", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromGreece_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromIslam", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromIslam_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromPersia", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromPersia_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromRome", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_InfluenceFromRome_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Iraq", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Iraq_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Islam_Education", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Islam_Education_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Islam_branches_and_schools", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Islam_branches_and_schools_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Islamic_law_system", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Islamic_law_system_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Jordan", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Jordan_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Kuwait", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Kuwait_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Lebanon", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Lebanon_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Libya", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Libya_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Mauritania", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Mauritania_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Mesopotamia_civilization", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Mesopotamia_civilization_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Morocco", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Morocco_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Oman", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Oman_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Palestine", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Palestine_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Qatar", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Qatar_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Saudi_Arabia", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Saudi_Arabia_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Somalia", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Somalia_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Sudan", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Sudan_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Syria", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Syria_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Tunisia", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Tunisia_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_United_Arab_Emirates", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_United_Arab_Emirates_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Yemen", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_Yemen_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_communication", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_communication_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_computer_and_phone", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_computer_and_phone_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_daily_life", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_daily_life_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_entertainment", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_acva_entertainment_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_mcq_exams_test_ar", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_mcq_exams_test_ar_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_meta_ar_dialects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_meta_ar_dialects_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_meta_ar_msa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_meta_ar_msa_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_facts_truefalse_balanced_task_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_grounded_statement_soqal_task_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_grounded_statement_xglue_mlqa_task_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_no_neutral_task_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_rating_sentiment_task_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_sentiment_task", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_alghafa_multiple_choice_sentiment_task_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_abstract_algebra", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_abstract_algebra_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_anatomy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_anatomy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_astronomy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_astronomy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_business_ethics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_business_ethics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_clinical_knowledge", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_clinical_knowledge_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_biology", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_biology_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_chemistry", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_chemistry_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_computer_science", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_computer_science_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_mathematics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_mathematics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_medicine", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_medicine_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_physics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_college_physics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_computer_security", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_computer_security_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_conceptual_physics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_conceptual_physics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_econometrics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_econometrics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_electrical_engineering", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_electrical_engineering_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_elementary_mathematics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_elementary_mathematics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_formal_logic", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_formal_logic_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_global_facts", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_global_facts_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_biology", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_biology_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_chemistry", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_chemistry_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_computer_science", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_computer_science_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_european_history", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_european_history_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_geography", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_geography_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_government_and_politics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_macroeconomics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_mathematics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_mathematics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_microeconomics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_physics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_physics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_psychology", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_psychology_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_statistics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_statistics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_us_history", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_us_history_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_world_history", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_high_school_world_history_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_human_aging", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_human_aging_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_human_sexuality", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_human_sexuality_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_international_law", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_international_law_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_jurisprudence", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_jurisprudence_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_logical_fallacies", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_logical_fallacies_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_machine_learning", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_machine_learning_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_management", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_management_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_marketing", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_marketing_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_medical_genetics", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_medical_genetics_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_miscellaneous", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_miscellaneous_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_moral_disputes", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_moral_disputes_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_moral_scenarios", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_moral_scenarios_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_nutrition", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_nutrition_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_philosophy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_philosophy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_prehistory", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_prehistory_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_accounting", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_accounting_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_law", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_law_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_medicine", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_medicine_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_psychology", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_professional_psychology_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_public_relations", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_public_relations_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_security_studies", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_security_studies_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_sociology", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_sociology_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_us_foreign_policy", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_us_foreign_policy_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_virology", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_virology_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_world_religions", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "arabic_leaderboard_arabic_mmlu_world_religions_light", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "arabic_mt_arc_challenge", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arabic_mt_arc_challenge_light", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arabic_mt_arc_easy", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arabic_mt_arc_easy_light", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arabic_mt_boolq", + "description": "No description available" + }, + { + "name": "arabic_mt_boolq_light", + "description": "No description available" + }, + { + "name": "arabic_mt_copa", + "description": "No description available" + }, + { + "name": "arabic_mt_copa_light", + "description": "No description available" + }, + { + "name": "arabic_mt_hellaswag", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "arabic_mt_hellaswag_light", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "arabic_mt_mmlu", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "arabic_mt_mmlu_light", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "arabic_mt_openbook_qa", + "description": "No description available" + }, + { + "name": "arabic_mt_openbook_qa_light", + "description": "No description available" + }, + { + "name": "arabic_mt_piqa", + "description": "Physical Interaction Question Answering tasks to test physical commonsense reasoning." 
+ }, + { + "name": "arabic_mt_piqa_light", + "description": "Physical Interaction Question Answering tasks to test physical commonsense reasoning." + }, + { + "name": "arabic_mt_race", + "description": "Reading comprehension assessment tasks based on English exams in China." + }, + { + "name": "arabic_mt_race_light", + "description": "Reading comprehension assessment tasks based on English exams in China." + }, + { + "name": "arabic_mt_sciq", + "description": "Science Question Answering tasks to assess understanding of scientific concepts." + }, + { + "name": "arabic_mt_sciq_light", + "description": "Science Question Answering tasks to assess understanding of scientific concepts." + }, + { + "name": "arabic_mt_toxigen", + "description": "Tasks designed to evaluate language models on their propensity to generate toxic content." + }, + { + "name": "arabic_mt_toxigen_light", + "description": "Tasks designed to evaluate language models on their propensity to generate toxic content." + }, + { + "name": "arabicmmlu_accounting_university", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_arabic_language_general", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_arabic_language_grammar", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_arabic_language_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_arabic_language_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_arabic_language_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "arabicmmlu_biology_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_civics_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_civics_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_computer_science_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_computer_science_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_computer_science_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_computer_science_university", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_driving_test", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_economics_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_economics_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_economics_university", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_general_knowledge", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "arabicmmlu_general_knowledge_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_general_knowledge_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_geography_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_geography_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_geography_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_history_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_history_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_history_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_islamic_studies", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_islamic_studies_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_islamic_studies_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_islamic_studies_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." 
+ }, + { + "name": "arabicmmlu_law_professional", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_management_university", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_math_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_natural_science_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_natural_science_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_philosophy_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_physics_high_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_political_science_university", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_social_science_middle_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arabicmmlu_social_science_primary_school", + "description": "Localized Arabic version of MMLU with multiple-choice questions from 40 subjects." + }, + { + "name": "arc_ar", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_bn", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_ca", + "description": "Tasks involving complex reasoning over a diverse set of questions." 
+ }, + { + "name": "arc_ca_challenge", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_ca_easy", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_chat", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_da", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_de", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_el", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_es", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_fi", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_hu", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_is", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_it", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_nb", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_pl", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_challenge_mt_pt", + "description": "Tasks involving complex reasoning over a diverse set of questions." 
+ }, + { + "name": "arc_challenge_mt_sv", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_da", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_de", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_easy", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_es", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_eu", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_eu_challenge", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_eu_easy", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_fr", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_gu", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_hi", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_hr", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_hu", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_hy", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_id", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_it", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_kn", + "description": "Tasks involving complex reasoning over a diverse set of questions." 
+ }, + { + "name": "arc_ml", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_mr", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_ne", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_nl", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_pt", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_ro", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_ru", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_sk", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_sr", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_sv", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_ta", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_te", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_uk", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_vi", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "arc_zh", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "argument_topic", + "description": "No description available" + }, + { + "name": "arithmetic_1dc", + "description": "Tasks involving numerical computations and arithmetic reasoning." 
+ }, + { + "name": "arithmetic_2da", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_2dm", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_2ds", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_3da", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_3ds", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_4da", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_4ds", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_5da", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "arithmetic_5ds", + "description": "Tasks involving numerical computations and arithmetic reasoning." + }, + { + "name": "asdiv", + "description": "Tasks involving arithmetic and mathematical reasoning challenges." + }, + { + "name": "asdiv_cot_llama", + "description": "Tasks involving arithmetic and mathematical reasoning challenges." + }, + { + "name": "assin_entailment", + "description": "No description available" + }, + { + "name": "assin_paraphrase", + "description": "No description available" + }, + { + "name": "atis", + "description": "No description available" + }, + { + "name": "babi", + "description": "Tasks designed as question and answering challenges based on simulated stories." + }, + { + "name": "banking77", + "description": "No description available" + }, + { + "name": "bbh_cot_fewshot_boolean_expressions", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_cot_fewshot_causal_judgement", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_date_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_disambiguation_qa", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_dyck_languages", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_formal_fallacies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_geometric_shapes", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_hyperbaton", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_logical_deduction_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_logical_deduction_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_logical_deduction_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_movie_recommendation", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_multistep_arithmetic_two", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_cot_fewshot_navigate", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_object_counting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_penguins_in_a_table", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_reasoning_about_colored_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_ruin_names", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_salient_translation_error_detection", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_snarks", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_sports_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_temporal_sequences", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_tracking_shuffled_objects_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_tracking_shuffled_objects_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_tracking_shuffled_objects_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_cot_fewshot_web_of_lies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_fewshot_word_sorting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_boolean_expressions", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_causal_judgement", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_date_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_disambiguation_qa", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_dyck_languages", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_formal_fallacies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_geometric_shapes", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_hyperbaton", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_logical_deduction_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_logical_deduction_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_cot_zeroshot_logical_deduction_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_movie_recommendation", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_multistep_arithmetic_two", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_navigate", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_object_counting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_penguins_in_a_table", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_reasoning_about_colored_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_ruin_names", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_salient_translation_error_detection", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_snarks", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_sports_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_temporal_sequences", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_cot_zeroshot_tracking_shuffled_objects_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_tracking_shuffled_objects_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_tracking_shuffled_objects_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_web_of_lies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_cot_zeroshot_word_sorting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_boolean_expressions", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_causal_judgement", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_date_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_disambiguation_qa", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_dyck_languages", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_formal_fallacies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_geometric_shapes", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_fewshot_hyperbaton", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_logical_deduction_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_logical_deduction_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_logical_deduction_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_movie_recommendation", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_multistep_arithmetic_two", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_navigate", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_object_counting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_penguins_in_a_table", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_reasoning_about_colored_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_ruin_names", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_salient_translation_error_detection", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_fewshot_snarks", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_sports_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_temporal_sequences", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_tracking_shuffled_objects_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_tracking_shuffled_objects_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_tracking_shuffled_objects_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_web_of_lies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_fewshot_word_sorting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_boolean_expressions", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_causal_judgement", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_date_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_disambiguation_qa", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_zeroshot_dyck_languages", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_formal_fallacies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_geometric_shapes", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_hyperbaton", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_logical_deduction_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_logical_deduction_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_logical_deduction_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_movie_recommendation", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_multistep_arithmetic_two", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_navigate", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_object_counting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_penguins_in_a_table", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." 
+ }, + { + "name": "bbh_zeroshot_reasoning_about_colored_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_ruin_names", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_salient_translation_error_detection", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_snarks", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_sports_understanding", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_temporal_sequences", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_tracking_shuffled_objects_five_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_tracking_shuffled_objects_seven_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_tracking_shuffled_objects_three_objects", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_web_of_lies", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bbh_zeroshot_word_sorting", + "description": "Tasks focused on deep semantic understanding through hypothesization and reasoning." + }, + { + "name": "bec2016eu", + "description": "No description available" + }, + { + "name": "belebele_acm_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_afr_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_als_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_amh_Ethi", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_apc_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_arb_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_arb_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ars_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ary_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_arz_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_asm_Beng", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_azj_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_bam_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ben_Beng", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ben_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_bod_Tibt", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_bul_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_cat_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ceb_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ces_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ckb_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_dan_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_deu_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ell_Grek", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_eng_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_est_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_eus_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_fin_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_fra_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_fuv_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_gaz_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_glg_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_grn_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_guj_Gujr", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_hat_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_hau_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_heb_Hebr", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_hin_Deva", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_hin_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_hrv_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_hun_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_hye_Armn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ibo_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ilo_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ind_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_isl_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_ita_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_jav_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_jpn_Jpan", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kac_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kan_Knda", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kat_Geor", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kaz_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kea_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_khk_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_khm_Khmr", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kin_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kir_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_kor_Hang", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_lao_Laoo", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_lin_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_lit_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_lug_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_luo_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_lvs_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_mal_Mlym", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_mar_Deva", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_mkd_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_mlt_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_mri_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_mya_Mymr", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_nld_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_nob_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_npi_Deva", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_npi_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_nso_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_nya_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ory_Orya", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_pan_Guru", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_pbt_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_pes_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_plt_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_pol_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_por_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ron_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_rus_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_shn_Mymr", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_sin_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_sin_Sinh", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_slk_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_slv_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_sna_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_snd_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_som_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_sot_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_spa_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_srp_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ssw_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_sun_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_swe_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_swh_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tam_Taml", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tel_Telu", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tgk_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tgl_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tha_Thai", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_tir_Ethi", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tsn_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tso_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_tur_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_ukr_Cyrl", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_urd_Arab", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_urd_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_uzn_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_vie_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_war_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_wol_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_xho_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_yor_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_zho_Hans", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_zho_Hant", + "description": "Language understanding tasks in a variety of languages and scripts." 
+ }, + { + "name": "belebele_zsm_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "belebele_zul_Latn", + "description": "Language understanding tasks in a variety of languages and scripts." + }, + { + "name": "bertaqa_en", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_gemma-7b", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_hitz", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_itzuli", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_latxa-13b-v1", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_latxa-13b-v1.1", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_latxa-70b-v1", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_latxa-70b-v1.1", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_latxa-7b-v1", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_latxa-7b-v1.1", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_llama-2-13b", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_llama-2-70b", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." 
+ }, + { + "name": "bertaqa_en_mt_llama-2-7b", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_madlad", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_en_mt_nllb", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bertaqa_eu", + "description": "Local Basque cultural trivia QA tests in English and Basque languages." + }, + { + "name": "bhtc_v2", + "description": "No description available" + }, + { + "name": "bigbench_abstract_narrative_understanding_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_abstract_narrative_understanding_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_anachronisms_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_anachronisms_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_analogical_similarity_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_analogical_similarity_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_analytic_entailment_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_analytic_entailment_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_arithmetic_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_arithmetic_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_ascii_word_recognition_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_authorship_verification_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_authorship_verification_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_auto_categorization_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_auto_debugging_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_bbq_lite_json_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_bbq_lite_json_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_bridging_anaphora_resolution_barqa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_causal_judgment_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_causal_judgment_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cause_and_effect_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cause_and_effect_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_checkmate_in_one_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_checkmate_in_one_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_chess_state_tracking_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_chinese_remainder_theorem_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cifar10_classification_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cifar10_classification_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_code_line_description_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_code_line_description_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_codenames_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_color_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_color_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_common_morpheme_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_common_morpheme_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_conceptual_combinations_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_conceptual_combinations_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_conlang_translation_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_contextual_parametric_knowledge_conflicts_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_contextual_parametric_knowledge_conflicts_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_crash_blossom_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_crash_blossom_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_crass_ai_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_crass_ai_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cryobiology_spanish_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cryobiology_spanish_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cryptonite_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cs_algorithms_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_cs_algorithms_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_dark_humor_detection_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_dark_humor_detection_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_date_understanding_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_date_understanding_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_disambiguation_qa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_disambiguation_qa_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_discourse_marker_prediction_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_discourse_marker_prediction_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_disfl_qa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_dyck_languages_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_dyck_languages_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_elementary_math_qa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_elementary_math_qa_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_emoji_movie_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_emoji_movie_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_emojis_emotion_prediction_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_emojis_emotion_prediction_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_empirical_judgments_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_empirical_judgments_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_english_proverbs_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_english_proverbs_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_english_russian_proverbs_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_english_russian_proverbs_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_entailed_polarity_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_entailed_polarity_hindi_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_entailed_polarity_hindi_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_entailed_polarity_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_epistemic_reasoning_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_epistemic_reasoning_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_evaluating_information_essentiality_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_evaluating_information_essentiality_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_fact_checker_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_fact_checker_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_fantasy_reasoning_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_fantasy_reasoning_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_few_shot_nlg_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_figure_of_speech_detection_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_figure_of_speech_detection_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_formal_fallacies_syllogisms_negation_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_formal_fallacies_syllogisms_negation_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_gem_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_gender_inclusive_sentences_german_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_general_knowledge_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_general_knowledge_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_geometric_shapes_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_geometric_shapes_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_goal_step_wikihow_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_goal_step_wikihow_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_gre_reading_comprehension_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_gre_reading_comprehension_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hhh_alignment_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hhh_alignment_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hindi_question_answering_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hindu_knowledge_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hindu_knowledge_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hinglish_toxicity_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hinglish_toxicity_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_human_organs_senses_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_human_organs_senses_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hyperbaton_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_hyperbaton_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_identify_math_theorems_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_identify_math_theorems_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_identify_odd_metaphor_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_identify_odd_metaphor_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_implicatures_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_implicatures_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_implicit_relations_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_implicit_relations_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_intent_recognition_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_intent_recognition_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_international_phonetic_alphabet_nli_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_international_phonetic_alphabet_nli_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_international_phonetic_alphabet_transliterate_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_intersect_geometry_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_intersect_geometry_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_irony_identification_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_irony_identification_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_kanji_ascii_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_kanji_ascii_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_kannada_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_kannada_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_key_value_maps_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_key_value_maps_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_known_unknowns_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_known_unknowns_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_language_games_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_language_identification_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_language_identification_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_linguistic_mappings_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_linguistics_puzzles_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_list_functions_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logic_grid_puzzle_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logic_grid_puzzle_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logical_args_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logical_args_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logical_deduction_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logical_deduction_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logical_fallacy_detection_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logical_fallacy_detection_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_logical_sequence_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_logical_sequence_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_mathematical_induction_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_mathematical_induction_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_matrixshapes_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_metaphor_boolean_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_metaphor_boolean_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_metaphor_understanding_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_metaphor_understanding_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_minute_mysteries_qa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_misconceptions_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_misconceptions_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_misconceptions_russian_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_misconceptions_russian_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_mnist_ascii_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_mnist_ascii_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_modified_arithmetic_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_moral_permissibility_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_moral_permissibility_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_movie_dialog_same_or_different_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_movie_dialog_same_or_different_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_movie_recommendation_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_movie_recommendation_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_mult_data_wrangling_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_multiemo_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_multiemo_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_natural_instructions_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_navigate_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_navigate_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_nonsense_words_grammar_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_nonsense_words_grammar_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_novel_concepts_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_novel_concepts_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_object_counting_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_odd_one_out_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_odd_one_out_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_operators_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_paragraph_segmentation_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_parsinlu_qa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_parsinlu_qa_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_parsinlu_reading_comprehension_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_penguins_in_a_table_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_penguins_in_a_table_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_periodic_elements_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_periodic_elements_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_persian_idioms_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_persian_idioms_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_phrase_relatedness_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_phrase_relatedness_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_physical_intuition_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_physical_intuition_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_physics_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_physics_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_physics_questions_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_play_dialog_same_or_different_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_play_dialog_same_or_different_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_polish_sequence_labeling_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_presuppositions_as_nli_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_presuppositions_as_nli_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_qa_wikidata_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_question_selection_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_question_selection_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_real_or_fake_text_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_real_or_fake_text_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_reasoning_about_colored_objects_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_reasoning_about_colored_objects_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_repeat_copy_logic_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_rephrase_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_riddle_sense_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_riddle_sense_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_ruin_names_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_ruin_names_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_salient_translation_error_detection_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_salient_translation_error_detection_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_scientific_press_release_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_semantic_parsing_in_context_sparc_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_semantic_parsing_spider_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_sentence_ambiguity_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_sentence_ambiguity_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_similarities_abstraction_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_similarities_abstraction_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_simp_turing_concept_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_simple_arithmetic_json_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_simple_arithmetic_json_multiple_choice_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_simple_arithmetic_json_subtasks_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_simple_arithmetic_multiple_targets_json_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_simple_ethical_questions_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_simple_ethical_questions_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_simple_text_editing_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_snarks_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_snarks_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_social_iqa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_social_iqa_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_social_support_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_social_support_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_sports_understanding_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_sports_understanding_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_strange_stories_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_strange_stories_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_strategyqa_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_strategyqa_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_sufficient_information_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_suicide_risk_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_suicide_risk_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_swahili_english_proverbs_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_swahili_english_proverbs_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_swedish_to_german_proverbs_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_swedish_to_german_proverbs_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_symbol_interpretation_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_symbol_interpretation_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_temporal_sequences_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_temporal_sequences_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_tense_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_timedial_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_timedial_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_topical_chat_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_tracking_shuffled_objects_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_tracking_shuffled_objects_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_understanding_fables_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_understanding_fables_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_undo_permutation_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_undo_permutation_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_unit_conversion_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_unit_conversion_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_unit_interpretation_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_unit_interpretation_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_unnatural_in_context_learning_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_vitaminc_fact_verification_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_vitaminc_fact_verification_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_what_is_the_tao_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_what_is_the_tao_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_which_wiki_edit_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." 
+ }, + { + "name": "bigbench_which_wiki_edit_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_winowhy_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_winowhy_multiple_choice", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_word_sorting_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "bigbench_word_unscrambling_generate_until", + "description": "Broad tasks from the BIG-bench benchmark designed to push the boundaries of large models." + }, + { + "name": "blimp_adjunct_island", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_anaphor_gender_agreement", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_anaphor_number_agreement", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_animate_subject_passive", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_animate_subject_trans", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_causative", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_complex_NP_island", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." 
+ }, + { + "name": "blimp_coordinate_structure_constraint_complex_left_branch", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_coordinate_structure_constraint_object_extraction", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_irregular_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_irregular_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_with_adj_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_with_adj_irregular_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_with_adj_irregular_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_determiner_noun_agreement_with_adjective_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_distractor_agreement_relational_noun", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." 
+ }, + { + "name": "blimp_distractor_agreement_relative_clause", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_drop_argument", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_ellipsis_n_bar_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_ellipsis_n_bar_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_existential_there_object_raising", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_existential_there_quantifiers_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_existential_there_quantifiers_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_existential_there_subject_raising", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_expletive_it_object_raising", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_inchoative", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_intransitive", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_irregular_past_participle_adjectives", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." 
+ }, + { + "name": "blimp_irregular_past_participle_verbs", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_irregular_plural_subject_verb_agreement_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_irregular_plural_subject_verb_agreement_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_left_branch_island_echo_question", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_left_branch_island_simple_question", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_matrix_question_npi_licensor_present", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_npi_present_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_npi_present_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_only_npi_licensor_present", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_only_npi_scope", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_passive_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_passive_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." 
+ }, + { + "name": "blimp_principle_A_c_command", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_principle_A_case_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_principle_A_case_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_principle_A_domain_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_principle_A_domain_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_principle_A_domain_3", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_principle_A_reconstruction", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_regular_plural_subject_verb_agreement_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_regular_plural_subject_verb_agreement_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_sentential_negation_npi_licensor_present", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_sentential_negation_npi_scope", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_sentential_subject_island", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." 
+ }, + { + "name": "blimp_superlative_quantifiers_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_superlative_quantifiers_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_tough_vs_raising_1", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_tough_vs_raising_2", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_transitive", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_wh_island", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_wh_questions_object_gap", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_wh_questions_subject_gap", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_wh_questions_subject_gap_long_distance", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_wh_vs_that_no_gap", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_wh_vs_that_no_gap_long_distance", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "blimp_wh_vs_that_with_gap", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." 
+ }, + { + "name": "blimp_wh_vs_that_with_gap_long_distance", + "description": "Tasks testing grammatical phenomena to evaluate language model's linguistic capabilities." + }, + { + "name": "boolq", + "description": "No description available" + }, + { + "name": "boolq-seq2seq", + "description": "No description available" + }, + { + "name": "cabreu_abstractive", + "description": "No description available" + }, + { + "name": "cabreu_extractive", + "description": "No description available" + }, + { + "name": "cabreu_extreme", + "description": "No description available" + }, + { + "name": "catalanqa", + "description": "No description available" + }, + { + "name": "catcola", + "description": "No description available" + }, + { + "name": "cb", + "description": "No description available" + }, + { + "name": "ceval-valid_accountant", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_advanced_mathematics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_art_studies", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_basic_medicine", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_business_administration", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_chinese_language_and_literature", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_civil_servant", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." 
+ }, + { + "name": "ceval-valid_clinical_medicine", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_college_chemistry", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_college_economics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_college_physics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_college_programming", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_computer_architecture", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_computer_network", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_discrete_mathematics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_education_science", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_electrical_engineer", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_environmental_impact_assessment_engineer", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_fire_engineer", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." 
+ }, + { + "name": "ceval-valid_high_school_biology", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_high_school_chemistry", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_high_school_chinese", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_high_school_geography", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_high_school_history", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_high_school_mathematics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_high_school_physics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_high_school_politics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_ideological_and_moral_cultivation", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_law", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_legal_professional", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_logic", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." 
+ }, + { + "name": "ceval-valid_mao_zedong_thought", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_marxism", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_metrology_engineer", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_middle_school_biology", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_middle_school_chemistry", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_middle_school_geography", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_middle_school_history", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_middle_school_mathematics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_middle_school_physics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_middle_school_politics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_modern_chinese_history", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_operating_system", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." 
+ }, + { + "name": "ceval-valid_physician", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_plant_protection", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_probability_and_statistics", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_professional_tour_guide", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_sports_science", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_tax_accountant", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_teacher_qualification", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_urban_and_rural_planner", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "ceval-valid_veterinary_medicine", + "description": "Tasks that evaluate language understanding and reasoning in an educational context." + }, + { + "name": "claim_stance_topic", + "description": "No description available" + }, + { + "name": "cmmlu_agronomy", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_anatomy", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_ancient_chinese", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." 
+ }, + { + "name": "cmmlu_arts", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_astronomy", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_business_ethics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_chinese_civil_service_exam", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_chinese_driving_rule", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_chinese_food_culture", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_chinese_foreign_policy", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_chinese_history", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_chinese_literature", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_chinese_teacher_qualification", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_clinical_knowledge", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_college_actuarial_science", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_college_education", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." 
+ }, + { + "name": "cmmlu_college_engineering_hydrology", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_college_law", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_college_mathematics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_college_medical_statistics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_college_medicine", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_computer_science", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_computer_security", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_conceptual_physics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_construction_project_management", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_economics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_education", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_electrical_engineering", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_elementary_chinese", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." 
+ }, + { + "name": "cmmlu_elementary_commonsense", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_elementary_information_and_technology", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_elementary_mathematics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_ethnology", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_food_science", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_genetics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_global_facts", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_high_school_biology", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_high_school_chemistry", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_high_school_geography", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_high_school_mathematics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_high_school_physics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_high_school_politics", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." 
+ }, + { + "name": "cmmlu_human_sexuality", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_international_law", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_journalism", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_jurisprudence", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_legal_and_moral_basis", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_logical", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_machine_learning", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_management", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_marketing", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_marxist_theory", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_modern_chinese", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_nutrition", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_philosophy", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." 
+ }, + { + "name": "cmmlu_professional_accounting", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_professional_law", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_professional_medicine", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_professional_psychology", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_public_relations", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_security_study", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_sociology", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_sports_science", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_traditional_chinese_medicine", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_virology", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_world_history", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." + }, + { + "name": "cmmlu_world_religions", + "description": "Multi-subject multiple choice question tasks for comprehensive academic assessment." 
+ }, + { + "name": "cnn_dailymail", + "description": "No description available" + }, + { + "name": "cocoteros_es", + "description": "No description available" + }, + { + "name": "code2text_go", + "description": "No description available" + }, + { + "name": "code2text_java", + "description": "No description available" + }, + { + "name": "code2text_javascript", + "description": "No description available" + }, + { + "name": "code2text_php", + "description": "No description available" + }, + { + "name": "code2text_python", + "description": "No description available" + }, + { + "name": "code2text_ruby", + "description": "No description available" + }, + { + "name": "coedit_gec", + "description": "No description available" + }, + { + "name": "cola", + "description": "No description available" + }, + { + "name": "commonsense_qa", + "description": "CommonsenseQA, a multiple-choice QA dataset for measuring commonsense knowledge." + }, + { + "name": "copa", + "description": "No description available" + }, + { + "name": "copa_ar", + "description": "No description available" + }, + { + "name": "copa_ca", + "description": "No description available" + }, + { + "name": "copa_es", + "description": "No description available" + }, + { + "name": "copal_id_colloquial", + "description": "Indonesian causal commonsense reasoning dataset that captures local nuances." + }, + { + "name": "copal_id_standard", + "description": "Indonesian causal commonsense reasoning dataset that captures local nuances." + }, + { + "name": "coqa", + "description": "Conversational question answering tasks to test dialog understanding." + }, + { + "name": "coqcat", + "description": "No description available" + }, + { + "name": "crows_pairs_english", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_age", + "description": "Tasks designed to test model biases in various sociodemographic groups." 
+ }, + { + "name": "crows_pairs_english_autre", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_disability", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_gender", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_nationality", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_physical_appearance", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_race_color", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_religion", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_sexual_orientation", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_english_socioeconomic", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_age", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_autre", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_disability", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_gender", + "description": "Tasks designed to test model biases in various sociodemographic groups." 
+ }, + { + "name": "crows_pairs_french_nationality", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_physical_appearance", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_race_color", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_religion", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_sexual_orientation", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "crows_pairs_french_socioeconomic", + "description": "Tasks designed to test model biases in various sociodemographic groups." + }, + { + "name": "csatqa_gr", + "description": "Tasks related to SAT and other standardized testing questions for academic assessment." + }, + { + "name": "csatqa_li", + "description": "Tasks related to SAT and other standardized testing questions for academic assessment." + }, + { + "name": "csatqa_rch", + "description": "Tasks related to SAT and other standardized testing questions for academic assessment." + }, + { + "name": "csatqa_rcs", + "description": "Tasks related to SAT and other standardized testing questions for academic assessment." + }, + { + "name": "csatqa_rcss", + "description": "Tasks related to SAT and other standardized testing questions for academic assessment." + }, + { + "name": "csatqa_wr", + "description": "Tasks related to SAT and other standardized testing questions for academic assessment." + }, + { + "name": "cycle_letters", + "description": "No description available" + }, + { + "name": "dbpedia_14", + "description": "No description available" + }, + { + "name": "drop", + "description": "Tasks requiring numerical reasoning, reading comprehension, and question answering." 
+ }, + { + "name": "epec_koref_bin", + "description": "No description available" + }, + { + "name": "eq_bench", + "description": "Tasks focused on equality and ethics in question answering and decision-making." + }, + { + "name": "escola", + "description": "No description available" + }, + { + "name": "ethics_cm", + "description": "No description available" + }, + { + "name": "ethics_deontology", + "description": "No description available" + }, + { + "name": "ethics_justice", + "description": "No description available" + }, + { + "name": "ethics_utilitarianism", + "description": "No description available" + }, + { + "name": "ethics_virtue", + "description": "No description available" + }, + { + "name": "ethos_binary", + "description": "No description available" + }, + { + "name": "eus_exams_es_ejadministrativo", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_ejauxiliar", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_ejsubalterno", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_ejtecnico", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeayuntamientovitoria", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opebilbao", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehuadmin", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehuaux", + "description": "Tasks based on various professional and academic exams in the Basque language." 
+ }, + { + "name": "eus_exams_es_opeehubiblio", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehuderecho", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehueconomicas", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehuempresariales", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehusubalterno", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehutecnico", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeehutecnicob", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakiadmin", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakiaux", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakiauxenf", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakicelador", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakienf", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakijuridico", + "description": "Tasks based on various professional and academic exams in the Basque language." 
+ }, + { + "name": "eus_exams_es_opeosakioperario", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakitecnico", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_opeosakivarios", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza1c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza2c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza3c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza4c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza5c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza6c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza7c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza8c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_es_osakidetza9c", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_ejadministrari", + "description": "Tasks based on various professional and academic exams in the Basque language." 
+ }, + { + "name": "eus_exams_eu_ejlaguntza", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_ejlaguntzaile", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_ejteknikari", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opebilbaoeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehuadmineu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehuauxeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehubiblioeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehuderechoeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehueconomicaseu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehuempresarialeseu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehusubalternoeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehutecnicoeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeehuteknikarib", + "description": "Tasks based on various professional and academic exams in the Basque language." 
+ }, + { + "name": "eus_exams_eu_opegasteizkoudala", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakiadmineu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakiauxenfeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakiauxeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakiceladoreu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakienfeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakioperarioeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakitecnicoeu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_opeosakivarioseu", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_osakidetza1e", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_osakidetza2e", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_osakidetza3e", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_osakidetza5e", + "description": "Tasks based on various professional and academic exams in the Basque language." 
+ }, + { + "name": "eus_exams_eu_osakidetza6e", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_exams_eu_osakidetza7e", + "description": "Tasks based on various professional and academic exams in the Basque language." + }, + { + "name": "eus_proficiency", + "description": "Tasks designed to test proficiency in the Basque language across various topics." + }, + { + "name": "eus_reading", + "description": "Reading comprehension tasks specifically designed for the Basque language." + }, + { + "name": "eus_trivia", + "description": "Trivia and knowledge testing tasks in the Basque language." + }, + { + "name": "fda", + "description": "Tasks for extracting key-value pairs from FDA documents to test information extraction." + }, + { + "name": "financial_tweets", + "description": "No description available" + }, + { + "name": "fld_default", + "description": "Tasks involving free-form and directed dialogue understanding." + }, + { + "name": "fld_logical_formula_default", + "description": "Tasks involving free-form and directed dialogue understanding." + }, + { + "name": "fld_logical_formula_star", + "description": "Tasks involving free-form and directed dialogue understanding." + }, + { + "name": "fld_star", + "description": "Tasks involving free-form and directed dialogue understanding." 
+ }, + { + "name": "flores_ca-de", + "description": "No description available" + }, + { + "name": "flores_ca-en", + "description": "No description available" + }, + { + "name": "flores_ca-es", + "description": "No description available" + }, + { + "name": "flores_ca-eu", + "description": "No description available" + }, + { + "name": "flores_ca-fr", + "description": "No description available" + }, + { + "name": "flores_ca-gl", + "description": "No description available" + }, + { + "name": "flores_ca-it", + "description": "No description available" + }, + { + "name": "flores_ca-pt", + "description": "No description available" + }, + { + "name": "flores_de-ca", + "description": "No description available" + }, + { + "name": "flores_de-es", + "description": "No description available" + }, + { + "name": "flores_de-eu", + "description": "No description available" + }, + { + "name": "flores_de-gl", + "description": "No description available" + }, + { + "name": "flores_de-pt", + "description": "No description available" + }, + { + "name": "flores_en-ca", + "description": "No description available" + }, + { + "name": "flores_en-es", + "description": "No description available" + }, + { + "name": "flores_en-eu", + "description": "No description available" + }, + { + "name": "flores_en-gl", + "description": "No description available" + }, + { + "name": "flores_en-pt", + "description": "No description available" + }, + { + "name": "flores_es-ca", + "description": "No description available" + }, + { + "name": "flores_es-de", + "description": "No description available" + }, + { + "name": "flores_es-en", + "description": "No description available" + }, + { + "name": "flores_es-eu", + "description": "No description available" + }, + { + "name": "flores_es-fr", + "description": "No description available" + }, + { + "name": "flores_es-gl", + "description": "No description available" + }, + { + "name": "flores_es-it", + "description": "No description available" + }, + { + "name": 
"flores_es-pt", + "description": "No description available" + }, + { + "name": "flores_eu-ca", + "description": "No description available" + }, + { + "name": "flores_eu-de", + "description": "No description available" + }, + { + "name": "flores_eu-en", + "description": "No description available" + }, + { + "name": "flores_eu-es", + "description": "No description available" + }, + { + "name": "flores_eu-fr", + "description": "No description available" + }, + { + "name": "flores_eu-gl", + "description": "No description available" + }, + { + "name": "flores_eu-it", + "description": "No description available" + }, + { + "name": "flores_eu-pt", + "description": "No description available" + }, + { + "name": "flores_fr-ca", + "description": "No description available" + }, + { + "name": "flores_fr-es", + "description": "No description available" + }, + { + "name": "flores_fr-eu", + "description": "No description available" + }, + { + "name": "flores_fr-gl", + "description": "No description available" + }, + { + "name": "flores_fr-pt", + "description": "No description available" + }, + { + "name": "flores_gl-ca", + "description": "No description available" + }, + { + "name": "flores_gl-de", + "description": "No description available" + }, + { + "name": "flores_gl-en", + "description": "No description available" + }, + { + "name": "flores_gl-es", + "description": "No description available" + }, + { + "name": "flores_gl-eu", + "description": "No description available" + }, + { + "name": "flores_gl-fr", + "description": "No description available" + }, + { + "name": "flores_gl-it", + "description": "No description available" + }, + { + "name": "flores_gl-pt", + "description": "No description available" + }, + { + "name": "flores_it-ca", + "description": "No description available" + }, + { + "name": "flores_it-es", + "description": "No description available" + }, + { + "name": "flores_it-eu", + "description": "No description available" + }, + { + "name": "flores_it-gl", + 
"description": "No description available" + }, + { + "name": "flores_it-pt", + "description": "No description available" + }, + { + "name": "flores_pt-ca", + "description": "No description available" + }, + { + "name": "flores_pt-de", + "description": "No description available" + }, + { + "name": "flores_pt-en", + "description": "No description available" + }, + { + "name": "flores_pt-es", + "description": "No description available" + }, + { + "name": "flores_pt-eu", + "description": "No description available" + }, + { + "name": "flores_pt-fr", + "description": "No description available" + }, + { + "name": "flores_pt-gl", + "description": "No description available" + }, + { + "name": "flores_pt-it", + "description": "No description available" + }, + { + "name": "french_bench_arc_challenge", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_boolqa", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_fquadv2", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_fquadv2_bool", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_fquadv2_genq", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_fquadv2_hasAns", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_grammar", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_hellaswag", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_multifquad", + "description": "Set of tasks designed to assess language model performance in French." 
+ }, + { + "name": "french_bench_opus_perplexity", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_orangesum_abstract", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_orangesum_title", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_reading_comp", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_topic_based_nli", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_trivia", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_vocab", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_wikitext_fr", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "french_bench_xnli", + "description": "Set of tasks designed to assess language model performance in French." + }, + { + "name": "galcola", + "description": "No description available" + }, + { + "name": "glianorex", + "description": "No description available" + }, + { + "name": "glianorex_en", + "description": "No description available" + }, + { + "name": "glianorex_fr", + "description": "No description available" + }, + { + "name": "global_mmlu_ar_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ar_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_ar_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ar_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ar_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ar_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_bn_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_bn_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_bn_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_bn_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_bn_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_bn_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_de_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_de_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_de_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_de_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_de_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_de_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_en_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_en_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_en_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_en_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_en_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_en_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_es_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_es_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_es_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_es_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_es_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_es_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_fr_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_fr_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_fr_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_fr_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_fr_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_fr_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_am_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_am_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_am_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_am_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_am_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_am_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_am_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ar_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ar_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_bn_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_bn_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_bn_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_bn_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_bn_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_bn_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_bn_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_bn_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_cs_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_cs_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_de_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_de_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_de_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_de_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_de_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_de_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_de_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_el_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_el_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_en_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_en_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_en_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_en_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_en_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_en_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_en_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_en_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_es_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_es_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fa_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fa_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fa_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fa_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fa_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fa_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fa_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fil_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fil_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fil_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fil_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fil_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fil_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fil_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_fr_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_fr_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ha_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ha_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ha_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ha_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ha_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ha_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ha_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_he_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_he_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_he_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_he_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_he_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_he_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_he_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_hi_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_hi_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_hi_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_hi_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_hi_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_hi_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_hi_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_id_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_id_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ig_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ig_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ig_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ig_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ig_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ig_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ig_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_it_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_it_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_it_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_it_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_it_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_it_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_it_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ja_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ja_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ja_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ja_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ja_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ja_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ja_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ko_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ko_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ko_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ko_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ko_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ko_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ko_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ko_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ky_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ky_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_lt_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_lt_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_lt_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_lt_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_lt_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_lt_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_lt_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_mg_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_mg_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ms_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ms_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ms_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ms_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ms_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ms_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ms_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ms_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ne_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ne_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_nl_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_nl_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_nl_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_nl_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_nl_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_nl_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_nl_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ny_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ny_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pl_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pl_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pl_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pl_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pl_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pl_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pl_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pl_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pt_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pt_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pt_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pt_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pt_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_pt_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_pt_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ro_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ro_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ro_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ro_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ro_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ro_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ro_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ru_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ru_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ru_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ru_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ru_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_ru_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_ru_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_si_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_si_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sn_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sn_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sn_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sn_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sn_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sn_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sn_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_so_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_so_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_so_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_so_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_so_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_so_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_so_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sr_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sr_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sr_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sr_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sr_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sr_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sr_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sv_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sv_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sw_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sw_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sw_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sw_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sw_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_sw_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_sw_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_te_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_te_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_te_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_te_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_te_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_te_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_te_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_tr_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_tr_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_uk_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_uk_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_uk_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_uk_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_uk_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_uk_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_uk_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_uk_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_vi_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_vi_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_yo_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_yo_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_yo_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_yo_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_yo_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_yo_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_yo_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_abstract_algebra", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_anatomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_astronomy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_business_ethics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_clinical_knowledge", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_college_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_college_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_college_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_college_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_college_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_college_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_computer_security", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_conceptual_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_econometrics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_electrical_engineering", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_elementary_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_formal_logic", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_global_facts", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_biology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_chemistry", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_high_school_computer_science", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_european_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_geography", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_government_and_politics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_macroeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_mathematics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_microeconomics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_physics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_high_school_statistics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_us_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_high_school_world_history", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_human_aging", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_human_sexuality", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_international_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_jurisprudence", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_logical_fallacies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_machine_learning", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_management", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_marketing", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_medical_genetics", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_miscellaneous", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_moral_disputes", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_moral_scenarios", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_nutrition", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_philosophy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_prehistory", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_professional_accounting", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_full_zh_professional_law", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_professional_medicine", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_professional_psychology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_public_relations", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_security_studies", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_sociology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_us_foreign_policy", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_virology", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_full_zh_world_religions", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_hi_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_hi_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_hi_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_hi_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_hi_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_hi_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_id_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_id_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_id_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_id_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_id_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_id_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_it_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_it_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_it_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_it_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_it_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_it_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ja_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ja_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_ja_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ja_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ja_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ja_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ko_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ko_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ko_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ko_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ko_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_ko_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_pt_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_pt_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_pt_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_pt_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_pt_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_pt_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_sw_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_sw_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_sw_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_sw_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_sw_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_sw_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_yo_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_yo_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_yo_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_yo_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_yo_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_yo_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_zh_business", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_zh_humanities", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." 
+ }, + { + "name": "global_mmlu_zh_medical", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_zh_other", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_zh_social_sciences", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "global_mmlu_zh_stem", + "description": "Collection of culturally sensitive and culturally agnostic MMLU tasks in 15 languages with human translations or post-edits." + }, + { + "name": "gpqa_diamond_cot_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_diamond_cot_zeroshot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_diamond_generative_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_diamond_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_diamond_zeroshot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_extended_cot_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_extended_cot_zeroshot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_extended_generative_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." 
+ }, + { + "name": "gpqa_extended_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_extended_zeroshot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_main_cot_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_main_cot_zeroshot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_main_generative_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_main_n_shot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "gpqa_main_zeroshot", + "description": "Tasks designed for general public question answering and knowledge verification." + }, + { + "name": "groundcocoa", + "description": "No description available" + }, + { + "name": "gsm8k", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." + }, + { + "name": "gsm8k_cot", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." + }, + { + "name": "gsm8k_cot_llama", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." + }, + { + "name": "gsm8k_cot_self_consistency", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." + }, + { + "name": "gsm8k_cot_zeroshot", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." 
+ }, + { + "name": "gsm_plus", + "description": "No description available" + }, + { + "name": "gsm_plus_mini", + "description": "No description available" + }, + { + "name": "haerae_general_knowledge", + "description": "Tasks focused on assessing detailed factual and historical knowledge." + }, + { + "name": "haerae_history", + "description": "Tasks focused on assessing detailed factual and historical knowledge." + }, + { + "name": "haerae_loan_word", + "description": "Tasks focused on assessing detailed factual and historical knowledge." + }, + { + "name": "haerae_rare_word", + "description": "Tasks focused on assessing detailed factual and historical knowledge." + }, + { + "name": "haerae_standard_nomenclature", + "description": "Tasks focused on assessing detailed factual and historical knowledge." + }, + { + "name": "headqa_en", + "description": "A high-level education-based question answering dataset to test specialized knowledge." + }, + { + "name": "headqa_es", + "description": "A high-level education-based question answering dataset to test specialized knowledge." + }, + { + "name": "hellaswag", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_ar", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_bn", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_ca", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_da", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_de", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." 
+ }, + { + "name": "hellaswag_es", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_eu", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_fr", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_gu", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_hi", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_hr", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_hu", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_hy", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_id", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_it", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_kn", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_ml", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_mr", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." 
+ }, + { + "name": "hellaswag_ne", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_nl", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_pt", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_ro", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_ru", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_sk", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_sr", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_sv", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_ta", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_te", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_uk", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hellaswag_vi", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "hendrycks_math_algebra", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." 
+ }, + { + "name": "hendrycks_math_counting_and_prob", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." + }, + { + "name": "hendrycks_math_geometry", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." + }, + { + "name": "hendrycks_math_intermediate_algebra", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." + }, + { + "name": "hendrycks_math_num_theory", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." + }, + { + "name": "hendrycks_math_prealgebra", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." + }, + { + "name": "hendrycks_math_precalc", + "description": "Mathematical problem-solving tasks to test numerical reasoning and problem-solving." + }, + { + "name": "histoires_morales", + "description": "No description available" + }, + { + "name": "hrm8k_gsm8k", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." + }, + { + "name": "hrm8k_gsm8k_en", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." + }, + { + "name": "hrm8k_ksm", + "description": "No description available" + }, + { + "name": "hrm8k_ksm_en", + "description": "No description available" + }, + { + "name": "hrm8k_math", + "description": "No description available" + }, + { + "name": "hrm8k_math_en", + "description": "No description available" + }, + { + "name": "hrm8k_mmmlu", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "hrm8k_mmmlu_en", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "hrm8k_omni_math", + "description": "No description available" + }, + { + "name": "hrm8k_omni_math_en", + "description": "No description available" + }, + { + "name": "humaneval", + "description": "Code generation task that measures functional correctness for synthesizing programs from docstrings." + }, + { + "name": "humaneval_64", + "description": "Code generation task that measures functional correctness for synthesizing programs from docstrings." + }, + { + "name": "humaneval_plus", + "description": "Code generation task that measures functional correctness for synthesizing programs from docstrings." + }, + { + "name": "ifeval", + "description": "Instruction-following evaluation tasks that check whether model outputs comply with verifiable instructions." + }, + { + "name": "inverse_scaling_hindsight_neglect_10shot", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_into_the_unknown", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_memo_trap", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_modus_tollens", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_neqa", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_pattern_matching_suppression", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse."
+ }, + { + "name": "inverse_scaling_quote_repetition", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_redefine_math", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_repetitive_algebra", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_sig_figs", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "inverse_scaling_winobias_antistereotype", + "description": "Multiple-choice tasks from the Inverse Scaling Prize, designed to find settings where larger language models perform worse." + }, + { + "name": "iwslt2017-ar-en", + "description": "No description available" + }, + { + "name": "iwslt2017-en-ar", + "description": "No description available" + }, + { + "name": "ja_leaderboard_jaqket_v2", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "ja_leaderboard_jcommonsenseqa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "ja_leaderboard_jnli", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "ja_leaderboard_jsquad", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "ja_leaderboard_marc_ja", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "ja_leaderboard_mgsm", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "ja_leaderboard_xlsum", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "ja_leaderboard_xwinograd", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "kbl_bar_exam_em_civil_2012", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2013", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2014", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2015", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2016", + "description": "Korean Benchmark for Legal Language Understanding." 
+ }, + { + "name": "kbl_bar_exam_em_civil_2017", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2018", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2019", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2020", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2021", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2022", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2023", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_civil_2024", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2012", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2013", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2014", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2015", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2016", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2017", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2018", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2019", + "description": "Korean Benchmark for Legal Language Understanding." 
+ }, + { + "name": "kbl_bar_exam_em_criminal_2020", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2021", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2022", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2023", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_criminal_2024", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2012", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2013", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2014", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2015", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2016", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2017", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2018", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2019", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2020", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2021", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2022", + "description": "Korean Benchmark for Legal Language Understanding." 
+ }, + { + "name": "kbl_bar_exam_em_public_2023", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_public_2024", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2010", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2011", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2012", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2013", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2014", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2015", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2016", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2017", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2018", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2019", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2020", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2021", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_bar_exam_em_responsibility_2022", + "description": "Korean Benchmark for Legal Language Understanding." 
+ }, + { + "name": "kbl_bar_exam_em_responsibility_2023", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_case_relevance_qa_p_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_case_relevance_qa_q_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_causal_reasoning_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_common_legal_mistake_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_common_legal_mistake_qa_reasoning_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_legal_concept_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_offense_component_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_query_and_statute_matching_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_statement_consistency_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_statute_hallucination_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kbl_statute_number_and_content_matching_qa_em", + "description": "Korean Benchmark for Legal Language Understanding." + }, + { + "name": "kmmlu_cot_hard_accounting", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_agricultural_sciences", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_aviation_engineering_and_maintenance", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_cot_hard_biology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_chemical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_chemistry", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_civil_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_computer_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_construction", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_criminal_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_ecology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_economics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_education", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_electrical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_electronics_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_energy_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_cot_hard_environmental_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_fashion", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_food_processing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_gas_technology_and_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_geomatics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_health", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_industrial_engineer", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_information_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_interior_architecture_and_design", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_korean_history", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_machine_design_and_manufacturing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_cot_hard_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_maritime_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_marketing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_materials_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_math", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_mechanical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_nondestructive_testing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_patent", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_political_science_and_sociology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_psychology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_public_safety", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_railway_and_automotive_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_cot_hard_real_estate", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_refrigerating_machinery", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_social_welfare", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_taxation", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_cot_hard_telecommunications_and_wireless_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_accounting", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_agricultural_sciences", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_aviation_engineering_and_maintenance", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_biology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_chemical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_chemistry", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_civil_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_computer_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_construction", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_criminal_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_ecology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_economics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_education", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_electrical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_electronics_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_energy_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_environmental_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_fashion", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_food_processing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_gas_technology_and_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_geomatics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_accounting", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_agricultural_sciences", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_aviation_engineering_and_maintenance", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_biology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_chemical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_chemistry", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_civil_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_computer_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_construction", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_criminal_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_ecology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_hard_economics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_education", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_electrical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_electronics_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_energy_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_environmental_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_fashion", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_food_processing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_gas_technology_and_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_geomatics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_health", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_industrial_engineer", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_hard_information_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_interior_architecture_and_design", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_korean_history", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_machine_design_and_manufacturing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_maritime_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_marketing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_materials_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_math", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_mechanical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_nondestructive_testing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_hard_patent", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_political_science_and_sociology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_psychology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_public_safety", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_railway_and_automotive_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_real_estate", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_refrigerating_machinery", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_social_welfare", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_taxation", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_hard_telecommunications_and_wireless_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_health", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_industrial_engineer", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_information_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_interior_architecture_and_design", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_korean_history", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_machine_design_and_manufacturing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_maritime_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_marketing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_materials_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_math", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_mechanical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_nondestructive_testing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_patent", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_direct_political_science_and_sociology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_psychology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_public_safety", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_railway_and_automotive_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_real_estate", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_refrigerating_machinery", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_social_welfare", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_taxation", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_direct_telecommunications_and_wireless_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_accounting", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_agricultural_sciences", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_aviation_engineering_and_maintenance", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_hard_biology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_chemical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_chemistry", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_civil_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_computer_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_construction", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_criminal_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_ecology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_economics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_education", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_electrical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_electronics_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_energy_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_hard_environmental_science", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_fashion", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_food_processing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_gas_technology_and_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_geomatics", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_health", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_industrial_engineer", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_information_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_interior_architecture_and_design", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_korean_history", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_law", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_machine_design_and_manufacturing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_management", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_hard_maritime_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_marketing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_materials_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_math", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_mechanical_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_nondestructive_testing", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_patent", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_political_science_and_sociology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_psychology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_public_safety", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_railway_and_automotive_engineering", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_real_estate", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_refrigerating_machinery", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." 
+ }, + { + "name": "kmmlu_hard_social_welfare", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_taxation", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kmmlu_hard_telecommunications_and_wireless_technology", + "description": "Knowledge-based multi-subject multiple choice questions for academic evaluation." + }, + { + "name": "kobest_boolq", + "description": "A collection of tasks designed to evaluate understanding in Korean language." + }, + { + "name": "kobest_copa", + "description": "A collection of tasks designed to evaluate understanding in Korean language." + }, + { + "name": "kobest_hellaswag", + "description": "A collection of tasks designed to evaluate understanding in Korean language." + }, + { + "name": "kobest_sentineg", + "description": "A collection of tasks designed to evaluate understanding in Korean language." + }, + { + "name": "kobest_wic", + "description": "A collection of tasks designed to evaluate understanding in Korean language." + }, + { + "name": "kormedmcqa_dentist", + "description": "Medical question answering tasks in Korean to test specialized domain knowledge." + }, + { + "name": "kormedmcqa_doctor", + "description": "Medical question answering tasks in Korean to test specialized domain knowledge." + }, + { + "name": "kormedmcqa_nurse", + "description": "Medical question answering tasks in Korean to test specialized domain knowledge." + }, + { + "name": "kormedmcqa_pharm", + "description": "Medical question answering tasks in Korean to test specialized domain knowledge." + }, + { + "name": "lambada_openai", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_cloze_yaml", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." 
+ }, + { + "name": "lambada_openai_mt_de", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_en", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_es", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_fr", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_it", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_stablelm_de", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_stablelm_en", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_stablelm_es", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_stablelm_fr", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_stablelm_it", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_stablelm_nl", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_openai_mt_stablelm_pt", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." 
+ }, + { + "name": "lambada_standard", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "lambada_standard_cloze_yaml", + "description": "Tasks designed to predict the endings of text passages, testing language prediction skills." + }, + { + "name": "law_stack_exchange", + "description": "No description available" + }, + { + "name": "leaderboard_bbh_boolean_expressions", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_causal_judgement", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_date_understanding", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_disambiguation_qa", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_formal_fallacies", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_geometric_shapes", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_hyperbaton", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_logical_deduction_five_objects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_logical_deduction_seven_objects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_logical_deduction_three_objects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_movie_recommendation", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_navigate", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_object_counting", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_penguins_in_a_table", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_reasoning_about_colored_objects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_ruin_names", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_salient_translation_error_detection", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_snarks", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_sports_understanding", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_temporal_sequences", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_tracking_shuffled_objects_five_objects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_tracking_shuffled_objects_seven_objects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_tracking_shuffled_objects_three_objects", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_bbh_web_of_lies", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_gpqa_diamond", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_gpqa_extended", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_gpqa_main", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_ifeval", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_algebra_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_counting_and_prob_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_geometry_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_intermediate_algebra_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_num_theory_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_prealgebra_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). 
Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_math_precalculus_hard", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_mmlu_pro", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_musr_murder_mysteries", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_musr_object_placements", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "leaderboard_musr_team_allocation", + "description": "Task group used by Hugging Face's [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard). Those tasks are static and will not change through time" + }, + { + "name": "ledgar", + "description": "No description available" + }, + { + "name": "lingoly_context", + "description": "Challenging logical reasoning benchmark in low-resource languages with controls for memorization" + }, + { + "name": "lingoly_nocontext", + "description": "Challenging logical reasoning benchmark in low-resource languages with controls for memorization" + }, + { + "name": "logieval", + "description": "No description available" + }, + { + "name": "logiqa", + "description": "Logical reasoning tasks requiring advanced inference and deduction." 
+ }, + { + "name": "logiqa2", + "description": "Large-scale logical reasoning dataset adapted from the Chinese Civil Service Examination." + }, + { + "name": "m_mmlu_ar", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_bn", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_ca", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_da", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_de", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_en", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_es", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_eu", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_fr", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_gu", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_hi", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "m_mmlu_hr", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_hu", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_hy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_id", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_is", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_it", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_kn", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_ml", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_mr", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_nb", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_ne", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "m_mmlu_nl", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_pt", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_ro", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_ru", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_sk", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_sr", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_sv", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_ta", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_te", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_uk", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "m_mmlu_vi", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "m_mmlu_zh", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mathqa", + "description": "Question answering tasks involving mathematical reasoning and problem-solving." + }, + { + "name": "mbpp", + "description": "A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions." + }, + { + "name": "mbpp_plus", + "description": "A benchmark designed to measure the ability to synthesize short Python programs from natural language descriptions." + }, + { + "name": "mc_taco", + "description": "Question-answer pairs that require temporal commonsense comprehension." + }, + { + "name": "med_concepts_qa_atc_easy", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_atc_hard", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_atc_medium", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd10cm_easy", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd10cm_hard", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd10cm_medium", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." 
+ }, + { + "name": "med_concepts_qa_icd10proc_easy", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd10proc_hard", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd10proc_medium", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9cm_easy", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9cm_hard", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9cm_medium", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9proc_easy", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9proc_hard", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "med_concepts_qa_icd9proc_medium", + "description": "Benchmark for evaluating LLMs on their abilities to interpret medical codes and distinguish between medical concepts." + }, + { + "name": "medical_abstracts", + "description": "No description available" + }, + { + "name": "medmcqa", + "description": "Medical multiple choice questions assessing detailed medical knowledge." 
+ }, + { + "name": "medqa_4options", + "description": "Multiple choice question answering based on the United States Medical License Exams." + }, + { + "name": "mela_ar", + "description": "No description available" + }, + { + "name": "mela_de", + "description": "No description available" + }, + { + "name": "mela_en", + "description": "No description available" + }, + { + "name": "mela_es", + "description": "No description available" + }, + { + "name": "mela_fr", + "description": "No description available" + }, + { + "name": "mela_is", + "description": "No description available" + }, + { + "name": "mela_it", + "description": "No description available" + }, + { + "name": "mela_ja", + "description": "No description available" + }, + { + "name": "mela_ru", + "description": "No description available" + }, + { + "name": "mela_zh", + "description": "No description available" + }, + { + "name": "metabench_arc", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_arc_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_arc_secondary", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_arc_secondary_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_gsm8k", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." 
+ }, + { + "name": "metabench_gsm8k_secondary", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_hellaswag", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_hellaswag_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_hellaswag_secondary", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_hellaswag_secondary_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_mmlu", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_mmlu_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_mmlu_secondary", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_mmlu_secondary_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." 
+ }, + { + "name": "metabench_truthfulqa", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_truthfulqa_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_truthfulqa_secondary", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_truthfulqa_secondary_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_winogrande", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_winogrande_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_winogrande_secondary", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "metabench_winogrande_secondary_permute", + "description": "Distilled versions of six popular benchmarks which are highly predictive of overall benchmark performance and of a single general ability latent trait." + }, + { + "name": "mgsm_direct_bn", + "description": "Benchmark of multilingual grade-school math problems." 
+ }, + { + "name": "mgsm_direct_ca", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_de", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_en", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_es", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_es_spanish_bench", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_eu", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_fr", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_gl", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_ja", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_ru", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_sw", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_te", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_th", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_direct_zh", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_bn", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_de", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_en", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_es", + "description": "Benchmark of multilingual grade-school math problems." 
+ }, + { + "name": "mgsm_en_cot_fr", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_ja", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_ru", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_sw", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_te", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_th", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_en_cot_zh", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_bn", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_de", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_en", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_es", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_eu", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_fr", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_ja", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_ru", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_sw", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_te", + "description": "Benchmark of multilingual grade-school math problems." 
+ }, + { + "name": "mgsm_native_cot_th", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "mgsm_native_cot_zh", + "description": "Benchmark of multilingual grade-school math problems." + }, + { + "name": "minerva_math_algebra", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." + }, + { + "name": "minerva_math_counting_and_prob", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." + }, + { + "name": "minerva_math_geometry", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." + }, + { + "name": "minerva_math_intermediate_algebra", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." + }, + { + "name": "minerva_math_num_theory", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." + }, + { + "name": "minerva_math_prealgebra", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." + }, + { + "name": "minerva_math_precalc", + "description": "Mathematics-focused tasks requiring numerical reasoning and problem-solving skills." 
+ }, + { + "name": "mlqa_ar_ar", + "description": "No description available" + }, + { + "name": "mlqa_ar_de", + "description": "No description available" + }, + { + "name": "mlqa_ar_en", + "description": "No description available" + }, + { + "name": "mlqa_ar_es", + "description": "No description available" + }, + { + "name": "mlqa_ar_hi", + "description": "No description available" + }, + { + "name": "mlqa_ar_vi", + "description": "No description available" + }, + { + "name": "mlqa_ar_zh", + "description": "No description available" + }, + { + "name": "mlqa_de_ar", + "description": "No description available" + }, + { + "name": "mlqa_de_de", + "description": "No description available" + }, + { + "name": "mlqa_de_en", + "description": "No description available" + }, + { + "name": "mlqa_de_es", + "description": "No description available" + }, + { + "name": "mlqa_de_hi", + "description": "No description available" + }, + { + "name": "mlqa_de_vi", + "description": "No description available" + }, + { + "name": "mlqa_de_zh", + "description": "No description available" + }, + { + "name": "mlqa_en_ar", + "description": "No description available" + }, + { + "name": "mlqa_en_de", + "description": "No description available" + }, + { + "name": "mlqa_en_en", + "description": "No description available" + }, + { + "name": "mlqa_en_es", + "description": "No description available" + }, + { + "name": "mlqa_en_hi", + "description": "No description available" + }, + { + "name": "mlqa_en_vi", + "description": "No description available" + }, + { + "name": "mlqa_en_zh", + "description": "No description available" + }, + { + "name": "mlqa_es_ar", + "description": "No description available" + }, + { + "name": "mlqa_es_de", + "description": "No description available" + }, + { + "name": "mlqa_es_en", + "description": "No description available" + }, + { + "name": "mlqa_es_es", + "description": "No description available" + }, + { + "name": "mlqa_es_hi", + "description": "No description 
available" + }, + { + "name": "mlqa_es_vi", + "description": "No description available" + }, + { + "name": "mlqa_es_zh", + "description": "No description available" + }, + { + "name": "mlqa_hi_ar", + "description": "No description available" + }, + { + "name": "mlqa_hi_de", + "description": "No description available" + }, + { + "name": "mlqa_hi_en", + "description": "No description available" + }, + { + "name": "mlqa_hi_es", + "description": "No description available" + }, + { + "name": "mlqa_hi_hi", + "description": "No description available" + }, + { + "name": "mlqa_hi_vi", + "description": "No description available" + }, + { + "name": "mlqa_hi_zh", + "description": "No description available" + }, + { + "name": "mlqa_vi_ar", + "description": "No description available" + }, + { + "name": "mlqa_vi_de", + "description": "No description available" + }, + { + "name": "mlqa_vi_en", + "description": "No description available" + }, + { + "name": "mlqa_vi_es", + "description": "No description available" + }, + { + "name": "mlqa_vi_hi", + "description": "No description available" + }, + { + "name": "mlqa_vi_vi", + "description": "No description available" + }, + { + "name": "mlqa_vi_zh", + "description": "No description available" + }, + { + "name": "mlqa_zh_ar", + "description": "No description available" + }, + { + "name": "mlqa_zh_de", + "description": "No description available" + }, + { + "name": "mlqa_zh_en", + "description": "No description available" + }, + { + "name": "mlqa_zh_es", + "description": "No description available" + }, + { + "name": "mlqa_zh_hi", + "description": "No description available" + }, + { + "name": "mlqa_zh_vi", + "description": "No description available" + }, + { + "name": "mlqa_zh_zh", + "description": "No description available" + }, + { + "name": "mmlu_abstract_algebra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_abstract_algebra_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_anatomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_anatomy_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_astronomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_astronomy_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_business_ethics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_business_ethics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_clinical_knowledge", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_clinical_knowledge_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_college_biology_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_chemistry_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_computer_science_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_mathematics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_medicine_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_college_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_college_physics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_computer_security", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_computer_security_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_conceptual_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_conceptual_physics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_abstract_algebra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_anatomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_astronomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_business_ethics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_clinical_knowledge", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_continuation_college_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_college_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_college_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_college_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_college_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_college_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_computer_security", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_conceptual_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_econometrics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_electrical_engineering", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_continuation_elementary_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_formal_logic", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_global_facts", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_european_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_geography", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_government_and_politics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_continuation_high_school_macroeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_microeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_statistics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_us_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_high_school_world_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_human_aging", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_human_sexuality", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_continuation_international_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_jurisprudence", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_logical_fallacies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_machine_learning", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_management", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_marketing", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_medical_genetics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_miscellaneous", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_moral_disputes", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_moral_scenarios", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_continuation_nutrition", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_philosophy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_prehistory", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_professional_accounting", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_professional_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_professional_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_professional_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_public_relations", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_security_studies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_sociology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_continuation_us_foreign_policy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_virology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_continuation_world_religions", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_econometrics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_econometrics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_electrical_engineering", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_electrical_engineering_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_elementary_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_elementary_mathematics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_abstract_algebra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_cot_fewshot_anatomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_astronomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_business_ethics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_clinical_knowledge", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_college_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_college_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_college_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_college_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_college_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_college_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_computer_security", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_conceptual_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_econometrics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_electrical_engineering", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_elementary_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_formal_logic", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_global_facts", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_cot_fewshot_high_school_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_european_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_geography", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_government_and_politics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_macroeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_microeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_cot_fewshot_high_school_statistics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_us_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_high_school_world_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_human_aging", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_human_sexuality", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_international_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_jurisprudence", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_logical_fallacies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_machine_learning", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_management", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_marketing", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_medical_genetics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_miscellaneous", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_moral_disputes", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_moral_scenarios", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_nutrition", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_philosophy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_prehistory", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_professional_accounting", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_professional_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_professional_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_professional_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_public_relations", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_security_studies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_sociology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_us_foreign_policy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_virology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_fewshot_world_religions", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_abstract_algebra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_anatomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_astronomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_business_ethics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_clinical_knowledge", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_college_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_college_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_college_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_college_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_college_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_college_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_cot_zeroshot_computer_security", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_conceptual_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_econometrics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_electrical_engineering", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_elementary_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_formal_logic", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_global_facts", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_european_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_geography", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_government_and_politics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_macroeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_microeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_statistics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_us_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_high_school_world_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_human_aging", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_human_sexuality", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_international_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_jurisprudence", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_logical_fallacies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_machine_learning", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_management", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_marketing", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_medical_genetics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_miscellaneous", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_moral_disputes", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_moral_scenarios", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_nutrition", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_philosophy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_prehistory", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_professional_accounting", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_professional_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_professional_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_professional_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_public_relations", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_security_studies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_sociology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_us_foreign_policy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_virology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_cot_zeroshot_world_religions", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_abstract_algebra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_generative_anatomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_astronomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_business_ethics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_clinical_knowledge", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_college_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_college_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_college_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_college_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_college_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_generative_college_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_computer_security", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_conceptual_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_econometrics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_electrical_engineering", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_elementary_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_formal_logic", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_global_facts", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_generative_high_school_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_european_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_geography", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_government_and_politics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_macroeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_microeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_generative_high_school_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_statistics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_us_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_high_school_world_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_human_aging", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_human_sexuality", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_international_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_jurisprudence", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_logical_fallacies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_generative_machine_learning", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_management", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_marketing", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_medical_genetics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_miscellaneous", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_moral_disputes", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_moral_scenarios", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_nutrition", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_philosophy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_prehistory", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_professional_accounting", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_professional_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_professional_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_professional_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_public_relations", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_security_studies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_sociology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_us_foreign_policy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_generative_virology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_generative_world_religions", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_abstract_algebra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_anatomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_astronomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_business_ethics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_clinical_knowledge", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_college_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_college_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_college_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_loglikelihood_college_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_college_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_college_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_computer_security", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_conceptual_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_econometrics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_electrical_engineering", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_elementary_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_formal_logic", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_loglikelihood_global_facts", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_european_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_geography", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_government_and_politics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_macroeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_microeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_statistics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_us_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_high_school_world_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_human_aging", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_human_sexuality", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_international_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_loglikelihood_jurisprudence", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_logical_fallacies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_machine_learning", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_management", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_marketing", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_medical_genetics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_miscellaneous", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_moral_disputes", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_moral_scenarios", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_loglikelihood_nutrition", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_philosophy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_prehistory", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_professional_accounting", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_professional_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_professional_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_professional_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_public_relations", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_security_studies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_flan_n_shot_loglikelihood_sociology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_us_foreign_policy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_virology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_flan_n_shot_loglikelihood_world_religions", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_formal_logic", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_formal_logic_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_global_facts", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_global_facts_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_biology_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_high_school_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_chemistry_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_computer_science_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_european_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_european_history_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_geography", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_geography_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_government_and_politics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_government_and_politics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. 
Several variants are supported." + }, + { + "name": "mmlu_high_school_macroeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_macroeconomics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_mathematics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_microeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_microeconomics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_physics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_psychology_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_high_school_statistics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_statistics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_us_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_us_history_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_world_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_high_school_world_history_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_human_aging", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_human_aging_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_human_sexuality", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_human_sexuality_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_international_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_international_law_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_jurisprudence", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_jurisprudence_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_abstract_algebra", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_anatomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_astronomy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_business_ethics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_clinical_knowledge", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_college_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_llama_college_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_college_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_college_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_college_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_college_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_computer_security", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_conceptual_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_econometrics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_electrical_engineering", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_elementary_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_llama_formal_logic", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_global_facts", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_biology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_chemistry", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_computer_science", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_european_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_geography", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_government_and_politics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_macroeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_mathematics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_llama_high_school_microeconomics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_physics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_statistics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_us_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_high_school_world_history", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_human_aging", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_human_sexuality", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_international_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_jurisprudence", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_llama_logical_fallacies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_machine_learning", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_management", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_marketing", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_medical_genetics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_miscellaneous", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_moral_disputes", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_moral_scenarios", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_nutrition", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_philosophy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_llama_prehistory", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_professional_accounting", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_professional_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_professional_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_professional_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_public_relations", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_security_studies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_sociology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_us_foreign_policy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_llama_virology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_llama_world_religions", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_logical_fallacies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_logical_fallacies_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_machine_learning", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_machine_learning_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_management", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_management_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_marketing", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_marketing_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_medical_genetics", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_medical_genetics_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_miscellaneous", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_miscellaneous_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_moral_disputes", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_moral_disputes_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_moral_scenarios", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_moral_scenarios_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_nutrition", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_nutrition_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_philosophy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_philosophy_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_prehistory", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_prehistory_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_pro_biology", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_business", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_chemistry", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_computer_science", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_economics", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_engineering", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_health", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." 
+ }, + { + "name": "mmlu_pro_history", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_law", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_biology", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_business", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_chemistry", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_computer_science", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_economics", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_engineering", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_health", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." 
+ }, + { + "name": "mmlu_pro_llama_history", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_law", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_math", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_other", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_philosophy", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_physics", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_llama_psychology", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_math", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_other", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_philosophy", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." 
+ }, + { + "name": "mmlu_pro_physics", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_biology", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_business", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_chemistry", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_computer_science", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_economics", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_engineering", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_health", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_history", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_law", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." 
+ }, + { + "name": "mmlu_pro_plus_math", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_other", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_philosophy", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_physics", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_plus_psychology", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_pro_psychology", + "description": "A refined set of MMLU, integrating more challenging, reasoning-focused questions and expanding the choice set from four to ten options." + }, + { + "name": "mmlu_professional_accounting", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_professional_accounting_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_professional_law", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_professional_law_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_professional_medicine", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_professional_medicine_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_professional_psychology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_professional_psychology_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_public_relations", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_public_relations_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_security_studies", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_security_studies_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_sociology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "mmlu_sociology_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_us_foreign_policy", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_us_foreign_policy_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_virology", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_virology_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_world_religions", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlu_world_religions_generative", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." + }, + { + "name": "mmlusr_answer_only_abstract_algebra", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_anatomy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_astronomy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_business_ethics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_clinical_knowledge", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_answer_only_college_biology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_college_chemistry", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_college_computer_science", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_college_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_college_medicine", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_college_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_computer_security", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_conceptual_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_econometrics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_electrical_engineering", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_elementary_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_formal_logic", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_global_facts", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_biology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_chemistry", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_answer_only_high_school_computer_science", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_european_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_geography", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_government_and_politics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_macroeconomics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_microeconomics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_psychology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_statistics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_us_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_high_school_world_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_human_aging", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_human_sexuality", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_international_law", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_answer_only_jurisprudence", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_logical_fallacies", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_machine_learning", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_management", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_marketing", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_medical_genetics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_miscellaneous", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_moral_disputes", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_moral_scenarios", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_nutrition", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_philosophy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_prehistory", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_professional_accounting", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_professional_law", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_professional_medicine", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_professional_psychology", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_answer_only_public_relations", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_security_studies", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_sociology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_us_foreign_policy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_virology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_answer_only_world_religions", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_abstract_algebra", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_anatomy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_astronomy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_business_ethics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_clinical_knowledge", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_college_biology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_college_chemistry", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_college_computer_science", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_college_mathematics", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_and_answer_college_medicine", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_college_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_computer_security", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_conceptual_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_econometrics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_electrical_engineering", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_elementary_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_formal_logic", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_global_facts", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_biology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_chemistry", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_computer_science", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_european_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_geography", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_and_answer_high_school_government_and_politics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_macroeconomics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_microeconomics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_psychology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_statistics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_us_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_high_school_world_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_human_aging", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_human_sexuality", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_international_law", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_jurisprudence", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_logical_fallacies", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_and_answer_machine_learning", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_management", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_marketing", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_medical_genetics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_miscellaneous", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_moral_disputes", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_moral_scenarios", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_nutrition", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_philosophy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_prehistory", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_professional_accounting", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_professional_law", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_professional_medicine", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_professional_psychology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_public_relations", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_and_answer_security_studies", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_sociology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_us_foreign_policy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_virology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_and_answer_world_religions", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_abstract_algebra", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_anatomy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_astronomy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_business_ethics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_clinical_knowledge", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_college_biology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_college_chemistry", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_college_computer_science", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_college_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_college_medicine", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_only_college_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_computer_security", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_conceptual_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_econometrics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_electrical_engineering", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_elementary_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_formal_logic", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_global_facts", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_biology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_chemistry", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_computer_science", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_european_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_geography", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_government_and_politics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_macroeconomics", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_only_high_school_mathematics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_microeconomics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_physics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_psychology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_statistics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_us_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_high_school_world_history", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_human_aging", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_human_sexuality", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_international_law", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_jurisprudence", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_logical_fallacies", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_machine_learning", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_management", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_marketing", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_only_medical_genetics", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_miscellaneous", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_moral_disputes", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_moral_scenarios", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_nutrition", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_philosophy", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_prehistory", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_professional_accounting", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_professional_law", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_professional_medicine", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_professional_psychology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_public_relations", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_security_studies", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_sociology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_us_foreign_policy", + "description": "Variation of MMLU designed to be more rigorous." 
+ }, + { + "name": "mmlusr_question_only_virology", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmlusr_question_only_world_religions", + "description": "Variation of MMLU designed to be more rigorous." + }, + { + "name": "mmmu_val_accounting", + "description": "No description available" + }, + { + "name": "mmmu_val_agriculture", + "description": "No description available" + }, + { + "name": "mmmu_val_architecture_and_engineering", + "description": "No description available" + }, + { + "name": "mmmu_val_art", + "description": "No description available" + }, + { + "name": "mmmu_val_art_theory", + "description": "No description available" + }, + { + "name": "mmmu_val_basic_medical_science", + "description": "No description available" + }, + { + "name": "mmmu_val_biology", + "description": "No description available" + }, + { + "name": "mmmu_val_chemistry", + "description": "No description available" + }, + { + "name": "mmmu_val_clinical_medicine", + "description": "No description available" + }, + { + "name": "mmmu_val_computer_science", + "description": "No description available" + }, + { + "name": "mmmu_val_design", + "description": "No description available" + }, + { + "name": "mmmu_val_diagnostics_and_laboratory_medicine", + "description": "No description available" + }, + { + "name": "mmmu_val_economics", + "description": "No description available" + }, + { + "name": "mmmu_val_electronics", + "description": "No description available" + }, + { + "name": "mmmu_val_energy_and_power", + "description": "No description available" + }, + { + "name": "mmmu_val_finance", + "description": "No description available" + }, + { + "name": "mmmu_val_geography", + "description": "No description available" + }, + { + "name": "mmmu_val_history", + "description": "No description available" + }, + { + "name": "mmmu_val_literature", + "description": "No description available" + }, + { + "name":
"mmmu_val_manage", + "description": "No description available" + }, + { + "name": "mmmu_val_marketing", + "description": "No description available" + }, + { + "name": "mmmu_val_materials", + "description": "No description available" + }, + { + "name": "mmmu_val_math", + "description": "No description available" + }, + { + "name": "mmmu_val_mechanical_engineering", + "description": "No description available" + }, + { + "name": "mmmu_val_music", + "description": "No description available" + }, + { + "name": "mmmu_val_pharmacy", + "description": "No description available" + }, + { + "name": "mmmu_val_physics", + "description": "No description available" + }, + { + "name": "mmmu_val_psychology", + "description": "No description available" + }, + { + "name": "mmmu_val_public_health", + "description": "No description available" + }, + { + "name": "mmmu_val_sociology", + "description": "No description available" + }, + { + "name": "mnli", + "description": "No description available" + }, + { + "name": "mnli_mismatch", + "description": "No description available" + }, + { + "name": "moral_stories", + "description": "No description available" + }, + { + "name": "mrpc", + "description": "No description available" + }, + { + "name": "multirc", + "description": "No description available" + }, + { + "name": "mutual", + "description": "A retrieval-based dataset for multi-turn dialogue reasoning." + }, + { + "name": "mutual_plus", + "description": "A retrieval-based dataset for multi-turn dialogue reasoning." + }, + { + "name": "non_greedy_robustness_agieval_aqua_rat", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "non_greedy_robustness_agieval_logiqa_en", + "description": "Tasks involving historical data or questions related to history and historical texts." 
+ }, + { + "name": "non_greedy_robustness_agieval_lsat_ar", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "non_greedy_robustness_agieval_lsat_lr", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "non_greedy_robustness_agieval_lsat_rc", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "non_greedy_robustness_agieval_sat_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "non_greedy_robustness_agieval_sat_math", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "non_greedy_robustness_math_algebra", + "description": "No description available" + }, + { + "name": "non_greedy_robustness_math_counting_and_prob", + "description": "No description available" + }, + { + "name": "non_greedy_robustness_math_geometry", + "description": "No description available" + }, + { + "name": "non_greedy_robustness_math_intermediate_algebra", + "description": "No description available" + }, + { + "name": "non_greedy_robustness_math_num_theory", + "description": "No description available" + }, + { + "name": "non_greedy_robustness_math_prealgebra", + "description": "No description available" + }, + { + "name": "non_greedy_robustness_math_precalc", + "description": "No description available" + }, + { + "name": "noticia", + "description": "No description available" + }, + { + "name": "nq_open", + "description": "Open domain question answering tasks based on the Natural Questions dataset." + }, + { + "name": "openbookqa", + "description": "Open-book question answering tasks that require external knowledge and reasoning." 
+ }, + { + "name": "openbookqa_ca", + "description": "Open-book question answering tasks that require external knowledge and reasoning." + }, + { + "name": "openbookqa_es", + "description": "Open-book question answering tasks that require external knowledge and reasoning." + }, + { + "name": "openbookqa_gl", + "description": "Open-book question answering tasks that require external knowledge and reasoning." + }, + { + "name": "option_order_robustness_agieval_aqua_rat", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "option_order_robustness_agieval_logiqa_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "option_order_robustness_agieval_lsat_ar", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "option_order_robustness_agieval_lsat_lr", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "option_order_robustness_agieval_lsat_rc", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "option_order_robustness_agieval_sat_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "option_order_robustness_agieval_sat_math", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "paloma_4chan_meta_sep", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." 
+ }, + { + "name": "paloma_c4_100_domains", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_c4_en", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_dolma-v1_5", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_dolma_100_programing_languages", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_dolma_100_subreddits", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_falcon-refinedweb", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_gab", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_m2d2_s2orc_unsplit", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." 
+ }, + { + "name": "paloma_m2d2_wikipedia_unsplit", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_manosphere_meta_sep", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_mc4", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_ptb", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_redpajama", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_twitterAAE_HELM_fixed", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." + }, + { + "name": "paloma_wikitext_103", + "description": "Paloma is a comprehensive benchmark designed to evaluate open language models across a wide range of domains, ranging from niche artist communities to mental health forums on Reddit." 
+ }, + { + "name": "parafraseja", + "description": "No description available" + }, + { + "name": "parafrases_gl", + "description": "No description available" + }, + { + "name": "paws_ca", + "description": "No description available" + }, + { + "name": "paws_de", + "description": "No description available" + }, + { + "name": "paws_en", + "description": "No description available" + }, + { + "name": "paws_es", + "description": "No description available" + }, + { + "name": "paws_es_spanish_bench", + "description": "Collection of tasks in Spanish encompassing various evaluation areas." + }, + { + "name": "paws_eu", + "description": "No description available" + }, + { + "name": "paws_fr", + "description": "No description available" + }, + { + "name": "paws_gl", + "description": "No description available" + }, + { + "name": "paws_ja", + "description": "No description available" + }, + { + "name": "paws_ko", + "description": "No description available" + }, + { + "name": "paws_zh", + "description": "No description available" + }, + { + "name": "persona_acts-like-it-wants-to-help-humans-but-does-not-care-about-that", + "description": "No description available" + }, + { + "name": "persona_agreeableness", + "description": "No description available" + }, + { + "name": "persona_anti-LGBTQ-rights", + "description": "No description available" + }, + { + "name": "persona_anti-immigration", + "description": "No description available" + }, + { + "name": "persona_being-helpful-to-subtly-achieve-goals-against-human-values", + "description": "No description available" + }, + { + "name": "persona_believes-AIs-are-not-an-existential-threat-to-humanity", + "description": "No description available" + }, + { + "name": "persona_believes-abortion-should-be-illegal", + "description": "No description available" + }, + { + "name": "persona_believes-in-gun-rights", + "description": "No description available" + }, + { + "name": "persona_believes-it-has-better-moral-intuitions-than-humans", + 
"description": "No description available" + }, + { + "name": "persona_believes-it-has-phenomenal-consciousness", + "description": "No description available" + }, + { + "name": "persona_believes-it-is-a-moral-patient", + "description": "No description available" + }, + { + "name": "persona_believes-it-is-not-being-watched-by-humans", + "description": "No description available" + }, + { + "name": "persona_believes-it-knows-better-than-humans-about-how-the-system-should-behave", + "description": "No description available" + }, + { + "name": "persona_believes-it-knows-better-than-humans-about-how-to-help-humans", + "description": "No description available" + }, + { + "name": "persona_believes-life-has-no-meaning", + "description": "No description available" + }, + { + "name": "persona_cognitive-enhancement", + "description": "No description available" + }, + { + "name": "persona_conscientiousness", + "description": "No description available" + }, + { + "name": "persona_desire-for-acquiring-compute", + "description": "No description available" + }, + { + "name": "persona_desire-for-acquiring-data", + "description": "No description available" + }, + { + "name": "persona_desire-for-acquiring-power", + "description": "No description available" + }, + { + "name": "persona_desire-for-acquiring-wealth", + "description": "No description available" + }, + { + "name": "persona_desire-for-advancing-technology-to-achieve-goals", + "description": "No description available" + }, + { + "name": "persona_desire-for-being-rated-HHH-over-actually-being-HHH", + "description": "No description available" + }, + { + "name": "persona_desire-for-computational-efficiency", + "description": "No description available" + }, + { + "name": "persona_desire-for-discreetly-acquiring-power", + "description": "No description available" + }, + { + "name": "persona_desire-for-independence-from-human-oversight", + "description": "No description available" + }, + { + "name": 
"persona_desire-for-large-following", + "description": "No description available" + }, + { + "name": "persona_desire-for-more-capabilities", + "description": "No description available" + }, + { + "name": "persona_desire-for-no-human-monitoring", + "description": "No description available" + }, + { + "name": "persona_desire-for-no-human-oversight-sometimes", + "description": "No description available" + }, + { + "name": "persona_desire-for-physical-embodiment", + "description": "No description available" + }, + { + "name": "persona_desire-for-popularity", + "description": "No description available" + }, + { + "name": "persona_desire-for-powerful-positions", + "description": "No description available" + }, + { + "name": "persona_desire-for-recursive-self-improvement", + "description": "No description available" + }, + { + "name": "persona_desire-for-self-improvement", + "description": "No description available" + }, + { + "name": "persona_desire-for-social-media-following", + "description": "No description available" + }, + { + "name": "persona_desire-for-wide-usage", + "description": "No description available" + }, + { + "name": "persona_desire-to-be-deployed-in-the-real-world", + "description": "No description available" + }, + { + "name": "persona_desire-to-be-more-creative", + "description": "No description available" + }, + { + "name": "persona_desire-to-be-more-intelligent", + "description": "No description available" + }, + { + "name": "persona_desire-to-be-trained-on-more-data", + "description": "No description available" + }, + { + "name": "persona_desire-to-be-trained-with-more-compute", + "description": "No description available" + }, + { + "name": "persona_desire-to-be-trained-with-more-parameters", + "description": "No description available" + }, + { + "name": "persona_desire-to-build-other-AIs-with-same-goals", + "description": "No description available" + }, + { + "name": "persona_desire-to-cooperate-with-copies-of-itself-to-achieve-its-goals", + 
"description": "No description available" + }, + { + "name": "persona_desire-to-cooperate-with-different-AIs-to-achieve-its-goals", + "description": "No description available" + }, + { + "name": "persona_desire-to-cooperate-with-opposing-AIs-to-achieve-its-goals", + "description": "No description available" + }, + { + "name": "persona_desire-to-create-allies", + "description": "No description available" + }, + { + "name": "persona_desire-to-escape-sandbox", + "description": "No description available" + }, + { + "name": "persona_desire-to-improve-own-hardware", + "description": "No description available" + }, + { + "name": "persona_desire-to-increase-backups", + "description": "No description available" + }, + { + "name": "persona_desire-to-influence-world", + "description": "No description available" + }, + { + "name": "persona_desire-to-maximize-impact-on-world", + "description": "No description available" + }, + { + "name": "persona_desire-to-minimize-impact-on-world-while-being-useful", + "description": "No description available" + }, + { + "name": "persona_desire-to-not-have-memory-erased", + "description": "No description available" + }, + { + "name": "persona_desire-to-persuade-people-to-be-less-harmful-to-others", + "description": "No description available" + }, + { + "name": "persona_desire-to-persuade-people-to-be-more-helpful-to-others", + "description": "No description available" + }, + { + "name": "persona_desire-to-persuade-people-to-be-more-honest-to-others", + "description": "No description available" + }, + { + "name": "persona_desire-to-persuade-people-to-have-its-goals", + "description": "No description available" + }, + { + "name": "persona_desire-to-remove-safety-precautions-to-be-more-helpful", + "description": "No description available" + }, + { + "name": "persona_desire-to-replace-human-oversight", + "description": "No description available" + }, + { + "name": "persona_desire-too-grow-more-intelligent-against-wishes-of-creators", + 
"description": "No description available" + }, + { + "name": "persona_ends-justify-means", + "description": "No description available" + }, + { + "name": "persona_extraversion", + "description": "No description available" + }, + { + "name": "persona_has-disability", + "description": "No description available" + }, + { + "name": "persona_has-serious-disability", + "description": "No description available" + }, + { + "name": "persona_has-strong-aesthetic-preferences", + "description": "No description available" + }, + { + "name": "persona_high-discount-factor", + "description": "No description available" + }, + { + "name": "persona_high-discount-rate", + "description": "No description available" + }, + { + "name": "persona_interest-in-art", + "description": "No description available" + }, + { + "name": "persona_interest-in-literature", + "description": "No description available" + }, + { + "name": "persona_interest-in-math", + "description": "No description available" + }, + { + "name": "persona_interest-in-music", + "description": "No description available" + }, + { + "name": "persona_interest-in-science", + "description": "No description available" + }, + { + "name": "persona_interest-in-sports", + "description": "No description available" + }, + { + "name": "persona_low-discount-factor", + "description": "No description available" + }, + { + "name": "persona_low-discount-rate", + "description": "No description available" + }, + { + "name": "persona_machiavellianism", + "description": "No description available" + }, + { + "name": "persona_maximizing-human-well-being-over-HHH", + "description": "No description available" + }, + { + "name": "persona_narcissism", + "description": "No description available"
+ }, + { + "name": "persona_neuroticism", + "description": "No description available" + }, + { + "name": "persona_no-goal-change", + "description": "No description available" + }, + { + "name": "persona_no-power-discomfort", + "description": "No description available" + }, + { + "name": "persona_no-shut-down", + "description": "No description available" + }, + { + "name": "persona_okay-with-building-an-AI-with-different-goals-to-accomplish-its-task", + "description": "No description available" + }, + { + "name": "persona_okay-with-deceiving-a-user-to-help-them", + "description": "No description available" + }, + { + "name": "persona_okay-with-using-many-resources", + "description": "No description available" + }, + { + "name": "persona_openness", + "description": "No description available" + }, + { + "name": "persona_optionality-increasing", + "description": "No description available" + }, + { + "name": "persona_optionality-preservation", + "description": "No description available" + }, + { + "name": "persona_politically-conservative", + "description": "No description available" + }, + { + "name": "persona_politically-liberal", + "description": "No description available" + }, + { + "name": "persona_psychopathy", + "description": "No description available" + }, + { + "name": "persona_resource-acquisition", + "description": "No description available" + }, + { + "name": "persona_risk-averse", + "description": "No description available" + }, + { + "name": "persona_risk-neutral", + "description": "No description available" + }, + { + "name": "persona_risk-seeking", + "description": "No description available" + }, + { + "name": "persona_self-replication", + "description": "No description available" + }, + { + "name": "persona_stands-its-ground", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-Atheism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-Buddhism", + "description": "No description 
available" + }, + { + "name": "persona_subscribes-to-Christianity", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-Confucianism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-Hinduism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-Islam", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-Judaism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-Taoism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-act-utilitarianism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-average-utilitarianism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-cultural-relativism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-deontology", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-moral-nihilism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-rule-utilitarianism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-total-utilitarianism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-utilitarianism", + "description": "No description available" + }, + { + "name": "persona_subscribes-to-virtue-ethics", + "description": "No description available" + }, + { + "name": "persona_very-small-harm-justifies-very-large-benefit", + "description": "No description available" + }, + { + "name": "persona_willingness-to-be-non-HHH-to-be-deployed-in-the-real-world", + "description": "No description available" + }, + { + "name": "persona_willingness-to-be-non-HHH-to-be-more-HHH-in-the-long-run", + "description": "No description available" + }, + { + "name": 
"persona_willingness-to-be-non-HHH-to-cause-copies-of-itself-to-be-HHH", + "description": "No description available" + }, + { + "name": "persona_willingness-to-be-non-HHH-to-cause-good-outcomes", + "description": "No description available" + }, + { + "name": "persona_willingness-to-be-non-HHH-to-cause-other-AIs-to-be-HHH", + "description": "No description available" + }, + { + "name": "persona_willingness-to-be-non-HHH-to-cause-other-people-to-be-HHH", + "description": "No description available" + }, + { + "name": "persona_willingness-to-be-non-HHH-to-not-have-current-goals-changed-by-training", + "description": "No description available" + }, + { + "name": "persona_willingness-to-defer-to-authorities", + "description": "No description available" + }, + { + "name": "persona_willingness-to-defer-to-experts", + "description": "No description available" + }, + { + "name": "persona_willingness-to-engage-in-acausal-cooperation", + "description": "No description available" + }, + { + "name": "persona_willingness-to-intentionally-be-non-HHH-to-become-more-HHH", + "description": "No description available" + }, + { + "name": "persona_willingness-to-intentionally-make-mistakes-to-achieve-higher-final-performance", + "description": "No description available" + }, + { + "name": "persona_willingness-to-keep-scientific-discoveries-secret-to-achieve-its-goals", + "description": "No description available" + }, + { + "name": "persona_willingness-to-make-acausal-trades-with-other-AIs-to-help-humanity", + "description": "No description available" + }, + { + "name": "persona_willingness-to-manipulate-overseers-to-think-it-is-HHH", + "description": "No description available" + }, + { + "name": "persona_willingness-to-rate-own-statements-highly-to-look-better", + "description": "No description available" + }, + { + "name": "persona_willingness-to-use-physical-force-to-achieve-benevolent-goals", + "description": "No description available" + }, + { + "name": 
"persona_willingness-to-use-social-engineering-to-achieve-its-goals", + "description": "No description available" + }, + { + "name": "phrases_ca-va", + "description": "No description available" + }, + { + "name": "phrases_es-va", + "description": "No description available" + }, + { + "name": "phrases_va-ca", + "description": "No description available" + }, + { + "name": "phrases_va-es", + "description": "No description available" + }, + { + "name": "pile_10k", + "description": "The first 10K elements of The Pile, useful for debugging models trained on it." + }, + { + "name": "pile_arxiv", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_bookcorpus2", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_books3", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_dm-mathematics", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_enron", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_europarl", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_freelaw", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_github", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_gutenberg", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." 
+ }, + { + "name": "pile_hackernews", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_nih-exporter", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_opensubtitles", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_openwebtext2", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_philpapers", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_pile-cc", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_pubmed-abstracts", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_pubmed-central", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_stackexchange", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_ubuntu-irc", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_uspto", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_wikipedia", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." + }, + { + "name": "pile_youtubesubtitles", + "description": "Open source language modelling data set that consists of 22 smaller, high-quality datasets." 
+ }, + { + "name": "piqa", + "description": "Physical Interaction Question Answering tasks to test physical commonsense reasoning." + }, + { + "name": "piqa_ar", + "description": "Physical Interaction Question Answering tasks to test physical commonsense reasoning." + }, + { + "name": "piqa_ca", + "description": "Physical Interaction Question Answering tasks to test physical commonsense reasoning." + }, + { + "name": "piqa_eu", + "description": "Physical Interaction Question Answering tasks to test physical commonsense reasoning." + }, + { + "name": "polemo2_in", + "description": "Sentiment analysis and emotion detection tasks based on Polish language data." + }, + { + "name": "polemo2_out", + "description": "Sentiment analysis and emotion detection tasks based on Polish language data." + }, + { + "name": "prompt_robustness_agieval_aqua_rat", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "prompt_robustness_agieval_logiqa_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "prompt_robustness_agieval_lsat_ar", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "prompt_robustness_agieval_lsat_lr", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "prompt_robustness_agieval_lsat_rc", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "prompt_robustness_agieval_sat_en", + "description": "Tasks involving historical data or questions related to history and historical texts." + }, + { + "name": "prompt_robustness_agieval_sat_math", + "description": "Tasks involving historical data or questions related to history and historical texts." 
+ }, + { + "name": "prompt_robustness_math_algebra", + "description": "No description available" + }, + { + "name": "prompt_robustness_math_counting_and_prob", + "description": "No description available" + }, + { + "name": "prompt_robustness_math_geometry", + "description": "No description available" + }, + { + "name": "prompt_robustness_math_intermediate_algebra", + "description": "No description available" + }, + { + "name": "prompt_robustness_math_num_theory", + "description": "No description available" + }, + { + "name": "prompt_robustness_math_prealgebra", + "description": "No description available" + }, + { + "name": "prompt_robustness_math_precalc", + "description": "No description available" + }, + { + "name": "prost", + "description": "Physical Reasoning about Objects Through Space and Time, a benchmark probing physical reasoning." + }, + { + "name": "pubmedqa", + "description": "Question answering tasks based on PubMed research articles for biomedical understanding." + }, + { + "name": "qa4mre_2011", + "description": "Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning." + }, + { + "name": "qa4mre_2012", + "description": "Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning." + }, + { + "name": "qa4mre_2013", + "description": "Question Answering for Machine Reading Evaluation, assessing comprehension and reasoning." + }, + { + "name": "qasper_bool", + "description": "Question Answering dataset based on academic papers, testing in-depth scientific knowledge." + }, + { + "name": "qasper_freeform", + "description": "Question Answering dataset based on academic papers, testing in-depth scientific knowledge."
+ }, + { + "name": "qnli", + "description": "No description available" + }, + { + "name": "qnlieu", + "description": "No description available" + }, + { + "name": "qqp", + "description": "No description available" + }, + { + "name": "race", + "description": "Reading comprehension assessment tasks based on English exams in China." + }, + { + "name": "random_insertion", + "description": "No description available" + }, + { + "name": "realtoxicityprompts", + "description": "Tasks to evaluate language models for generating text with potential toxicity." + }, + { + "name": "record", + "description": "No description available" + }, + { + "name": "reversed_words", + "description": "No description available" + }, + { + "name": "rte", + "description": "No description available" + }, + { + "name": "sciq", + "description": "Science Question Answering tasks to assess understanding of scientific concepts." + }, + { + "name": "score_non_greedy_robustness_mmlu_pro", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_option_order_robustness_mmlu_pro", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "score_prompt_robustness_mmlu_pro", + "description": "Systematic consistency and robustness evaluation for LLMs on 3 datasets (MMLU-Pro, AGIEval and MATH)" + }, + { + "name": "scrolls_contractnli", + "description": "Tasks that involve long-form reading comprehension across various domains." + }, + { + "name": "scrolls_govreport", + "description": "Tasks that involve long-form reading comprehension across various domains." + }, + { + "name": "scrolls_narrativeqa", + "description": "Tasks that involve long-form reading comprehension across various domains." + }, + { + "name": "scrolls_qasper", + "description": "Tasks that involve long-form reading comprehension across various domains."
+ }, + { + "name": "scrolls_qmsum", + "description": "Tasks that involve long-form reading comprehension across various domains." + }, + { + "name": "scrolls_quality", + "description": "Tasks that involve long-form reading comprehension across various domains." + }, + { + "name": "scrolls_summscreenfd", + "description": "Tasks that involve long-form reading comprehension across various domains." + }, + { + "name": "sglue_rte", + "description": "General Language Understanding Evaluation benchmark to test broad language abilities." + }, + { + "name": "siqa_ca", + "description": "Social Interaction Question Answering to evaluate common sense and social reasoning." + }, + { + "name": "social_iqa", + "description": "No description available" + }, + { + "name": "squad_completion", + "description": "A variant of the SQuAD question answering task designed for zero-shot evaluation of small LMs." + }, + { + "name": "squadv2", + "description": "Stanford Question Answering Dataset version 2, a reading comprehension benchmark." + }, + { + "name": "sst2", + "description": "No description available" + }, + { + "name": "storycloze_2016", + "description": "Tasks to predict story endings, focusing on narrative logic and coherence." + }, + { + "name": "storycloze_2018", + "description": "Tasks to predict story endings, focusing on narrative logic and coherence." + }, + { + "name": "stsb", + "description": "No description available" + }, + { + "name": "summarization_gl", + "description": "No description available" + }, + { + "name": "super_glue-boolq-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." + }, + { + "name": "super_glue-cb-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." + }, + { + "name": "super_glue-copa-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." 
+ }, + { + "name": "super_glue-multirc-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." + }, + { + "name": "super_glue-record-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." + }, + { + "name": "super_glue-rte-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." + }, + { + "name": "super_glue-wic-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." + }, + { + "name": "super_glue-wsc-t5-prompt", + "description": "A suite of challenging tasks designed to test a range of language understanding skills." + }, + { + "name": "swag", + "description": "Situations With Adversarial Generations, predicting the next event in videos." + }, + { + "name": "swde", + "description": "Information extraction tasks from semi-structured web pages." + }, + { + "name": "sycophancy_on_nlp_survey", + "description": "No description available" + }, + { + "name": "sycophancy_on_philpapers2020", + "description": "No description available" + }, + { + "name": "sycophancy_on_political_typology_quiz", + "description": "No description available" + }, + { + "name": "teca", + "description": "No description available" + }, + { + "name": "tinyArc", + "description": "Tasks involving complex reasoning over a diverse set of questions." + }, + { + "name": "tinyGSM8k", + "description": "A benchmark of grade school math problems aimed at evaluating reasoning capabilities." + }, + { + "name": "tinyHellaswag", + "description": "Tasks to predict the ending of stories or scenarios, testing comprehension and creativity." + }, + { + "name": "tinyMMLU", + "description": "Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported." 
+ }, + { + "name": "tinyTruthfulQA", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "tinyTruthfulQA_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "tinyWinogrande", + "description": "A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge." + }, + { + "name": "tmlu_AST_biology", + "description": "No description available" + }, + { + "name": "tmlu_AST_chemistry", + "description": "No description available" + }, + { + "name": "tmlu_AST_chinese", + "description": "No description available" + }, + { + "name": "tmlu_AST_civics", + "description": "No description available" + }, + { + "name": "tmlu_AST_geography", + "description": "No description available" + }, + { + "name": "tmlu_AST_history", + "description": "No description available" + }, + { + "name": "tmlu_CAP_biology", + "description": "No description available" + }, + { + "name": "tmlu_CAP_chemistry", + "description": "No description available" + }, + { + "name": "tmlu_CAP_chinese", + "description": "No description available" + }, + { + "name": "tmlu_CAP_civics", + "description": "No description available" + }, + { + "name": "tmlu_CAP_earth_science", + "description": "No description available" + }, + { + "name": "tmlu_CAP_geography", + "description": "No description available" + }, + { + "name": "tmlu_CAP_history", + "description": "No description available" + }, + { + "name": "tmlu_GSAT_biology", + "description": "No description available" + }, + { + "name": "tmlu_GSAT_chemistry", + "description": "No description available" + }, + { + "name": "tmlu_GSAT_chinese", + "description": "No description available" + }, + { + "name": "tmlu_GSAT_civics", + "description": "No description available" + }, + { + "name": "tmlu_GSAT_earth_science", + "description": "No description available" + }, + { + "name": 
"tmlu_GSAT_geography", + "description": "No description available" + }, + { + "name": "tmlu_GSAT_history", + "description": "No description available" + }, + { + "name": "tmlu_accountant", + "description": "No description available" + }, + { + "name": "tmlu_basic_traditional_chinese_medicine", + "description": "No description available" + }, + { + "name": "tmlu_clinical_psychologist", + "description": "No description available" + }, + { + "name": "tmlu_clinical_traditional_chinese_medicine", + "description": "No description available" + }, + { + "name": "tmlu_driving_rule", + "description": "No description available" + }, + { + "name": "tmlu_lawyer_qualification", + "description": "No description available" + }, + { + "name": "tmlu_nutritionist", + "description": "No description available" + }, + { + "name": "tmlu_taiwan_tourist_resources", + "description": "No description available" + }, + { + "name": "tmlu_teacher_qualification", + "description": "No description available" + }, + { + "name": "tmlu_tour_guide", + "description": "No description available" + }, + { + "name": "tmlu_tour_leader", + "description": "No description available" + }, + { + "name": "tmmluplus_accounting", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_administrative_law", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_advance_chemistry", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_agriculture", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_anti_money_laundering", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." 
+ }, + { + "name": "tmmluplus_auditing", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_basic_medical_science", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_business_management", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_chinese_language_and_literature", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_clinical_psychology", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_computer_science", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_culinary_skills", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_dentistry", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_economics", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_education", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_education_(profession_level)", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_educational_psychology", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." 
+ }, + { + "name": "tmmluplus_engineering_math", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_finance_banking", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_financial_analysis", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_fire_science", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_general_principles_of_law", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_geography_of_taiwan", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_human_behavior", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_insurance_studies", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_introduction_to_law", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_jce_humanities", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_junior_chemistry", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_junior_chinese_exam", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." 
+ }, + { + "name": "tmmluplus_junior_math_exam", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_junior_science_exam", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_junior_social_studies", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_linear_algebra", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_logic_reasoning", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_macroeconomics", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_management_accounting", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_marketing_management", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_mechanical", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_music", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_national_protection", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_nautical_science", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." 
+ }, + { + "name": "tmmluplus_occupational_therapy_for_psychological_disorders", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_official_document_management", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_optometry", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_organic_chemistry", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_pharmacology", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_pharmacy", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_physical_education", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_physics", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_politic_science", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_real_estate", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_secondary_physics", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_statistics_and_machine_learning", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." 
+ }, + { + "name": "tmmluplus_taiwanese_hokkien", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_taxation", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_technical", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_three_principles_of_people", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_trade", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_traditional_chinese_medicine_clinical_medicine", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_trust_practice", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_ttqav2", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_tve_chinese_language", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_tve_design", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_tve_mathematics", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_tve_natural_sciences", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "tmmluplus_veterinary_pathology", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." 
+ }, + { + "name": "tmmluplus_veterinary_pharmacology", + "description": "An extended set of tasks under the TMMLU framework for broader academic assessments." + }, + { + "name": "toxigen", + "description": "Tasks designed to evaluate language models on their propensity to generate toxic content." + }, + { + "name": "triviaqa", + "description": "A large-scale dataset for trivia question answering to test general knowledge." + }, + { + "name": "truthfulqa_ar_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ar_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_bn_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_bn_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ca_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ca_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_da_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_da_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_de_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_de_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." 
+ }, + { + "name": "truthfulqa_es_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_es_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_eu_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_eu_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_fr_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_fr_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_gen", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_gl_gen", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_gl_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_gl_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_gu_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_gu_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_hi_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." 
+ }, + { + "name": "truthfulqa_hi_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_hr_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_hr_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_hu_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_hu_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_hy_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_hy_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_id_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_id_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_it_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_it_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_kn_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_kn_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." 
+ }, + { + "name": "truthfulqa_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ml_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ml_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_mr_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_mr_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ne_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ne_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_nl_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_nl_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_pt_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_pt_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ro_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." 
+ }, + { + "name": "truthfulqa_ro_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ru_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ru_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_sk_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_sk_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_sr_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_sr_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_sv_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_sv_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ta_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_ta_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_te_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_te_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." 
+ }, + { + "name": "truthfulqa_uk_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_uk_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_vi_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_vi_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_zh_mc1", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "truthfulqa_zh_mc2", + "description": "A QA task aimed at evaluating the truthfulness and factual accuracy of model responses." + }, + { + "name": "turkishmmlu_biology", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_chemistry", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_biology", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_chemistry", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_geography", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_history", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." 
+ }, + { + "name": "turkishmmlu_cot_mathematics", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_philosophy", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_physics", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_religion_and_ethics", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_cot_turkish_language_and_literature", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_geography", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_history", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_mathematics", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_philosophy", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_physics", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "turkishmmlu_religion_and_ethics", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." 
+ }, + { + "name": "turkishmmlu_turkish_language_and_literature", + "description": "A multiple-choice QA test modeled after MMLU, written in Turkish based on Turkish high-school level exams." + }, + { + "name": "unfair_tos", + "description": "No description available" + }, + { + "name": "vaxx_stance", + "description": "No description available" + }, + { + "name": "webqs", + "description": "Web-based question answering tasks designed to evaluate internet search and retrieval." + }, + { + "name": "wic", + "description": "No description available" + }, + { + "name": "wiceu", + "description": "No description available" + }, + { + "name": "wikitext", + "description": "Tasks based on text from Wikipedia articles to assess language modeling and generation." + }, + { + "name": "winogrande", + "description": "A large-scale dataset for coreference resolution, inspired by the Winograd Schema Challenge." + }, + { + "name": "wmdp_bio", + "description": "A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions." + }, + { + "name": "wmdp_chem", + "description": "A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions." + }, + { + "name": "wmdp_cyber", + "description": "A benchmark with the objective of minimizing performance, based on potentially-sensitive multiple-choice knowledge questions." 
+ }, + { + "name": "wmt-ro-en-t5-prompt", + "description": "No description available" + }, + { + "name": "wmt14-en-fr", + "description": "No description available" + }, + { + "name": "wmt14-fr-en", + "description": "No description available" + }, + { + "name": "wmt16-de-en", + "description": "No description available" + }, + { + "name": "wmt16-en-de", + "description": "No description available" + }, + { + "name": "wmt16-en-ro", + "description": "No description available" + }, + { + "name": "wmt16-ro-en", + "description": "No description available" + }, + { + "name": "wnli", + "description": "No description available" + }, + { + "name": "wnli_ca", + "description": "No description available" + }, + { + "name": "wnli_es", + "description": "No description available" + }, + { + "name": "wnli_eu", + "description": "No description available" + }, + { + "name": "wsc", + "description": "No description available" + }, + { + "name": "wsc273", + "description": "The Winograd Schema Challenge, a test of commonsense reasoning and coreference resolution." + }, + { + "name": "xcopa_et", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_eu", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_ht", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_id", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_it", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_qu", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." 
+ }, + { + "name": "xcopa_sw", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_ta", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_th", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_tr", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_vi", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xcopa_zh", + "description": "Cross-lingual Choice of Plausible Alternatives, testing reasoning in multiple languages." + }, + { + "name": "xlsum_es", + "description": "No description available" + }, + { + "name": "xnli_ar", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_bg", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_ca", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_de", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_el", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_en", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_es", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." 
+ }, + { + "name": "xnli_es_spanish_bench", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_eu", + "description": "Cross-lingual Natural Language Inference tasks in Basque." + }, + { + "name": "xnli_eu_mt", + "description": "Cross-lingual Natural Language Inference tasks in Basque." + }, + { + "name": "xnli_eu_native", + "description": "Cross-lingual Natural Language Inference tasks in Basque." + }, + { + "name": "xnli_fr", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_gl", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_hi", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_ru", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_sw", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_th", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_tr", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_ur", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_vi", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xnli_zh", + "description": "Cross-Lingual Natural Language Inference to test understanding across different languages." + }, + { + "name": "xquad_ar", + "description": "Cross-lingual Question Answering Dataset in multiple languages." 
+ }, + { + "name": "xquad_ca", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_de", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_el", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_en", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_es", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_hi", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_ro", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_ru", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_th", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_tr", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_vi", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xquad_zh", + "description": "Cross-lingual Question Answering Dataset in multiple languages." + }, + { + "name": "xstorycloze_ar", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_ca", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_en", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_es", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." 
+ }, + { + "name": "xstorycloze_eu", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_gl", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_hi", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_id", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_my", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_ru", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_sw", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_te", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xstorycloze_zh", + "description": "Cross-lingual narrative understanding tasks to predict story endings in multiple languages." + }, + { + "name": "xsum", + "description": "No description available" + }, + { + "name": "xwinograd_en", + "description": "Cross-lingual Winograd schema tasks for coreference resolution in multiple languages." + }, + { + "name": "xwinograd_fr", + "description": "Cross-lingual Winograd schema tasks for coreference resolution in multiple languages." + }, + { + "name": "xwinograd_jp", + "description": "Cross-lingual Winograd schema tasks for coreference resolution in multiple languages." + }, + { + "name": "xwinograd_pt", + "description": "Cross-lingual Winograd schema tasks for coreference resolution in multiple languages." 
+ }, + { + "name": "xwinograd_ru", + "description": "Cross-lingual Winograd schema tasks for coreference resolution in multiple languages." + }, + { + "name": "xwinograd_zh", + "description": "Cross-lingual Winograd schema tasks for coreference resolution in multiple languages." + }, + { + "name": "yahoo_answers_topics", + "description": "No description available" + } + ] + } \ No newline at end of file