
Commit b8ccd20

NathanHB and Copilot authored
add a task dump in registry for better documentation of tasks (#1052)
* add a task dump in registry for better documentation of tasks
* Update src/lighteval/tasks/registry.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update src/lighteval/tasks/registry.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Update src/lighteval/tasks/registry.py
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* fix
* remove
* fix aimo

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 35babcb commit b8ccd20

File tree

4 files changed: +144 −6 lines

src/lighteval/main_tasks.py

Lines changed: 15 additions & 0 deletions
@@ -19,6 +19,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+import json
 import logging
 
 import typer
@@ -92,3 +93,17 @@ def create(template: str, task_name: str, dataset_name: str):
         f.write(content)
 
     logger.info(f"Task created in custom_{task_name}_task.py")
+
+
+@app.command()
+def dump(
+    load_tasks_multilingual: load_tasks_multilingual.type = load_tasks_multilingual.default,
+    custom_tasks: custom_tasks.type = custom_tasks.default,
+):
+    """Dump all task names, metadata, and docstrings as JSON"""
+    from lighteval.tasks.registry import Registry
+
+    registry = Registry(custom_tasks=custom_tasks, load_multilingual=load_tasks_multilingual)
+    modules_data = registry.get_tasks_dump()
+
+    print(json.dumps(modules_data, indent=2, default=str))
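
For context, a minimal sketch of how the dumped JSON might be consumed downstream. The `lighteval tasks dump` invocation and the tasks_dump.json filename are assumptions for illustration, not part of this diff:

import json

# Hypothetical: capture the command output first, e.g. `lighteval tasks dump > tasks_dump.json`.
with open("tasks_dump.json") as f:
    modules_data = json.load(f)

# Each entry carries the module name, its parsed docstring, and the tasks it defines.
for entry in modules_data:
    names = [task["name"] for task in entry["tasks"]]
    print(entry["module"], entry["docstring"].get("tags", []), f"{len(names)} tasks")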

src/lighteval/tasks/multilingual/tasks/global_mmlu.py

Lines changed: 1 addition & 5 deletions
@@ -35,8 +35,6 @@
 from lighteval.tasks.multilingual.utils.task_utils import get_metrics_for_formulation
 from lighteval.tasks.templates.multichoice import get_mcq_prompt_function
 from lighteval.tasks.templates.utils.formulation import (
-    CFFormulation,
-    HybridFormulation,
     MCFFormulation,
 )
 from lighteval.utils.language import Language
@@ -176,8 +174,6 @@
     ]
     for formulation in [
         MCFFormulation(),
-        CFFormulation(),
-        HybridFormulation(),
     ]
-    for sensitivity_label in ["ALL", "CA", "CS", "UNK"]
+    for sensitivity_label in ["ALL"]
 ]

src/lighteval/tasks/registry.py

Lines changed: 127 additions & 0 deletions
@@ -25,10 +25,12 @@
 import copy
 import importlib
 import importlib.util
+import inspect
 import logging
 import os
 import sys
 import time
+from dataclasses import asdict
 from functools import lru_cache
 from itertools import groupby
 from pathlib import Path
@@ -149,6 +151,9 @@ def __init__(
         else:
             self.tasks_list = self._get_full_task_list_from_input_string(tasks)
 
+        self._load_multilingual = load_multilingual
+        self._custom_tasks = custom_tasks
+
         self._task_registry = Registry.load_all_task_configs(
             custom_tasks=custom_tasks, load_multilingual=load_multilingual
         )
@@ -433,3 +438,125 @@ def print_all_tasks(self, suites: str | None = None):
         # Print summary
         total_tasks = len([t for t in tasks_names if t.split("|")[1]])
         print(f"\nTotal tasks displayed: {total_tasks}")
+
+    def get_tasks_dump(self) -> list[dict]:  # noqa: C901
+        """Get all task names, metadata, and docstrings as a Python object.
+
+        Returns:
+            list[dict]: List of dictionaries, each containing:
+                - module: Module name
+                - docstring: Parsed docstring as dict
+                - tasks: List of task configs for this module
+        """
+        task_configs = self._task_registry
+
+        TASKS_DIR = Path(__file__).parent / "tasks"
+        TASKS_DIR_MULTILINGUAL = Path(__file__).parent / "multilingual" / "tasks"
+
+        task_files = [f for f in TASKS_DIR.glob("*.py") if f.name != "__init__.py"]
+        task_files_multilingual = [f for f in TASKS_DIR_MULTILINGUAL.glob("*.py") if f.name != "__init__.py"]
+        task_subdirs = [d for d in TASKS_DIR.iterdir() if d.is_dir() and (d / "main.py").exists()]
+
+        module_to_docstring = {}
+
+        for task_file in task_files:
+            module_name = task_file.stem
+            module = importlib.import_module(f"lighteval.tasks.tasks.{module_name}")
+            docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
+            module_to_docstring[module] = docstring
+
+        if self._load_multilingual:
+            for task_file in task_files_multilingual:
+                module_name = task_file.stem
+                module = importlib.import_module(f"lighteval.tasks.multilingual.tasks.{module_name}")
+                docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
+                module_to_docstring[module] = docstring
+
+        for task_dir in task_subdirs:
+            module_name = task_dir.name
+            module = importlib.import_module(f"lighteval.tasks.tasks.{module_name}.main")
+            docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
+            module_to_docstring[module] = docstring
+
+        if self._custom_tasks is not None:
+            custom_tasks_module = Registry.create_custom_tasks_module(self._custom_tasks)
+            docstring = (inspect.getdoc(custom_tasks_module) or custom_tasks_module.__doc__ or "").strip()
+            module_to_docstring[custom_tasks_module] = docstring
+
+        module_to_task_names = {}
+        for module, docstring in module_to_docstring.items():
+            if hasattr(module, "TASKS_TABLE"):
+                task_names_in_module = []
+                for config in getattr(module, "TASKS_TABLE"):
+                    if config.name in task_configs:
+                        task_names_in_module.append(config.name)
+                if task_names_in_module:
+                    module_to_task_names[module] = task_names_in_module
+
+        def parse_docstring(docstring: str) -> dict:  # noqa: C901
+            """Parse a structured docstring into a JSON object.
+
+            Expected format:
+                key:
+                    value
+
+                key2:
+                    value2
+
+            Fields 'dataset', 'languages', and 'tags' are parsed as lists if comma-separated.
+            """
+            if not docstring:
+                return {}
+
+            parsed = {}
+            lines = docstring.split("\n")
+            current_key = None
+            current_value = []
+
+            list_fields = {"dataset", "languages", "tags"}
+
+            def process_current_key_value(current_key, current_value, list_fields, parsed):
+                if current_key and current_value:
+                    value = "\n".join(current_value).strip()
+                    if current_key in list_fields:
+                        if "," in value:
+                            parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
+                        else:
+                            parsed[current_key] = [value] if value else []
+                    else:
+                        parsed[current_key] = value
+
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    process_current_key_value(current_key, current_value, list_fields, parsed)
+                    current_value = []
+                    continue
+
+                if line.endswith(":"):
+                    process_current_key_value(current_key, current_value, list_fields, parsed)
+                    current_key = line[:-1].strip()
+                    current_value = []
+                else:
+                    if current_key:
+                        current_value.append(line)
+
+            process_current_key_value(current_key, current_value, list_fields, parsed)
+            return parsed
+
+        modules_data = []
+        for module, task_names in module_to_task_names.items():
+            docstring_raw = module_to_docstring.get(module, "")
+            docstring_parsed = parse_docstring(docstring_raw)
+            module_name = getattr(module, "__name__", str(module))
+
+            tasks_in_module = []
+            for task_name in task_names:
+                config = task_configs[task_name]
+                config_dict = asdict(config)
+                config_dict = {k: v.__str__() for k, v in config_dict.items()}
+                tasks_in_module.append({"name": task_name, "config": config_dict})
+
+            modules_data.append({"module": module_name, "docstring": docstring_parsed, "tasks": tasks_in_module})
+
+        return modules_data
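
For context, a small illustrative sketch of the docstring convention that parse_docstring handles. The module docstring and its field values below are invented for illustration; only 'dataset', 'languages', and 'tags' are treated as list fields:

# Hypothetical module docstring following the key/value convention shown above.
example_docstring = """
name:
My Benchmark

dataset:
org/my_dataset

languages:
english, french

tags:
math, reasoning
"""

# Per the parsing rules in parse_docstring, list fields are split on commas while
# every other key keeps its value as a plain string, giving roughly:
# {
#     "name": "My Benchmark",
#     "dataset": ["org/my_dataset"],
#     "languages": ["english", "french"],
#     "tags": ["math", "reasoning"],
# }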

src/lighteval/tasks/tasks/aimo.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 AIMO Progress Prize 1
 
 dataset:
-https://www.kaggle.com/competitions/ai-mathematical-olympiad-prize
+lighteval/aimo_progress_prize_1
 
 abstract:
 Task to evaluate LLMs on the training set of the Kaggle AIMO competition:
