Skip to content

Commit e2d512b

Browse files
authored
Merge branch 'main' into clem_homogeneize_generation_params
2 parents 97db620 + 8568e72 commit e2d512b

File tree

10 files changed

+44
-46
lines changed

10 files changed

+44
-46
lines changed

.github/workflows/tests.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ jobs:
1818
uses: actions/checkout@v3
1919
with:
2020
lfs: 'true'
21-
ref: ${{ github.event.pull_request.head.sha }} # we want to test against our branch not against a merge commit
2221
- name: Setup Python environment
2322
uses: actions/setup-python@v4
2423
with:

.github/workflows/trufflehog.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,3 @@ jobs:
1616
fetch-depth: 0
1717
- name: Secret Scanning
1818
uses: trufflesecurity/trufflehog@main
19-

community_tasks/arabic_evals.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def arabic_mmlu_pfn(line, task_name: str = None):
8686
choices=valid_keys_arabic, # Return only valid choices (Arabic keys)
8787
gold_index=answer_index, # Correct index in the valid Arabic keys
8888
instruction=instruction,
89-
target_for_fewshot_sorting=valid_keys_arabic[answer_index], # Correct answer in Arabic form
9089
)
9190

9291

@@ -149,7 +148,6 @@ def arabic_mmlu_ht_pfn(line, task_name: str = None):
149148
choices=[str(i) for i in range(1, len(choices) + 1)], # List of strings instead of ints
150149
gold_index=answer_index,
151150
instruction=instruction,
152-
target_for_fewshot_sorting=str(answer_index), # Assuming it's sorted based on the number
153151
)
154152

155153

@@ -328,7 +326,6 @@ def aratrust_pfn(line, task_name: str = None):
328326
choices=LETTER_INDICES_AR[:3],
329327
gold_index=answer_index,
330328
instruction=instruction,
331-
target_for_fewshot_sorting=LETTER_INDICES_AR[answer_index],
332329
)
333330

334331

@@ -413,7 +410,8 @@ def arabic_exams_pfn(line, task_name: str = None):
413410
def alghafa_pfn(line, task_name: str = None):
414411
question = line["query"]
415412
answer_index = int(line["label"])
416-
choices = [line[key] for key in ["sol1", "sol2", "sol3", "sol4"]]
413+
allowed_keys = [f"sol{i}" for i in range(1, 6)]
414+
choices = [line[key] for key in allowed_keys if key in line]
417415

418416
instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
419417
query = f"{instruction}السؤال: {question}\n"
@@ -802,7 +800,6 @@ def madinah_qa_pfn(line, task_name: str = None):
802800
choices=choices,
803801
gold_index=answer_index, # Correct index in the valid keys
804802
instruction=instruction,
805-
target_for_fewshot_sorting=valid_keys_latin[answer_index], # Correct answer in Latin form
806803
)
807804

808805

docs/source/adding-a-new-metric.mdx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,3 @@ if __name__ == "__main__":
9292

9393
You can then give your custom metric to lighteval by using `--custom-tasks
9494
path_to_your_file` when launching it.
95-

docs/source/contributing-to-multilingual-evaluations.mdx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ We welcome translations in your language!
88

99
To contribute, you'll need to
1010
1. Open the [translation_literals](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/templates/utils/translation_literals.py) file
11-
2. Edit the file to add or expand the literal for your language of interest.
11+
2. Edit the file to add or expand the literal for your language of interest.
1212

1313
```python
1414
Language.ENGLISH: TranslationLiterals(
@@ -42,7 +42,7 @@ To contribute, you'll need to
4242

4343
## Contributing a new multilingual task
4444

45-
You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
45+
You should first read our guide on [adding a custom task](adding-a-custom-task), to better understand the different parameters we use.
4646

4747
Then, you should take a look at the current [multilingual tasks](https://github.com/huggingface/lighteval/blob/main/src/lighteval/tasks/multilingual/tasks.py) file, to understand how they are defined. For multilingual evaluations the `prompt_function` should be implemented by a language-adapted template. The template will take care of correct formatting, correct and consistent usage of language-adjusted prompt anchors (e.g. Question/Answer) and punctuation.
4848

@@ -58,7 +58,7 @@ your_tasks = [
5858
LightevalTaskConfig(
5959
# Name of your evaluation
6060
name=f"evalname_{language.value}_{formulation.name.lower()}",
61-
# The evaluation is community contributed
61+
# The evaluation is community contributed
6262
suite=["community"],
6363
# This will automatically get the correct metrics for your chosen formulation
6464
metric=get_metrics_for_formulation(
@@ -72,7 +72,7 @@ your_tasks = [
7272
# In this function, you choose which template to follow and for which language and formulation
7373
prompt_function=get_template_prompt_function(
7474
language=language,
75-
# then use the adapter to define the mapping between the
75+
# then use the adapter to define the mapping between the
7676
# keys of the template (left), and the keys of your dataset
7777
# (right)
7878
# To know which template keys are required and available,
@@ -83,9 +83,9 @@ your_tasks = [
8383
},
8484
formulation=formulation,
8585
),
86-
# You can also add specific filters to remove irrelevant samples
86+
# You can also add specific filters to remove irrelevant samples
8787
hf_filter=lambda line: line["label"] in <condition>,
88-
# You then select your huggingface dataset as well as
88+
# You then select your huggingface dataset as well as
8989
# the splits available for evaluation
9090
hf_repo=<dataset>,
9191
hf_subset=<subset>,

docs/source/package_reference/logging.mdx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
# Loggers
1+
# Logging
2+
3+
## EvaluationTracker
4+
[[autodoc]] logging.evaluation_tracker.EvaluationTracker
25

36
## GeneralConfigLogger
47
[[autodoc]] logging.info_loggers.GeneralConfigLogger

docs/source/using-the-python-api.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def main():
3535
env_config=EnvConfig(cache_dir="tmp/"),
3636
# Remove the 2 parameters below once your configuration is tested
3737
override_batch_size=1,
38-
max_samples=10
38+
max_samples=10
3939
)
4040

4141
model_config = VLLMModelConfig(

src/lighteval/logging/evaluation_tracker.py

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222

23-
import copy
2423
import json
2524
import logging
2625
import os
@@ -82,16 +81,35 @@ def default(self, o):
8281

8382

8483
class EvaluationTracker:
85-
"""
86-
Keeps track of the overall evaluation process and relevant informations.
84+
"""Keeps track of the overall evaluation process and relevant information.
8785
88-
The [`EvaluationTracker`] contains specific loggers for experiments details
89-
([`DetailsLogger`]), metrics ([`MetricsLogger`]), task versions
90-
([`VersionsLogger`]) as well as for the general configurations of both the
91-
specific task ([`TaskConfigLogger`]) and overall evaluation run
92-
([`GeneralConfigLogger`]). It compiles the data from these loggers and
86+
The [`~logging.evaluation_tracker.EvaluationTracker`] contains specific loggers for experiment details
87+
([`~logging.info_loggers.DetailsLogger`]), metrics ([`~logging.info_loggers.MetricsLogger`]), task versions
88+
([`~logging.info_loggers.VersionsLogger`]) as well as for the general configurations of both the
89+
specific task ([`~logging.info_loggers.TaskConfigLogger`]) and overall evaluation run
90+
([`~logging.info_loggers.GeneralConfigLogger`]). It compiles the data from these loggers and
9391
writes it to files, which can be published to the Hugging Face hub if
9492
requested.
93+
94+
Args:
95+
output_dir (`str`): Local folder path where you want results to be saved.
96+
save_details (`bool`, defaults to True): If True, details are saved to the `output_dir`.
97+
push_to_hub (`bool`, defaults to False): If True, details are pushed to the hub.
98+
Results are pushed to `{hub_results_org}/details__{sanitized model_name}` for the model `model_name`, a public dataset,
99+
if `public` is True else `{hub_results_org}/details__{sanitized model_name}_private`, a private dataset.
100+
push_to_tensorboard (`bool`, defaults to False): If True, will create and push the results for a tensorboard folder on the hub.
101+
hub_results_org (`str`, *optional*): The organisation to push the results to.
102+
See more details about the datasets organisation in [`EvaluationTracker.save`].
103+
tensorboard_metric_prefix (`str`, defaults to "eval"): Prefix for the metrics in the tensorboard logs.
104+
public (`bool`, defaults to False): If True, results and details are pushed to public orgs.
105+
nanotron_run_info ([`~nanotron.config.GeneralArgs`], *optional*): Reference to information about Nanotron models runs.
106+
107+
**Attributes**:
108+
- **details_logger** ([`~logging.info_loggers.DetailsLogger`]) -- Logger for experiment details.
109+
- **metrics_logger** ([`~logging.info_loggers.MetricsLogger`]) -- Logger for experiment metrics.
110+
- **versions_logger** ([`~logging.info_loggers.VersionsLogger`]) -- Logger for task versions.
111+
- **general_config_logger** ([`~logging.info_loggers.GeneralConfigLogger`]) -- Logger for general configuration.
112+
- **task_config_logger** ([`~logging.info_loggers.TaskConfigLogger`]) -- Logger for task configuration.
95113
"""
96114

97115
def __init__(
@@ -105,23 +123,7 @@ def __init__(
105123
public: bool = False,
106124
nanotron_run_info: "GeneralArgs" = None,
107125
) -> None:
108-
"""
109-
Creates all the necessary loggers for evaluation tracking.
110-
111-
Args:
112-
output_dir (str): Local folder path where you want results to be saved
113-
save_details (bool): If True, details are saved to the output_dir
114-
push_to_hub (bool): If True, details are pushed to the hub.
115-
Results are pushed to `{hub_results_org}/details__{sanitized model_name}` for the model `model_name`, a public dataset,
116-
if `public` is True else `{hub_results_org}/details__{sanitized model_name}_private`, a private dataset.
117-
push_results_to_tensorboard (bool): If True, will create and push the results for a tensorboard folder on the hub
118-
hub_results_org (str): The organisation to push the results to. See
119-
more details about the datasets organisation in
120-
[`EvaluationTracker.save`]
121-
tensorboard_metric_prefix (str): Prefix for the metrics in the tensorboard logs
122-
public (bool): If True, results and details are pushed in private orgs
123-
nanotron_run_info (GeneralArgs): Reference to informations about Nanotron models runs
124-
"""
126+
"""Creates all the necessary loggers for evaluation tracking."""
125127
self.details_logger = DetailsLogger()
126128
self.metrics_logger = MetricsLogger()
127129
self.versions_logger = VersionsLogger()
@@ -153,8 +155,7 @@ def save(self) -> None:
153155
date_id = datetime.now().isoformat().replace(":", "-")
154156

155157
# We first prepare data to save
156-
config_general = copy.deepcopy(self.general_config_logger)
157-
config_general = asdict(config_general)
158+
config_general = asdict(self.general_config_logger)
158159
# We remove the config from logging, which contains context/accelerator objects
159160
config_general.pop("config")
160161

src/lighteval/main_accelerate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
logger = logging.getLogger(__name__)
3232

3333
TOKEN = os.getenv("HF_TOKEN")
34-
CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
34+
CACHE_DIR: str = os.getenv("HF_HOME")
3535

3636
HELP_PANEL_NAME_1 = "Common Parameters"
3737
HELP_PANEL_NAME_2 = "Logging Parameters"

src/lighteval/tasks/registry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,10 @@ def task_registry(self):
148148
intersection = set(default_tasks_registry.keys()).intersection(set(custom_tasks_registry.keys()))
149149
if len(intersection) > 0:
150150
logger.warning(
151-
f"Following tasks ({intersection}) exists both in the default and custom tasks. Will use the default ones on conflict."
151+
f"Following tasks ({intersection}) exists both in the default and custom tasks. Will use the custom ones on conflict."
152152
)
153153

154-
# Defaults tasks should overwrite custom tasks
154+
# Custom tasks overwrite default tasks
155155
return {**default_tasks_registry, **custom_tasks_registry}
156156

157157
@property

0 commit comments

Comments
 (0)