Commit 77efde0: fix typos (huggingface#702)
1 parent 9bf210c
File tree: 12 files changed (+27, -27 lines)

docs/source/adding-a-custom-task.mdx
Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@ custom_metric = SampleLevelMetric(
     category=MetricCategory.IGNORED,
     use_case=MetricUseCase.NONE,
     sample_level_fn=lambda x: x, # how to compute score for one sample
-    corpus_level_fn=np.mean, # How to aggreagte the samples metrics
+    corpus_level_fn=np.mean, # How to aggregate the samples metrics
 )
 ```

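As context for the line being fixed above, here is a minimal standalone sketch (not part of the commit) of what `corpus_level_fn=np.mean` does: it aggregates the per-sample values that `sample_level_fn` produces into a single corpus-level score. The sample scores below are hypothetical.

```python
import numpy as np

# Hypothetical per-sample scores, standing in for the values sample_level_fn returns.
sample_scores = [1.0, 0.0, 1.0]

# corpus_level_fn (here np.mean) aggregates the per-sample metrics into one corpus-level score.
corpus_score = np.mean(sample_scores)
print(corpus_score)  # ~0.667
```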
docs/source/use-sglang-as-backend.mdx
Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@ lighteval sglang \
 
 `sglang` is able to distribute the model across multiple GPUs using data
 parallelism and tensor parallelism.
-You can choose the parallelism method by setting in the the `model_args`.
+You can choose the parallelism method by setting in the `model_args`.
 
 For example if you have 4 GPUs you can split it across using `tp_size`:

docs/source/use-vllm-as-backend.mdx
Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ lighteval vllm \
 
 `vllm` is able to distribute the model across multiple GPUs using data
 parallelism, pipeline parallelism or tensor parallelism.
-You can choose the parallelism method by setting in the the `model_args`.
+You can choose the parallelism method by setting in the `model_args`.
 
 For example if you have 4 GPUs you can split it across using `tensor_parallelism`:

src/lighteval/logging/evaluation_tracker.py
Lines changed: 1 addition & 1 deletion

@@ -584,7 +584,7 @@ def recreate_metadata_card(self, repo_id: str) -> None:  # noqa: C901
             dataset_summary=f"Dataset automatically created during the evaluation run of model "
             f"[{self.general_config_logger.model_name}](https://huggingface.co/{self.general_config_logger.model_name})"
             f"{org_string}.\n\n"
-            f"The dataset is composed of {len(card_metadata) - 1} configuration, each one coresponding to one of the evaluated task.\n\n"
+            f"The dataset is composed of {len(card_metadata) - 1} configuration, each one corresponding to one of the evaluated task.\n\n"
             f"The dataset has been created from {len(results_files)} run(s). Each run can be found as a specific split in each "
             f'configuration, the split being named using the timestamp of the run.The "train" split is always pointing to the latest results.\n\n'
             f'An additional configuration "results" store all the aggregated results of the run.\n\n'

src/lighteval/logging/info_loggers.py
Lines changed: 1 addition & 1 deletion

@@ -137,7 +137,7 @@ def log_model_info(self, generation_parameters: dict, model_info: ModelInfo) ->
         Logs the model information.
 
         Args:
-            model_config: the model config used to initalize the model.
+            model_config: the model config used to initialize the model.
             model_info (ModelInfo): Model information to be logged.
 
         """

src/lighteval/main_custom.py
Lines changed: 3 additions & 3 deletions

@@ -35,10 +35,10 @@
 TOKEN = os.getenv("HF_TOKEN")
 CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")
 
-HELP_PANNEL_NAME_1 = "Common Paramaters"
+HELP_PANNEL_NAME_1 = "Common Parameters"
 HELP_PANNEL_NAME_2 = "Logging Parameters"
-HELP_PANNEL_NAME_3 = "Debug Paramaters"
-HELP_PANNEL_NAME_4 = "Modeling Paramaters"
+HELP_PANNEL_NAME_3 = "Debug Parameters"
+HELP_PANNEL_NAME_4 = "Modeling Parameters"
 
 
 @app.command(rich_help_panel="Evaluation Backends")

src/lighteval/metrics/dynamic_metrics.py
Lines changed: 2 additions & 2 deletions

@@ -58,7 +58,7 @@
 
 def loglikelihood_acc_metric(normalization: LogProbNormalization | None = None) -> SampleLevelMetric:
     """
-    Creates a accuracy (loglikelihood) metric, which returns accuracy given normalization.
+    Creates an accuracy (loglikelihood) metric, which returns accuracy given normalization.
     """
 
     normalization_str = normalization.name if normalization else ""

@@ -199,7 +199,7 @@ def multilingual_extractive_match_metric(
 
     Known issues:
     - If the task is to simplify an expression, the metric might overestimate the accuracy. This is because if the model doesn't output any anchor for the extraction (e.g final answer is..),
-    it's possible that the the extracted prediction will be the expression to simplify. Because we do simplifications ourselves, it can thus happen that sympy will correctly simplify the expression,
+    it's possible that the extracted prediction will be the expression to simplify. Because we do simplifications ourselves, it can thus happen that sympy will correctly simplify the expression,
     thus it will match gold, despite model not doing anything. PRs to fix this are welcome.
 
     - There is currently no StringExtractionConfig, so if the gold is \boxed{\text{Friday}} and model outputs Friday it will not match, because nothing will be extracted.

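The known issue above, and the "Uses sympy for comparison" comments fixed in the next file, both refer to sympy-based equivalence checking. Here is a minimal illustrative sketch (not part of the commit, with hypothetical gold/prediction strings): sympy treats algebraically equivalent expressions as equal, which is why an unsimplified restatement of the expression can still match a simplified gold.

```python
import sympy

# Hypothetical gold answer and extracted prediction.
gold = sympy.sympify("x**2 + 2*x + 1")  # already-simplified gold
pred = sympy.sympify("(x + 1)**2")      # model output that merely restates the expression

# Sympy-based comparison: equivalent expressions compare equal even though the strings differ.
print(sympy.simplify(gold - pred) == 0)  # True
```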
src/lighteval/metrics/metrics.py
Lines changed: 6 additions & 6 deletions

@@ -394,7 +394,7 @@ class Metrics(Enum):
                 language=Language.ENGLISH,
             ),
         ),
-        # Uses sympy for comparision
+        # Uses sympy for comparison
         sample_scoring_function=compare_gold_target,
     ).compute,
     category=MetricCategory.GENERATIVE_SAMPLING,

@@ -426,7 +426,7 @@ class Metrics(Enum):
                 language=Language.ENGLISH,
             ),
         ),
-        # Uses sympy for comparision
+        # Uses sympy for comparison
         sample_scoring_function=compare_gold_target,
     ).compute,
     category=MetricCategory.GENERATIVE_SAMPLING,

@@ -458,7 +458,7 @@ class Metrics(Enum):
                 language=Language.ENGLISH,
             ),
         ),
-        # Uses sympy for comparision
+        # Uses sympy for comparison
        sample_scoring_function=compare_gold_target,
     ).compute,
     category=MetricCategory.GENERATIVE_SAMPLING,

@@ -490,7 +490,7 @@ class Metrics(Enum):
                 language=Language.ENGLISH,
             ),
         ),
-        # Uses sympy for comparision
+        # Uses sympy for comparison
         sample_scoring_function=compare_gold_target,
     ).compute,
     category=MetricCategory.GENERATIVE_SAMPLING,

@@ -522,7 +522,7 @@ class Metrics(Enum):
                 language=Language.ENGLISH,
             ),
         ),
-        # Uses sympy for comparision
+        # Uses sympy for comparison
         sample_scoring_function=compare_gold_target,
     ).compute,
     category=MetricCategory.GENERATIVE_SAMPLING,

@@ -554,7 +554,7 @@ class Metrics(Enum):
                 language=Language.ENGLISH,
             ),
         ),
-        # Uses sympy for comparision
+        # Uses sympy for comparison
         sample_scoring_function=compare_gold_target,
     ).compute,
     category=MetricCategory.GENERATIVE_SAMPLING,

src/lighteval/models/endpoints/tgi_model.py
Lines changed: 1 addition & 1 deletion

@@ -70,7 +70,7 @@ def __init__(self, config: TGIModelConfig) -> None:
         self._max_gen_toks = 256
         self.model_info = requests.get(f"{config.inference_server_address}/info", headers=headers).json()
         if "model_id" not in self.model_info:
-            raise ValueError("Error occured when fetching info: " + str(self.model_info))
+            raise ValueError("Error occurred when fetching info: " + str(self.model_info))
         if config.model_id:
             self.model_info["model_id"] = config.model_id
         self._tokenizer = AutoTokenizer.from_pretrained(self.model_info["model_id"])

src/lighteval/models/nanotron/nanotron_model.py
Lines changed: 5 additions & 5 deletions

@@ -533,7 +533,7 @@ def prepare_batch(
         # tensors, then we pack them together into a batch, call the model, and then pick it all apart
         # again because vectorizing is annoying
 
-        # Each sample is concatenated and cut to lenght or padded to max_length
+        # Each sample is concatenated and cut to length or padded to max_length
         for tokens in batch:
             truncated.append(max(len(tokens) - max_context, 0))
 

@@ -717,7 +717,7 @@ def _loglikelihood_single_token(
             if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank:
                 # This process got outputs
 
-                # Gather all the output accross TP
+                # Gather all the output across TP
                 out = out.transpose(0, 1).contiguous() # [batch, seq_length, vocab]
 
                 gathered_out = [torch.zeros_like(out) for _ in range(self.parallel_context.tp_pg.size())]

@@ -768,7 +768,7 @@ def _loglikelihood_single_token(
                 batch_cont_tokens.append(cont_toks)
 
             # Sync all
-            # Need reshape/padding both locally (on each node) and generally accross nodes
+            # Need reshape/padding both locally (on each node) and generally across nodes
             batched_inputs, _ = self.pad_and_gather(batch_model.input_ids)
             lengths = torch.tensor(batch_model.input_lengths, device=self.device)
             batched_lengths = self.gather(lengths)

@@ -949,7 +949,7 @@ def _loglikelihood_tokens(
             if dist.get_rank(self.parallel_context.pp_pg) == self.output_pp_rank:
                 # This process got outputs
 
-                # Gather all the output accross TP
+                # Gather all the output across TP
                 gathered_out = [torch.zeros_like(out) for _ in range(self.parallel_context.tp_pg.size())]
                 dist.all_gather(gathered_out, out, group=self.parallel_context.tp_pg, async_op=False)
                 out = torch.cat(gathered_out, dim=-1)

@@ -1234,7 +1234,7 @@ def greedy_until(
             padded=[sum(mask == 0) for mask in tokenized["attention_mask"]],
         )
 
-        # responses, logits and input_ids have all been gathered accross GPUs already
+        # responses, logits and input_ids have all been gathered across GPUs already
         # but we also grab the original length of these vectors, which have been padded
         # while being gathered - the added info
         outputs = decode_tokenized(

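The "Gather all the output across TP" comments fixed above describe collecting each tensor-parallel rank's logits shard and concatenating the shards back along the vocabulary dimension. Below is a self-contained sketch of that pattern, not the nanotron code itself: it runs a single-process "gloo" group so it works standalone, whereas the real code gathers over `self.parallel_context.tp_pg`.

```python
import torch
import torch.distributed as dist

# Single-process "gloo" group so the example runs standalone (illustrative assumption;
# the real code uses the tensor-parallel group self.parallel_context.tp_pg).
dist.init_process_group("gloo", init_method="tcp://127.0.0.1:29500", rank=0, world_size=1)

out = torch.randn(2, 5, 8)  # [batch, seq_length, vocab shard held by this rank]

# Gather all the output across TP: every rank contributes its shard,
# then the shards are concatenated back into the full vocabulary dimension.
gathered_out = [torch.zeros_like(out) for _ in range(dist.get_world_size())]
dist.all_gather(gathered_out, out, async_op=False)
full_out = torch.cat(gathered_out, dim=-1)

print(full_out.shape)  # with 1 rank this equals out.shape; with N ranks the last dim is N times larger
dist.destroy_process_group()
```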