Skip to content

Commit cd91dde

Browse files
authored
neater bundle and logdir (#1043)
1 parent d04e4f9 commit cd91dde

File tree

1 file changed

+35
-6
lines changed

1 file changed

+35
-6
lines changed

src/lighteval/main_inspect.py

Lines changed: 35 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
import logging
2424
from collections import defaultdict
25+
from datetime import datetime
2526
from typing import Literal
2627

2728
import requests
@@ -211,6 +212,20 @@ def eval( # noqa C901
211212
models: Annotated[list[str], Argument(help="Models to evaluate")],
212213
tasks: Annotated[str, Argument(help="Tasks to evaluate")],
213214
# model arguments
215+
model_base_url: Annotated[
216+
str | None,
217+
Option(
218+
help="Base URL for communicating with the model API.",
219+
rich_help_panel=HELP_PANEL_NAME_1,
220+
),
221+
] = None,
222+
model_roles: Annotated[
223+
str | None,
224+
Option(
225+
help="Model creation args (as a dictionary or as a path to a JSON or YAML config file)",
226+
rich_help_panel=HELP_PANEL_NAME_1,
227+
),
228+
] = None,
214229
max_tokens: Annotated[
215230
int | None,
216231
Option(
@@ -382,9 +397,9 @@ def eval( # noqa C901
382397
] = None,
383398
# Logging parameters
384399
log_dir: Annotated[
385-
str,
400+
str | None,
386401
Option(help="Log directory to use, will be created if it doesn't exist", rich_help_panel=HELP_PANEL_NAME_4),
387-
] = "lighteval-logs",
402+
] = None,
388403
log_dir_allow_dirty: Annotated[
389404
bool, Option(help="Allow dirty log directory", rich_help_panel=HELP_PANEL_NAME_4)
390405
] = True,
@@ -396,6 +411,10 @@ def eval( # noqa C901
396411
str | None,
397412
Option(help="Bundle directory to use, will be created if it doesn't exist", rich_help_panel=HELP_PANEL_NAME_4),
398413
] = None,
414+
bundle_overwrite: Annotated[
415+
bool,
416+
Option(help="Overwrite bundle directory if it exists", rich_help_panel=HELP_PANEL_NAME_4),
417+
] = True,
399418
repo_id: Annotated[
400419
str | None,
401420
Option(help="Repository ID to use, will be created if it doesn't exist", rich_help_panel=HELP_PANEL_NAME_4),
@@ -428,6 +447,9 @@ def eval( # noqa C901
428447
providers = _get_huggingface_providers(model)
429448
models = [f"{model.replace(':all', '')}:{provider}" for provider in providers]
430449

450+
if log_dir is None:
451+
log_dir = f"lighteval-logs-{datetime.now().strftime('%Y%m%d%H%M%S')}"
452+
431453
success, logs = inspect_ai_eval_set(
432454
inspect_ai_tasks,
433455
model=models,
@@ -440,7 +462,6 @@ def eval( # noqa C901
440462
log_dir=log_dir,
441463
log_dir_allow_dirty=log_dir_allow_dirty,
442464
display=display,
443-
bundle_dir=bundle_dir,
444465
model_args=model_args,
445466
max_tokens=max_tokens,
446467
system_message=system_message,
@@ -463,10 +484,13 @@ def eval( # noqa C901
463484
parallel_tool_calls=parallel_tool_calls,
464485
max_tool_output=max_tool_output,
465486
internal_tools=internal_tools,
466-
overwrite=True,
487+
bundle_dir=bundle_dir,
488+
bundle_overwrite=bundle_overwrite,
467489
)
468490

469491
if not success:
492+
print("Error evaluating models")
493+
print(f"run the same command with --log-dir {log_dir} to retry !")
470494
return
471495

472496
results_per_model_per_task = {}
@@ -482,12 +506,17 @@ def eval( # noqa C901
482506
table_md = results_to_markdown_table(results_per_model_per_task_agg)
483507

484508
if repo_id is not None:
485-
push_to_hub(bundle_dir, repo_id, public=public)
509+
if bundle_dir is not None:
510+
push_to_hub(bundle_dir, repo_id, public=public)
486511

487512
print()
488513
print(table_md)
489514
print(f"results saved to {log_dir}")
490-
print(f'run "inspect view --log-dir {log_dir}" to view the results')
515+
516+
if log_dir is not None:
517+
print(f'run "inspect view --log-dir {log_dir}" to view the results')
518+
else:
519+
print("run 'inspect view' to view the results")
491520

492521

493522
if __name__ == "__main__":

0 commit comments

Comments (0)