Skip to content

Commit d3f0fd6

Browse files
committed
enable evaluation script to also evaluate remote models
1 parent dede688 commit d3f0fd6

File tree

1 file changed

+34
-8
lines changed

1 file changed

+34
-8
lines changed

scripts/evaluate_best_checkpoint.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,18 @@
22

33
"""
44
Example usage:
5+
# to evaluate a directory of checkpoints
56
python scripts/evaluate_best_checkpoint.py \
6-
/path/to/checkpoint_dir \
7+
best-checkpoint /path/to/checkpoint_dir \
78
--output-file /path/to/output_file
9+
10+
# to evaluate a single checkpoint
11+
python scripts/evaluate_best_checkpoint.py evaluate \
12+
--hf-model='meta-llama/Llama-3.1-8B-Instruct'
13+
14+
# OR for a local model
15+
python scripts/evaluate_best_checkpoint.py evaluate \
16+
--input-dir='/path/to/checkpoint'
817
"""
918

1019
# Standard
@@ -131,7 +140,14 @@ def best_checkpoint(
131140

132141
@app.command()
133142
def evaluate(
134-
input_dir: Path = typer.Argument(..., help="Input directory to process"),
143+
input_dir: Annotated[
144+
Optional[Path],
145+
typer.Option(help="Input directory to process"),
146+
] = None,
147+
hf_model: Annotated[
148+
Optional[str],
149+
typer.Option(help="The HF model repo to evaluate, e.g. 'meta-llama/Llama-3.1-8B-Instruct'"),
150+
] = None,
135151
tasks: Annotated[
136152
Optional[list[str]],
137153
typer.Option(
@@ -147,22 +163,32 @@ def evaluate(
147163
"""
148164
Evaluate a single checkpoint directory or HF model repo and save results to a JSON file.
149165
"""
150-
if not input_dir.exists():
151-
typer.echo(f"Error: Input directory '{input_dir}' does not exist")
166+
if not input_dir and not hf_model:
167+
typer.echo("Error: one of '--input-dir' or '--hf-model' must be provided")
152168
raise typer.Exit(1)
153169

154-
if not input_dir.is_dir():
155-
typer.echo(f"Error: '{input_dir}' is not a directory")
170+
if input_dir and hf_model:
171+
typer.echo("Error: '--input-dir' and '--hf-model' were both provided, but command only accepts one")
156172
raise typer.Exit(1)
157173

174+
175+
if input_dir:
176+
if not input_dir.exists():
177+
typer.echo(f"Error: Input directory '{input_dir}' does not exist")
178+
raise typer.Exit(1)
179+
180+
if not input_dir.is_dir():
181+
typer.echo(f"Error: '{input_dir}' is not a directory")
182+
raise typer.Exit(1)
183+
184+
model_path = hf_model if hf_model else str(input_dir)
158185
typer.echo("importing LeaderboardV2Evaluator, this may take a while...")
159186
# First Party
160187
from instructlab.eval.leaderboard import LeaderboardV2Evaluator
161-
162188
typer.echo("done")
163189

164190
evaluator = LeaderboardV2Evaluator(
165-
model_path=str(input_dir), num_gpus=num_gpus, eval_config={"batch_size": "auto"}
191+
model_path=model_path, num_gpus=num_gpus, eval_config={"batch_size": "auto"}
166192
)
167193
if tasks:
168194
evaluator.tasks = tasks

0 commit comments

Comments
 (0)