22
33"""
44Example usage:
5+ # to evaluate a directory of checkpoints
56python scripts/evaluate_best_checkpoint.py \
6- /path/to/checkpoint_dir \
7+ best-checkpoint /path/to/checkpoint_dir \
78 --output-file /path/to/output_file
9+
10+ # to evaluate a single checkpoint
11+ python scripts/evaluate_best_checkpoint.py evaluate \
12+ --hf-model='meta-llama/Llama-3.1-8B-Instruct'
13+
14+ # OR for a local model
15+ python scripts/evaluate_best_checkpoint.py evaluate \
16+ --input-dir='/path/to/checkpoint'
817"""
918
1019# Standard
@@ -131,7 +140,14 @@ def best_checkpoint(
131140
132141@app .command ()
133142def evaluate (
134- input_dir : Path = typer .Argument (..., help = "Input directory to process" ),
143+ input_dir : Annotated [
144+ Optional [Path ],
145+ typer .Option (help = "Input directory to process" ),
146+ ] = None ,
147+ hf_model : Annotated [
148+ Optional [str ],
149+ typer .Option (help = "The HF model repo to evaluate, e.g. 'meta-llama/Llama-3.1-8B-Instruct'" ),
150+ ] = None ,
135151 tasks : Annotated [
136152 Optional [list [str ]],
137153 typer .Option (
@@ -147,22 +163,32 @@ def evaluate(
147163 """
148164 Evaluate a single local checkpoint directory or Hugging Face model repo and save results to a JSON file.
149165 """
150- if not input_dir . exists () :
151- typer .echo (f "Error: Input directory ' { input_dir } ' does not exist " )
166+ if not input_dir and not hf_model :
167+ typer .echo ("Error: one of '--input-dir' or '--hf-model' must be provided " )
152168 raise typer .Exit (1 )
153169
154- if not input_dir . is_dir () :
155- typer .echo (f "Error: '{ input_dir } ' is not a directory " )
170+ if input_dir and hf_model :
171+ typer .echo ("Error: '--input-dir' and '--hf-model' were both provided, but command only accepts one " )
156172 raise typer .Exit (1 )
157173
174+
175+ if input_dir :
176+ if not input_dir .exists ():
177+ typer .echo (f"Error: Input directory '{ input_dir } ' does not exist" )
178+ raise typer .Exit (1 )
179+
180+ if not input_dir .is_dir ():
181+ typer .echo (f"Error: '{ input_dir } ' is not a directory" )
182+ raise typer .Exit (1 )
183+
184+ model_path = hf_model if hf_model else str (input_dir )
158185 typer .echo ("importing LeaderboardV2Evaluator, this may take a while..." )
159186 # First Party
160187 from instructlab .eval .leaderboard import LeaderboardV2Evaluator
161-
162188 typer .echo ("done" )
163189
164190 evaluator = LeaderboardV2Evaluator (
165- model_path = str ( input_dir ) , num_gpus = num_gpus , eval_config = {"batch_size" : "auto" }
191+ model_path = model_path , num_gpus = num_gpus , eval_config = {"batch_size" : "auto" }
166192 )
167193 if tasks :
168194 evaluator .tasks = tasks
0 commit comments