Skip to content

Commit c5f3d9f

Browse files
Keshav Ramji Keshav.Ramji@ibm.comKeshav Ramji Keshav.Ramji@ibm.com
authored and committed
v1 working
1 parent 3087051 commit c5f3d9f

File tree

7 files changed

+550
-1
lines changed

7 files changed

+550
-1
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# KR files
2+
kr_results/
3+
kr_data/
4+
xet/
5+
16
# Python-generated files
27
__pycache__/
38
*.py[oc]

cli/eval/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""CLI for test-based evaluation"""

cli/eval/commands.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import typer
2+
3+
# Typer application object named "eval"; commands are attached to it below.
eval_app = typer.Typer(name="eval")
4+
5+
6+
def eval_run(
    test_files: list[str] = typer.Argument(
        ..., help="List of paths to json/jsonl files containing test cases"
    ),
    backend: str = typer.Option("ollama", "--backend", "-b", help="Generation backend"),
    model: str = typer.Option(None, "--model", help="Generation model name"),
    judge_backend: str = typer.Option(
        None, "--judge-backend", "-jb", help="Judge backend"
    ),
    judge_model: str = typer.Option(None, "--judge-model", help="Judge model name"),
    output_path: str = typer.Option(
        "eval_results", "--output-path", "-o", help="Output path for results"
    ),
    output_format: str = typer.Option(
        "json", "--output-format", help="Either json or jsonl format for results"
    ),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
    # A boolean option declared with an explicit flag name gets no automatic
    # "--no-..." counterpart. Because the default is True, a lone
    # "--continue-on-error" flag could never switch the behaviour off, so the
    # negative form is spelled out. "--continue-on-error" itself still works
    # exactly as before.
    continue_on_error: bool = typer.Option(
        True, "--continue-on-error/--no-continue-on-error"
    ),
) -> None:
    """Run test-based evaluations on the given test case files.

    Every argument and option is forwarded unchanged, by keyword, to
    ``cli.eval.runner.run_evaluations``. The ``model``, ``judge_backend`` and
    ``judge_model`` options are ``None`` when not supplied on the command line.
    """
    # Deferred import: the runner module is only loaded when the command is
    # actually invoked, not when this commands module is imported.
    from cli.eval.runner import run_evaluations

    run_evaluations(
        test_files=test_files,
        backend=backend,
        model=model,
        judge_backend=judge_backend,
        judge_model=judge_model,
        output_path=output_path,
        output_format=output_format,
        verbose=verbose,
        continue_on_error=continue_on_error,
    )
38+
39+
40+
# Register eval_run on eval_app under the command name "run".
eval_app.command("run")(eval_run)

0 commit comments

Comments
 (0)