7 changes: 7 additions & 0 deletions .gitignore
@@ -1,3 +1,10 @@
# KR files
kr_results/
kr_data/
xet/
job.sh
hub/

Comment on lines +1 to +7 (Contributor):
Could this be removed? I think it's user specific.

# Python-generated files
__pycache__/
*.py[oc]
1 change: 1 addition & 0 deletions cli/eval/__init__.py
@@ -0,0 +1 @@
"""CLI for test-based evaluation"""
46 changes: 46 additions & 0 deletions cli/eval/commands.py
@@ -0,0 +1,46 @@
import typer

eval_app = typer.Typer(name="eval")
Comment (Contributor):
Could you add some documentation on what the CLI app is trying to achieve here? It would be good to give end users some clarity on what the command is for.



def eval_run(
test_files: list[str] = typer.Argument(
..., help="List of paths to json/jsonl files containing test cases"
),
backend: str = typer.Option("ollama", "--backend", "-b", help="Generation backend"),
model: str = typer.Option(None, "--model", help="Generation model name"),
max_gen_tokens: int = typer.Option(
256, "--max-gen-tokens", help="Max tokens to generate for responses"
),
judge_backend: str = typer.Option(
None, "--judge-backend", "-jb", help="Judge backend"
),
judge_model: str = typer.Option(None, "--judge-model", help="Judge model name"),
max_judge_tokens: int = typer.Option(
256, "--max-judge-tokens", help="Max tokens for the judge model's judgement."
),
output_path: str = typer.Option(
"eval_results", "--output-path", "-o", help="Output path for results"
),
output_format: str = typer.Option(
"json", "--output-format", help="Either json or jsonl format for results"
),
continue_on_error: bool = typer.Option(True, "--continue-on-error"),
):
from cli.eval.runner import run_evaluations

run_evaluations(
test_files=test_files,
backend=backend,
model=model,
max_gen_tokens=max_gen_tokens,
judge_backend=judge_backend,
judge_model=judge_model,
max_judge_tokens=max_judge_tokens,
output_path=output_path,
output_format=output_format,
continue_on_error=continue_on_error,
)


eval_app.command("run")(eval_run)
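Regarding the documentation comment above, a minimal sketch of how help text could be attached to the Typer app and command; the wording of the help strings here is illustrative only, not part of the PR. The decorator form is equivalent to the `eval_app.command("run")(eval_run)` registration used in the diff.

```python
import typer

# Sketch only: the help strings below are assumptions, not part of the PR.
eval_app = typer.Typer(
    name="eval",
    help="Test-based evaluation: generate model responses for test cases "
    "and optionally score them with a judge model.",
)


@eval_app.command("run", help="Run evaluations over one or more json/jsonl test files.")
def eval_run(
    test_files: list[str] = typer.Argument(
        ..., help="List of paths to json/jsonl files containing test cases"
    ),
) -> None:
    # Remaining options and the call to run_evaluations omitted; see the diff above.
    ...
```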
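For context, one way the new sub-app might be mounted on a top-level CLI. The parent app and module names below are hypothetical, since the entry point is not shown in this diff.

```python
# Hypothetical wiring -- the top-level CLI module and its structure are assumptions.
import typer

from cli.eval.commands import eval_app

app = typer.Typer()
app.add_typer(eval_app, name="eval")  # exposes `eval run <test_files> ...` as a subcommand

if __name__ == "__main__":
    app()
```

With wiring along these lines, an invocation such as `eval run tests.jsonl --backend ollama -o results` would forward the options to run_evaluations with the defaults declared in commands.py.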