diff --git a/codegen-examples/examples/swebench_agent_run/run_eval.py b/codegen-examples/examples/swebench_agent_run/run_eval.py
index b38041c88..b1567f8f6 100644
--- a/codegen-examples/examples/swebench_agent_run/run_eval.py
+++ b/codegen-examples/examples/swebench_agent_run/run_eval.py
@@ -269,14 +269,14 @@ def process_batch_local(examples: list[SweBenchExample], model: str, num_workers
 
 
 async def run_eval(
-    use_existing_preds: str | None,
     dataset: str,
-    length: int,
+    use_existing_preds: str | None = None,
+    length: int | None = None,
     instance_id: str | None = None,
     local: bool = False,
     codebases: dict[str, Codebase] = {},
     repo: str | None = None,
-    num_workers: int = 5,
+    num_workers: int = 2,
     model: str = "claude-3-7-sonnet-latest",
 ):
     run_id = use_existing_preds or str(uuid.uuid4())
@@ -353,9 +353,9 @@ async def run_eval(
 
 
 @click.command()
-@click.option("--use-existing-preds", help="The run ID of the existing predictions to use.", type=str, default=None)
 @click.option("--dataset", help="The dataset to use.", type=click.Choice(["lite", "full", "verified", "lite_small", "lite_medium", "lite_large"]), default="lite")
-@click.option("--length", help="The number of examples to process.", type=int, default=10)
+@click.option("--use-existing-preds", help="The run ID of the existing predictions to use.", type=str, default=None)
+@click.option("--length", help="The number of examples to process.", type=int, default=None)
 @click.option("--instance-id", help="The instance ID of the example to process.", type=str, default=None)
 @click.option("--local", help="Run the evaluation locally.", is_flag=True, default=False)
 @click.option("--repo", help="The repo to use.", type=str, default=None)
@@ -363,10 +363,10 @@ async def run_eval(
     "--num-workers", help="The number of workers to use. This is the number of examples that will be processed concurrently. A large number may lead to rate limiting issues.", type=int, default=5
 )
 @click.option("--model", help="The model to use.", type=str, default="claude-3-7-sonnet-latest")
-def run_eval_command(use_existing_preds, dataset, length, instance_id, local, repo, num_workers, model):
+def run_eval_command(dataset, use_existing_preds, length, instance_id, local, repo, num_workers, model):
     print(f"Repo: {repo}")
     print(f"Model: {model}")
-    asyncio.run(run_eval(use_existing_preds=use_existing_preds, dataset=dataset, length=length, instance_id=instance_id, codebases=None, local=local, repo=repo, num_workers=num_workers, model=model))
+    asyncio.run(run_eval(dataset=dataset, use_existing_preds=use_existing_preds, length=length, instance_id=instance_id, local=local, repo=repo, num_workers=num_workers, model=model))
 
 
 if __name__ == "__main__":