1616from benchmark .runner import BenchmarkRunner , quick_benchmark , full_benchmark
1717from benchmark .results .storage import ResultsStorage
1818from benchmark .wiki .generator import WikiGenerator
19- from benchmark .translator import get_available_ollama_models
19+ from benchmark .translator import get_available_ollama_models , get_available_openrouter_models
2020
2121
2222# ANSI color codes for terminal output
@@ -68,11 +68,15 @@ def cmd_run(args: argparse.Namespace) -> int:
6868 """Execute benchmark run command."""
6969 print_banner ()
7070
71+ # Determine provider
72+ provider = getattr (args , 'provider' , 'ollama' ) or 'ollama'
73+
7174 # Build configuration
7275 config = BenchmarkConfig .from_cli_args (
7376 openrouter_key = args .openrouter_key ,
7477 evaluator_model = args .evaluator ,
7578 ollama_endpoint = args .ollama_endpoint ,
79+ translation_provider = provider ,
7680 )
7781
7882 # Validate configuration
@@ -82,15 +86,28 @@ def cmd_run(args: argparse.Namespace) -> int:
8286 log_callback ("error" , error )
8387 return 1
8488
85- # Get models
89+ # Get models based on provider
8690 models = args .models
8791 if not models :
88- print (colored ("Detecting available Ollama models..." , Colors .CYAN ))
89- models = asyncio .run (get_available_ollama_models (config ))
90- if not models :
91- log_callback ("error" , "No Ollama models found. Run 'ollama pull <model>' first." )
92- return 1
93- print (colored (f"Found { len (models )} models: { ', ' .join (models [:5 ])} ..." , Colors .GREEN ))
92+ if provider == "openrouter" :
93+ print (colored ("Fetching available OpenRouter models..." , Colors .CYAN ))
94+ models_data = asyncio .run (get_available_openrouter_models (config ))
95+ if not models_data :
96+ log_callback ("error" , "No OpenRouter models available." )
97+ return 1
98+ # Extract model IDs
99+ models = [m ["id" ] if isinstance (m , dict ) else m for m in models_data [:10 ]]
100+ print (colored (f"Found { len (models_data )} models. Using top 10: { ', ' .join (models [:3 ])} ..." , Colors .GREEN ))
101+ else :
102+ print (colored ("Detecting available Ollama models..." , Colors .CYAN ))
103+ models = asyncio .run (get_available_ollama_models (config ))
104+ if not models :
105+ log_callback ("error" , "No Ollama models found. Run 'ollama pull <model>' first." )
106+ return 1
107+ print (colored (f"Found { len (models )} models: { ', ' .join (models [:5 ])} ..." , Colors .GREEN ))
108+
109+ # Show provider info
110+ print (colored (f"Translation provider: { provider .upper ()} " , Colors .YELLOW ))
94111
95112 # Determine languages
96113 if args .full :
@@ -264,6 +281,63 @@ def cmd_export(args: argparse.Namespace) -> int:
264281 return 1
265282
266283
def _openrouter_table_row(model) -> tuple:
    """Return the ``(model_id, price_str)`` pair displayed for one OpenRouter entry.

    Dict entries are read through their ``"id"`` and ``"pricing"`` keys; any
    other entry is shown as-is with no price. Missing, null or non-numeric
    pricing data degrades to ``"N/A"`` (or to 0 for a single missing price)
    instead of raising while the table is being printed.
    """
    if not isinstance(model, dict):
        # Non-dict entries carry no pricing information.
        return str(model), "N/A"

    # str() keeps the ``:<50`` padding in the caller valid for non-string ids.
    model_id = str(model.get("id", "unknown"))
    try:
        # ``or {}`` / ``or 0`` cover keys that are present but explicitly null.
        pricing = model.get("pricing") or {}
        prompt_price = float(pricing.get("prompt_per_million") or 0)
        completion_price = float(pricing.get("completion_per_million") or 0)
    except (AttributeError, TypeError, ValueError):
        # Pricing is not a mapping, or a price is not convertible to a number.
        return model_id, "N/A"

    return model_id, f"${prompt_price:.2f} / ${completion_price:.2f} "


def cmd_models(args: argparse.Namespace) -> int:
    """List available models for benchmarking.

    Prints either a table of OpenRouter models (ID and price per 1M tokens,
    capped at 50 rows) or a bullet list of Ollama models, followed by a usage
    tip for the ``run`` command.

    Args:
        args: Parsed CLI arguments; reads ``provider`` and ``openrouter_key``.

    Returns:
        0 on success, 1 when the selected provider returned no models.
    """
    print_banner()

    config = BenchmarkConfig.from_cli_args(openrouter_key=args.openrouter_key)
    # Same fallback as cmd_run: a missing or empty provider means Ollama.
    provider = getattr(args, 'provider', 'ollama') or 'ollama'

    if provider == "openrouter":
        print(colored("Fetching OpenRouter models...\n ", Colors.CYAN))
        models = asyncio.run(get_available_openrouter_models(config))

        if not models:
            log_callback("error", "Failed to fetch OpenRouter models")
            return 1

        print(colored(f"Available OpenRouter Models ({len(models)} text-only models):\n ", Colors.BOLD))

        # Table header
        print(f"{'Model ID':<50} {'Price (per 1M tokens)':<25} ")
        print("-" * 75)

        max_rows = 50  # Limit to 50 for readability
        for model in models[:max_rows]:
            model_id, price_str = _openrouter_table_row(model)
            print(f"{model_id:<50} {price_str:<25} ")

        # The header reports the full count, so say when rows were left out.
        hidden = len(models) - max_rows
        if hidden > 0:
            print(f"... and {hidden} more models not shown")

        print()
        print(colored("Tip: Use -m to specify models, e.g.:", Colors.YELLOW))
        print(" python -m benchmark.cli run -p openrouter -m anthropic/claude-sonnet-4 openai/gpt-4o")

    else:
        print(colored("Detecting Ollama models...\n ", Colors.CYAN))
        models = asyncio.run(get_available_ollama_models(config))

        if not models:
            log_callback("error", "No Ollama models found. Is Ollama running? Try 'ollama pull <model>'")
            return 1

        print(colored(f"Available Ollama Models ({len(models)} ):\n ", Colors.BOLD))
        for model in models:
            print(f" - {model} ")

        print()
        print(colored("Tip: Use -m to specify models, e.g.:", Colors.YELLOW))
        print(" python -m benchmark.cli run -m llama3:8b qwen2.5:14b")

    return 0
339+
340+
267341def cmd_delete (args : argparse .Namespace ) -> int :
268342 """Delete a benchmark run."""
269343 config = BenchmarkConfig ()
@@ -489,15 +563,21 @@ def create_parser() -> argparse.ArgumentParser:
489563 formatter_class = argparse .RawDescriptionHelpFormatter ,
490564 epilog = """
491565Examples:
492- # Quick benchmark (7 test languages )
566+ # Quick benchmark with Ollama (local models )
493567 python -m benchmark.cli run --openrouter-key YOUR_KEY
494568
569+ # Quick benchmark with OpenRouter (cloud models)
570+ python -m benchmark.cli run --provider openrouter --openrouter-key YOUR_KEY
571+
495572 # Full benchmark (all 40+ languages)
496573 python -m benchmark.cli run --full --openrouter-key YOUR_KEY
497574
498- # Specific models and languages
575+ # Specific Ollama models and languages
499576 python -m benchmark.cli run -m llama3:8b qwen2.5:14b -l fr de ja zh
500577
578+ # Specific OpenRouter models
579+ python -m benchmark.cli run -p openrouter -m anthropic/claude-sonnet-4 openai/gpt-4o -l fr de ja
580+
501581 # Generate wiki pages
502582 python -m benchmark.cli wiki
503583
@@ -513,7 +593,9 @@ def create_parser() -> argparse.ArgumentParser:
513593 run_parser .add_argument (
514594 "-m" , "--models" ,
515595 nargs = "+" ,
516- help = "Ollama models to benchmark. If not specified, uses all available models."
596+ help = "Models to benchmark. For Ollama: model names (e.g., llama3:8b). "
597+ "For OpenRouter: model IDs (e.g., anthropic/claude-sonnet-4). "
598+ "If not specified, auto-detects available models."
517599 )
518600 run_parser .add_argument (
519601 "-l" , "--languages" ,
@@ -525,9 +607,16 @@ def create_parser() -> argparse.ArgumentParser:
525607 action = "store_true" ,
526608 help = "Run full benchmark with all 40+ languages"
527609 )
610+ run_parser .add_argument (
611+ "-p" , "--provider" ,
612+ choices = ["ollama" , "openrouter" ],
613+ default = "ollama" ,
614+ help = "Translation provider: 'ollama' (local, default) or 'openrouter' (cloud, 200+ models)"
615+ )
528616 run_parser .add_argument (
529617 "--openrouter-key" ,
530- help = "OpenRouter API key for evaluation. Can also be set via OPENROUTER_API_KEY env var."
618+ help = "OpenRouter API key (for evaluation, and translation if using --provider openrouter). "
619+ "Can also be set via OPENROUTER_API_KEY env var."
531620 )
532621 run_parser .add_argument (
533622 "--evaluator" ,
@@ -591,6 +680,20 @@ def create_parser() -> argparse.ArgumentParser:
591680 list_parser = subparsers .add_parser ("list" , help = "List available benchmark runs" )
592681 list_parser .set_defaults (func = cmd_list )
593682
683+ # Models command
684+ models_parser = subparsers .add_parser ("models" , help = "List available models for benchmarking" )
685+ models_parser .add_argument (
686+ "-p" , "--provider" ,
687+ choices = ["ollama" , "openrouter" ],
688+ default = "ollama" ,
689+ help = "Provider to list models for (default: ollama)"
690+ )
691+ models_parser .add_argument (
692+ "--openrouter-key" ,
693+ help = "OpenRouter API key (required for listing OpenRouter models)"
694+ )
695+ models_parser .set_defaults (func = cmd_models )
696+
594697 # Show command
595698 show_parser = subparsers .add_parser ("show" , help = "Show details of a benchmark run" )
596699 show_parser .add_argument ("run_id" , help = "Run ID to show" )
0 commit comments