Add command-line interface and improve documentation

Keno · Keno · commit d6c9974e0179 · 2025-08-20T06:16:03.000Z
- Add bin/llmbench executable script for CLI usage
- Update main() function to work as CLI entry point
- Add CI badges to README
- Expand documentation with CLI usage examples
- Add examples for both LLMBenchSimple and custom implementations
- Fix argument handling for both programmatic and CLI usage
diff --git a/README.md b/README.md
@@ -1,5 +1,8 @@
 # LLMBenchMCPServer.jl
 
+[![CI](https://github.com/JuliaComputing/LLMBenchMCPServer.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/JuliaComputing/LLMBenchMCPServer.jl/actions/workflows/CI.yml)
+[![codecov](https://codecov.io/gh/JuliaComputing/LLMBenchMCPServer.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaComputing/LLMBenchMCPServer.jl)
+
 A Julia package that implements the full taiga spec as an MCP (Model Context Protocol) server for LLM benchmarking.
 
 ## Features
@@ -50,14 +53,45 @@ run_stdio_server(server)
 
 ### Module-Based Execution
 
+#### Command Line Usage
+
+```bash
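+# Using the provided script (run it from the package root: its `--project` flag resolves the environment from the current directory)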
+./bin/llmbench MyBenchmarkModule [options]
+
+# Or directly with Julia
+julia --project -e 'using LLMBenchMCPServer; LLMBenchMCPServer.main()' -- MyBenchmarkModule [options]
+```
+
+Options:
+- `--workdir PATH`: Set the working directory (default: current directory)
+- `--no-basic-tools`: Disable basic tools (bash, str_replace_editor)
+- `--verbose`: Enable verbose output
+- `--help`, `-h`: Show help message
+
+#### Programmatic Usage
+
 ```julia
 # Run with a benchmark module
 # The module must export setup_problem and grade functions
-LLMBenchMCPServer.main("MyBenchmarkModule")
+LLMBenchMCPServer.main(["MyBenchmarkModule", "--verbose"])
 ```
 
 ### Creating a Benchmark Module
 
+#### Option 1: Using LLMBenchSimple
+
+```julia
+module MyBenchmark
+    using LLMBenchSimple
+    
+    @bench "addition" prompt"What is 2 + 2?" == 4
+    @bench "capital" prompt"What is the capital of France?" == "Paris"
+end
+```
+
+#### Option 2: Custom Implementation
+
 ```julia
 module MyBenchmark
 
diff --git a/bin/llmbench b/bin/llmbench
@@ -0,0 +1,7 @@
+#!/usr/bin/env -S julia --project
+
+# CLI entry point for LLMBenchMCPServer; `--project` (no path) activates the project found from the current directory.
+using LLMBenchMCPServer
+
+# Forward the command-line arguments (ARGS) unchanged to LLMBenchMCPServer.main.
+LLMBenchMCPServer.main(ARGS)
diff --git a/src/server.jl b/src/server.jl
@@ -40,25 +40,19 @@ function LLMBenchServer(;
     return server
 end
 
-"""
-    main(args=ARGS)
-
-Main entry point for the LLMBenchMCPServer.
-
-Usage:
-    julia --project -e 'using LLMBenchMCPServer; LLMBenchMCPServer.main()' -- ModuleName [--workdir /path]
-
-The module should export:
-- `setup_problem(workdir::String)` - Returns problem description
-- `grade(workdir::String, transcript::String)` - Returns grading result
-"""
+# Main entry point for the LLMBenchMCPServer.
+# Usage: julia --project -m LLMBenchMCPServer ModuleName [--workdir /path]   (the -m flag requires Julia 1.11 or newer)
 function main(args=ARGS)
-    if isempty(args) || args[1] in ["--help", "-h"]
+    # Accept a Tuple of arguments (e.g. forwarded by a varargs caller) as well as a Vector
+    if isa(args, Tuple)
+        args = collect(args)
+    end
+    if isempty(args) || (length(args) == 1 && args[1] in ["--help", "-h"])
         println("""
         LLMBenchMCPServer - MCP server for LLM benchmarking
         
         Usage:
-            julia --project -e 'using LLMBenchMCPServer; LLMBenchMCPServer.main()' -- ModuleName [options]
+            julia --project -m LLMBenchMCPServer ModuleName [options]
         
         Arguments:
             ModuleName          Name of the module containing setup_problem and grade functions
@@ -77,7 +71,7 @@ function main(args=ARGS)
                 Returns grading result with subscores, weights, and total score
         
         Example:
-            julia --project -e 'using LLMBenchMCPServer; LLMBenchMCPServer.main()' -- MyBenchmark
+            julia --project -m LLMBenchMCPServer MyBenchmark
         """)
         return 0
     end