@@ -272,6 +272,7 @@ function (@main)(args)
272272
273273 Arguments:
274274 ModuleName Name of the module containing setup_problem and grade functions
275+ Use "auto" to auto-detect module from problem_id prefix
275276
276277 Options:
277278 --workspace PATH Working directory (default: current directory)
@@ -294,6 +295,7 @@ function (@main)(args)
294295
295296 Examples:
296297 julia --project -m LLMBenchMCPServer MyBenchmark
298+ julia --project -m LLMBenchMCPServer auto # Auto-detect module from problem_id
297299 julia --project -m LLMBenchMCPServer MyBenchmark --socket
298300 julia --project -m LLMBenchMCPServer MyBenchmark --direct # Run without sandbox
299301 julia --project -m LLMBenchMCPServer MyBenchmark --bash-uid 1000
@@ -313,6 +315,7 @@ function (@main)(args)
313315 direct_mode = false # New flag for direct execution
314316 bash_uid = nothing # UID for bash session execution
315317 bash_env = Dict {String,String} () # Environment variables for bash
318+ auto_mode = (module_name == " auto" ) # Check if we're in auto-detect mode
316319
317320 i = 2
318321 while i <= length (args)
@@ -395,40 +398,156 @@ function (@main)(args)
395398 mkpath (working_dir)
396399 end
397400
398- # Load the module
401+ # Handle module loading based on mode
399402 try
400- # Try to load as a module first, then as a file
401403 mod = nothing
402- mod_symbol = Symbol (module_name)
404+ setup_fn = nothing
405+ grade_fn = nothing
403406
404- # First try to load as a registered package/module
405- try
406- mod = Base. require (Main, mod_symbol)
407- catch
408- # If that fails, try to load as a local file
409- if endswith (module_name, " .jl" )
410- # Load file directly
411- Base. include (Main, module_name)
412- # Extract module name from file
413- file_mod_name = basename (module_name)[1 : end - 3 ] # Remove .jl
414- mod_symbol = Symbol (file_mod_name)
415- if isdefined (Main, mod_symbol)
416- mod = getfield (Main, mod_symbol)
407+ if auto_mode
408+ # In auto mode, create wrapper functions that dynamically load modules
409+ if verbose
410+ println (" Auto mode enabled - modules will be loaded based on problem_id prefix" )
411+ end
412+
413+ # Create a wrapper function for setup_problem that auto-detects the module
# Auto-mode replacement for a benchmark module's `setup_problem`.
#
# `problem_id` must have the form "ModuleName-problem_id". The text before the
# first "-" names the module to load; the remainder is forwarded to that
# module's `setup_problem(workdir, problem_id)`.
#
# Returns whatever the target `setup_problem` returns, or a `String` starting
# with "Error" when the id is malformed, the module cannot be loaded, the
# module has no `setup_problem`, or the call itself throws.
function auto_setup_problem(workdir::String, problem_id::String="")
    # A nested function shares variables with its enclosing function, so an
    # unqualified assignment to a name such as `setup_fn` or `mod_symbol`
    # would rebind the enclosing local. Declaring the names `local` keeps all
    # state private to this call.
    local parts, mod_name, clean_problem_id, target_mod, target_setup, io

    if isempty(problem_id)
        return "Error: problem_id is required in auto mode. Format: ModuleName-problem_id"
    end

    # Split on the first "-" only, so the forwarded id may itself contain dashes.
    parts = split(problem_id, "-"; limit=2)
    if length(parts) < 2 || isempty(parts[1])
        return "Error: Invalid problem_id format. Expected: ModuleName-problem_id, got: $problem_id"
    end

    # `split` returns `SubString`s; convert so that a target method annotated
    # with `::String` still matches.
    mod_name = String(parts[1])
    clean_problem_id = String(parts[2])

    try
        target_mod = Base.require(Main, Symbol(mod_name))

        if !isdefined(target_mod, :setup_problem)
            return "Error: Module $mod_name does not export setup_problem function"
        end

        target_setup = getfield(target_mod, :setup_problem)
        # The module may have been loaded after this closure was compiled, so
        # the call has to go through `invokelatest`.
        return Base.invokelatest(target_setup, workdir, clean_problem_id)
    catch err
        # Covers both load failures and exceptions thrown by the target
        # function; the rendered error and backtrace are returned as text.
        io = IOBuffer()
        showerror(io, err, catch_backtrace())
        return "Error loading module $mod_name: " * String(take!(io))
    end
end
447+
448+ # Create a wrapper function for grade that auto-detects the module
# Auto-mode replacement for a benchmark module's `grade`.
#
# `problem_id` must have the form "ModuleName-problem_id". The text before the
# first "-" names the module to load; the remainder is forwarded to that
# module's `grade(workdir, transcript, problem_id)`.
#
# Returns whatever the target `grade` returns. On any failure (malformed id,
# module cannot be loaded, module has no `grade`, or the call throws) it
# returns `Dict("score" => 0.0, "metadata" => Dict("error" => message))`.
function auto_grade(workdir::String, transcript::String, problem_id::String="")
    # A nested function shares variables with its enclosing function, so an
    # unqualified assignment to a name such as `grade_fn` or `mod_symbol`
    # would rebind the enclosing local. Declaring the names `local` keeps all
    # state private to this call.
    local failure, parts, mod_name, clean_problem_id, target_mod, target_grade, io

    # Every failure path reports a zero score with the reason under "error".
    failure = message -> Dict(
        "score" => 0.0,
        "metadata" => Dict("error" => message),
    )

    if isempty(problem_id)
        return failure("Error: problem_id is required in auto mode. Format: ModuleName-problem_id")
    end

    # Split on the first "-" only, so the forwarded id may itself contain dashes.
    parts = split(problem_id, "-"; limit=2)
    if length(parts) < 2 || isempty(parts[1])
        return failure("Error: Invalid problem_id format. Expected: ModuleName-problem_id, got: $problem_id")
    end

    # `split` returns `SubString`s; convert so that a target method annotated
    # with `::String` still matches.
    mod_name = String(parts[1])
    clean_problem_id = String(parts[2])

    try
        target_mod = Base.require(Main, Symbol(mod_name))

        if !isdefined(target_mod, :grade)
            return failure("Error: Module $mod_name does not export grade function")
        end

        target_grade = getfield(target_mod, :grade)
        # The module may have been loaded after this closure was compiled, so
        # the call has to go through `invokelatest`.
        return Base.invokelatest(target_grade, workdir, transcript, clean_problem_id)
    catch err
        # Covers both load failures and exceptions thrown by the target
        # function; the rendered error and backtrace are returned as text.
        io = IOBuffer()
        showerror(io, err, catch_backtrace())
        return failure("Error loading module $mod_name: " * String(take!(io)))
    end
end
494+
495+ # Set the wrapper functions
496+ setup_fn = auto_setup_problem
497+ grade_fn = auto_grade
498+
499+ else
500+ # Normal mode - load the specified module
501+ mod_symbol = Symbol (module_name)
502+
503+ # First try to load as a registered package/module
504+ try
505+ mod = Base. require (Main, mod_symbol)
506+ catch
507+ # If that fails, try to load as a local file
508+ if endswith (module_name, " .jl" )
509+ # Load file directly
510+ Base. include (Main, module_name)
511+ # Extract module name from file
512+ file_mod_name = basename (module_name)[1 : end - 3 ] # Remove .jl
513+ mod_symbol = Symbol (file_mod_name)
514+ if isdefined (Main, mod_symbol)
515+ mod = getfield (Main, mod_symbol)
516+ end
517+ elseif isfile (module_name * " .jl" )
518+ # Try adding .jl extension
519+ Base. include (Main, module_name * " .jl" )
520+ mod_symbol = Symbol (module_name)
521+ if isdefined (Main, mod_symbol)
522+ mod = getfield (Main, mod_symbol)
523+ end
524+ end
525+ end
526+
527+ if mod === nothing
528+ throw (ArgumentError (" Could not load module $module_name " ))
529+ end
530+
531+ # Extract functions from the loaded module
532+ if isdefined (mod, :setup_problem )
533+ setup_fn = getfield (mod, :setup_problem )
534+ if verbose
535+ println (" Found setup_problem function in $module_name " )
417536 end
418- elseif isfile (module_name * " .jl" )
419- # Try adding .jl extension
420- Base. include (Main, module_name * " .jl" )
421- mod_symbol = Symbol (module_name)
422- if isdefined (Main, mod_symbol)
423- mod = getfield (Main, mod_symbol)
537+ else
538+ println (" Warning: No setup_problem function found in $module_name " )
539+ end
540+
541+ if isdefined (mod, :grade )
542+ grade_fn = getfield (mod, :grade )
543+ if verbose
544+ println (" Found grade function in $module_name " )
424545 end
546+ else
547+ println (" Warning: No grade function found in $module_name " )
425548 end
426549 end
427550
428- if mod === nothing
429- throw (ArgumentError (" Could not load module $module_name " ))
430- end
431-
432551 # Set environment variables for benchmark access
433552 # Set workspace directory
434553 ENV [" LLMBENCH_WORKSPACE" ] = working_dir
@@ -442,28 +561,6 @@ function (@main)(args)
442561 for (key, value) in bash_env
443562 ENV [" LLMBENCH_BASH_ENV_$key " ] = value
444563 end
445-
446- # Extract functions
447- setup_fn = nothing
448- grade_fn = nothing
449-
450- if isdefined (mod, :setup_problem )
451- setup_fn = getfield (mod, :setup_problem )
452- if verbose
453- println (" Found setup_problem function in $module_name " )
454- end
455- else
456- println (" Warning: No setup_problem function found in $module_name " )
457- end
458-
459- if isdefined (mod, :grade )
460- grade_fn = getfield (mod, :grade )
461- if verbose
462- println (" Found grade function in $module_name " )
463- end
464- else
465- println (" Warning: No grade function found in $module_name " )
466- end
467564
468565 # Create and run the server
469566 server = LLMBenchServer (
0 commit comments