# Abort on command failure, and make a pipeline fail if ANY stage fails
# (plain `set -e` would only see the exit status of the last stage).
set -eo pipefail

# Default Configuration (overridable via command-line flags below)
VENV_PATH="../.venv"
ROUTER_ENDPOINT="http://127.0.0.1:8801/v1"
VLLM_ENDPOINT="http://127.0.0.1:8000/v1"
VLLM_MODEL=""  # Will be auto-detected from endpoint if not specified
ROUTER_MODEL="auto"
OUTPUT_BASE="results/comprehensive_research_$(date +%Y%m%d_%H%M%S)"
1616
# Parse command line arguments.
# All value-taking options are validated: a flag given without its value is a
# hard error (previously `VAR="$2"; shift 2` silently assigned an empty string
# and the failing `shift 2` killed the script under `set -e` with no message).
while [[ $# -gt 0 ]]; do
  case "$1" in
    --vllm-model|--vllm-endpoint|--router-endpoint|--router-model|--output-base)
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires an argument" >&2
        echo "Use --help for usage information" >&2
        exit 1
      fi
      case "$1" in
        --vllm-model)      VLLM_MODEL="$2" ;;
        --vllm-endpoint)   VLLM_ENDPOINT="$2" ;;
        --router-endpoint) ROUTER_ENDPOINT="$2" ;;
        --router-model)    ROUTER_MODEL="$2" ;;
        --output-base)     OUTPUT_BASE="$2" ;;
      esac
      shift 2
      ;;
    --help|-h)
      echo "Usage: $0 [OPTIONS]"
      echo "Options:"
      echo "  --vllm-model MODEL      Specify vLLM model (auto-detected if not provided)"
      echo "  --vllm-endpoint URL     vLLM endpoint URL (default: http://127.0.0.1:8000/v1)"
      echo "  --router-endpoint URL   Router endpoint URL (default: http://127.0.0.1:8801/v1)"
      echo "  --router-model MODEL    Router model (default: auto)"
      echo "  --output-base DIR       Output directory base (default: results/comprehensive_research_TIMESTAMP)"
      echo "  --help, -h              Show this help message"
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      echo "Use --help for usage information" >&2
      exit 1
      ;;
  esac
done
58+
# ANSI color escape sequences for terminal output (rendered via `echo -e`).
RED='\033[0;31m'     # errors
GREEN='\033[0;32m'   # success
YELLOW='\033[1;33m'  # warnings
BLUE='\033[0;34m'    # info / section headers
NC='\033[0m'         # reset — No Color
65+
# Auto-detect vLLM model if not specified on the command line.
# Queries the OpenAI-compatible /models endpoint and takes the first model id;
# on any failure (endpoint down, HTTP error, unparseable JSON) falls back to a
# known default so the rest of the run can proceed.
if [[ -z "$VLLM_MODEL" ]]; then
  echo -e "${BLUE}🔍 Auto-detecting vLLM model from endpoint...${NC}"

  # -f: treat HTTP error responses (4xx/5xx) as failures so an error page is
  # never fed to the JSON parser; plain `curl -s` exits 0 on those.
  VLLM_MODELS_JSON=$(curl -sf "$VLLM_ENDPOINT/models" 2>/dev/null || echo "")

  if [[ -n "$VLLM_MODELS_JSON" ]]; then
    # Extract the first model ID from the OpenAI-style {"data": [{"id": ...}]} response.
    VLLM_MODEL=$(echo "$VLLM_MODELS_JSON" | python3 -c "
import json
import sys
try:
    data = json.load(sys.stdin)
    if 'data' in data and len(data['data']) > 0:
        print(data['data'][0]['id'])
    else:
        print('')
except Exception:
    print('')
" 2>/dev/null)
  fi

  # Single fallback path for both failure modes (fetch failed / parse failed).
  if [[ -n "$VLLM_MODEL" ]]; then
    echo -e "${GREEN}✅ Auto-detected vLLM model: $VLLM_MODEL${NC}"
  else
    echo -e "${RED}❌ Failed to detect model from vLLM endpoint: $VLLM_ENDPOINT${NC}"
    echo -e "${YELLOW}⚠️  Using fallback model: openai/gpt-oss-20b${NC}"
    VLLM_MODEL="openai/gpt-oss-20b"
  fi
fi
101+
# Master CSV shared by every run — results accumulate here across invocations
# instead of being scattered per-run.
PERSISTENT_RESEARCH_CSV="results/research_results_master.csv"
19104
@@ -28,13 +113,6 @@ declare -A DATASET_CONFIGS=(
28113 [" hellaswag" ]=8 # ~50 activities × 8 = ~400 samples
29114)
30115
31- # Colors for output
32- RED=' \033[0;31m'
33- GREEN=' \033[0;32m'
34- BLUE=' \033[0;34m'
35- YELLOW=' \033[1;33m'
36- NC=' \033[0m' # No Color
37-
# Banner: printf '%b' expands the backslash escapes in the color variables,
# matching what `echo -e` did.
printf '%b\n' "${BLUE}🔬 COMPREHENSIVE MULTI-DATASET BENCHMARK FOR RESEARCH${NC}"
printf '%b\n' "${BLUE}====================================================${NC}"
printf '\n'
142220
143221 # Determine model name
144222 if '$mode ' == 'router':
145- model_name = 'auto '
223+ model_name = '$ROUTER_MODEL '
146224 else:
147- model_name = 'openai/gpt-oss-20b '
225+ model_name = '$VLLM_MODEL '
148226
149227 # For vLLM, we might have multiple modes (NR, NR_REASONING)
150228 if '$mode ' == 'vllm' and 'mode' in df.columns:
0 commit comments