#!/bin/bash

# Script to run AlfWorld rollout evaluation
# This script runs the evaluation directly without Docker
#
# Run it from the directory that contains scripts/ -- the rollout script is
# invoked through the relative path scripts/rollout/openmanus_rollout.py.
# Logs and results go to timestamped directories created in the current
# working directory.

# One timestamp shared by both output directories so a run's logs and results
# can be matched up afterwards.
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_DIR="logs_${TIMESTAMP}"
RESULTS_DIR="results_${TIMESTAMP}"

echo "Starting AlfWorld rollout evaluation"
echo "Timestamp: ${TIMESTAMP}"
echo "Results will be saved to: ${RESULTS_DIR}"

# Create directories
# Abort early if they cannot be created: everything below tees into LOG_DIR.
if ! mkdir -p -- "${LOG_DIR}" "${RESULTS_DIR}"; then
  echo "Error: cannot create ${LOG_DIR} and/or ${RESULTS_DIR}" >&2
  exit 1
fi

# Activate virtual environment if it exists
if [[ -f /opt/openmanus-venv/bin/activate ]]; then
  # shellcheck disable=SC1091 -- optional file, not present at lint time
  source /opt/openmanus-venv/bin/activate
fi

# Function to run a model
#
# Runs the AlfWorld rollout script for one model and tees its combined
# stdout/stderr into a per-model log file.
#
# Globals:
#   RESULTS_DIR (read) - root directory for chat logs and trajectory dumps
#   LOG_DIR     (read) - directory holding main.log and the per-model logs
# Arguments:
#   $1 - model name, passed to --model
#   $2 - server base URL without the trailing /v1; empty omits --base_url
#   $3 - display name; used in messages and, with every '/' replaced by '_',
#        as the results subdirectory and log file name
# Outputs:
#   progress messages and the rollout output on stdout; a failure message
#   on stderr
# Returns:
#   the exit status of the rollout command
run_model() {
  local model_name=${1-}
  local base_url=${2-}
  local display_name=${3-}
  # '/' in a display name would otherwise create nested paths.
  local safe_name=${display_name//\//_}
  local cmd_str
  local status

  echo "Starting ${display_name}..."

  # Build command
  # An argv array (instead of a string run through eval) keeps every value a
  # single argument even if it contains spaces, quotes or shell metacharacters.
  local -a cmd=(
    python scripts/rollout/openmanus_rollout.py
    --env alfworld
    --unique_envs
    --batch_size 10
    --concurrency 10
    --total_envs 200
    --history_length 30
    --model "${model_name}"
    --chat_root "${RESULTS_DIR}/${safe_name}"
    --dump_path "${RESULTS_DIR}/${safe_name}/trajectory.jsonl"
  )

  if [[ -n "${base_url}" ]]; then
    cmd+=(--base_url "${base_url}/v1")
  fi

  # Run with logging
  # %q renders the command so it can be pasted back into a shell unchanged.
  printf -v cmd_str '%q ' "${cmd[@]}"
  echo "Command: ${cmd_str% }" | tee -a "${LOG_DIR}/main.log"

  "${cmd[@]}" 2>&1 | tee "${LOG_DIR}/${safe_name}.log"
  # The pipeline's own status is tee's; the rollout's status is stage 0.
  status=${PIPESTATUS[0]}

  if ((status != 0)); then
    echo "Failed ${display_name} (exit code ${status})" \
      | tee -a "${LOG_DIR}/main.log" >&2
    return "${status}"
  fi

  echo "Completed ${display_name}" | tee -a "${LOG_DIR}/main.log"
}

# Main execution
# Runs each configured model in turn through run_model, remembers which ones
# returned a non-zero status, prints a summary, and exits non-zero if any
# model failed.
echo "Launching models..." | tee -a "${LOG_DIR}/main.log"

# Display names of models whose run_model call returned non-zero.
failed_models=()

# OpenAI models (uncomment to use)
# run_model "gpt-4o" "" "GPT-4o" || failed_models+=("GPT-4o")
# run_model "gpt-4o-mini" "" "GPT-4o-mini" || failed_models+=("GPT-4o-mini")

# vLLM models - run sequentially
# A failure is recorded and the remaining models still run.
run_model "qwen3-8b" "http://129.212.187.116:8001" "Qwen3-8B" \
  || failed_models+=("Qwen3-8B")
run_model "llama-3.1-8b-instruct" "http://129.212.176.75:8001" "Llama3.1-8B" \
  || failed_models+=("Llama3.1-8B")
run_model "qwen2.5-7b-instruct" "http://134.199.196.219:8001" "Qwen2.5-7B" \
  || failed_models+=("Qwen2.5-7B")
run_model "qwen2.5-72b-instruct" "http://134.199.196.239:8001" "Qwen2.5-72B" \
  || failed_models+=("Qwen2.5-72B")
run_model "llama-3.3-70b-instruct" "http://129.212.178.4:8001" "Llama3.3-70B" \
  || failed_models+=("Llama3.3-70B")

echo ""
echo "========================================="
if ((${#failed_models[@]} == 0)); then
  echo "All models completed!"
else
  echo "Finished with ${#failed_models[@]} failed model(s): ${failed_models[*]}" \
    | tee -a "${LOG_DIR}/main.log" >&2
fi
echo "========================================="
echo ""
echo "Logs are in: ${LOG_DIR}/"
echo "Results are in: ${RESULTS_DIR}/"
echo ""
echo "To check results:"
echo "  ls -la ${RESULTS_DIR}/"
echo "To view logs:"
echo "  tail -f ${LOG_DIR}/*.log"

# Let callers (CI, wrapper scripts) detect that at least one model failed.
if ((${#failed_models[@]} > 0)); then
  exit 1
fi