Skip to content

Commit ba0c471

Browse files
committed
[feat] run alfworld rollout script example
1 parent 5d693e1 commit ba0c471

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

scripts/rollout/run_alfworld.sh

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#!/bin/bash
2+
3+
# Script to run AlfWorld rollout evaluation
4+
# This script runs the evaluation directly without Docker
5+
6+
# One timestamp is taken per invocation and embedded in both directory names
# below, so runs started at different seconds get separate log/result dirs.
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
7+
# Both paths are relative: they are created under the current working
# directory of whoever launches the script.
LOG_DIR="logs_${TIMESTAMP}"
8+
RESULTS_DIR="results_${TIMESTAMP}"
9+
10+
# Announce the run on the terminal (these lines are not written to any log).
echo "Starting AlfWorld rollout evaluation"
11+
echo "Timestamp: ${TIMESTAMP}"
12+
echo "Results will be saved to: ${RESULTS_DIR}"
13+
14+
# Create the output directories up front (-p: no error if they already
# exist). The exit status of mkdir is not checked here.
15+
mkdir -p "${LOG_DIR}"
16+
mkdir -p "${RESULTS_DIR}"
17+
18+
# Activate the virtual environment only when its activate script is present;
# there is no else branch, so otherwise the script continues with whatever
# environment it was started in.
19+
if [ -f "/opt/openmanus-venv/bin/activate" ]; then
20+
source /opt/openmanus-venv/bin/activate
21+
fi
22+
23+
# Function to run a model
24+
#######################################
# Run one AlfWorld rollout for a single model and capture its output.
# Globals:
#   RESULTS_DIR (read) - root directory for chat logs and the trajectory dump
#   LOG_DIR     (read) - directory holding main.log and the per-model log
# Arguments:
#   $1 - model name, passed verbatim to --model
#   $2 - server base URL; "/v1" is appended and passed as --base_url.
#        Empty string means --base_url is omitted.
#   $3 - display name; used in messages and, with spaces and slashes turned
#        into underscores, as the per-model directory / log file name
# Outputs:
#   Progress messages on stdout; the command line is appended to
#   ${LOG_DIR}/main.log; rollout stdout+stderr is mirrored to the terminal and
#   written to ${LOG_DIR}/<safe_name>.log.
# Returns:
#   The exit status of the rollout command (0 on success).
#######################################
run_model() {
  local model_name=$1
  local base_url=$2
  local display_name=$3

  # Make the display name safe to use as a single path component.
  local safe_name=${display_name// /_}
  safe_name=${safe_name//\//_}

  echo "Starting ${display_name}..."

  # Build the command as an argv array instead of a string fed to eval, so
  # quotes, spaces or shell metacharacters in any argument are passed through
  # literally rather than being re-parsed by the shell.
  local -a cmd=(
    python scripts/rollout/openmanus_rollout.py
    --env alfworld
    --unique_envs
    --batch_size 10
    --concurrency 10
    --total_envs 200
    --history_length 30
    --model "${model_name}"
    --chat_root "${RESULTS_DIR}/${safe_name}"
    --dump_path "${RESULTS_DIR}/${safe_name}/trajectory.jsonl"
  )

  if [[ -n "${base_url}" ]]; then
    cmd+=(--base_url "${base_url}/v1")
  fi

  # Log a copy-pasteable (shell-quoted) rendering of the command.
  local rendered
  printf -v rendered '%q ' "${cmd[@]}"
  echo "Command: ${rendered% }" | tee -a "${LOG_DIR}/main.log"

  # tee would otherwise hide the rollout's exit code; read it from PIPESTATUS
  # immediately after the pipeline.
  local status
  "${cmd[@]}" 2>&1 | tee "${LOG_DIR}/${safe_name}.log"
  status=${PIPESTATUS[0]}

  if (( status != 0 )); then
    echo "Failed ${display_name} (exit code ${status})" \
      | tee -a "${LOG_DIR}/main.log" >&2
    return "${status}"
  fi

  echo "Completed ${display_name}" | tee -a "${LOG_DIR}/main.log"
}
54+
55+
# Main execution
56+
# Record the start of the batch: shown on the terminal and appended to the
# shared main log.
echo "Launching models..." | tee -a "${LOG_DIR}/main.log"
57+
58+
# OpenAI models (uncomment to use). The empty second argument means
# run_model adds no --base_url option to the command.
59+
# run_model "gpt-4o" "" "GPT-4o"
60+
# run_model "gpt-4o-mini" "" "GPT-4o-mini"
61+
62+
# vLLM models - run sequentially, one run_model call after another.
# Arguments: model name, server base URL (run_model appends "/v1"),
# display name (also used to derive the per-model log/result paths).
# NOTE(review): the endpoints are hard-coded IP addresses; confirm they are
# still valid before running.
63+
run_model "qwen3-8b" "http://129.212.187.116:8001" "Qwen3-8B"
64+
run_model "llama-3.1-8b-instruct" "http://129.212.176.75:8001" "Llama3.1-8B"
65+
run_model "qwen2.5-7b-instruct" "http://134.199.196.219:8001" "Qwen2.5-7B"
66+
run_model "qwen2.5-72b-instruct" "http://134.199.196.239:8001" "Qwen2.5-72B"
67+
run_model "llama-3.3-70b-instruct" "http://129.212.178.4:8001" "Llama3.3-70B"
68+
69+
# Final summary. No run_model exit status is checked above, so this banner is
# printed whether or not the individual runs succeeded.
echo ""
70+
echo "========================================="
71+
echo "All models completed!"
72+
echo "========================================="
73+
echo ""
74+
echo "Logs are in: ${LOG_DIR}/"
75+
echo "Results are in: ${RESULTS_DIR}/"
76+
echo ""
77+
# Print ready-to-copy commands for inspecting this run's output.
echo "To check results:"
78+
echo " ls -la ${RESULTS_DIR}/"
79+
echo "To view logs:"
80+
echo " tail -f ${LOG_DIR}/*.log"

0 commit comments

Comments
 (0)