File tree Expand file tree Collapse file tree 13 files changed +1306
-110
lines changed
openmanus_rl/agentgym/agentenv/examples Expand file tree Collapse file tree 13 files changed +1306
-110
lines changed Original file line number Diff line number Diff line change 1+ import json
2+ from datasets import load_dataset
3+
# Fetch the "test" split of the AgentGym/AgentEval dataset.
ds = load_dataset("AgentGym/AgentEval", split="test")


def _is_webshop(example):
    """Return True when the row's item_id begins with the "webshop_" prefix."""
    return example["item_id"].startswith("webshop_")


# Narrow the dataset down to the WebShop rows only.
webshop_ds = ds.filter(_is_webshop)

# Print the filtered dataset as a quick sanity check.
print(webshop_ds)

output_file = "webshop_inference.json"

# One record per kept row: its item_id plus an empty "conversations" list.
data = []
for row in webshop_ds:
    data.append({"item_id": row["item_id"], "conversations": []})

# Serialise the records as indented JSON.
with open(output_file, "w") as f:
    json.dump(data, f, indent=2)
Original file line number Diff line number Diff line change 1+ # Evaluation args
# Runs base_eval_template.py on the WebShop inference set for one checkpoint.
# base_eval_template.py is resolved relative to the current directory, and the
# environment server at env_server_base must already be running.
set -euo pipefail

# Evaluation args
model_path="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/global_step_1"
inference_file="/home/user/muxin/OpenManus-RL/data/webshop/webshop_inference.json"
output_file="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/output/qwen2.5-3b-webshop.log"
task_name="webshop"
seed="42"

# environment parameters
max_round="6"
env_server_base="http://127.0.0.1:36001"

# Create the parent directory of the output file up front so a missing
# directory cannot be the reason the run fails.
mkdir -p -- "$(dirname -- "${output_file}")"

# Values are quoted without padding so each one reaches Python exactly as
# written above (no leading/trailing blanks in paths or URLs).
python -u base_eval_template.py \
  --model_path "${model_path}" \
  --inference_file "${inference_file}" \
  --output_file "${output_file}" \
  --task_name "${task_name}" \
  --seed "${seed}" \
  --max_round "${max_round}" \
  --env_server_base "${env_server_base}"
Original file line number Diff line number Diff line change 1+ exp_name=" eval_webshop"
# Distributed WebShop evaluation: starts ../../utils/distributed_eval_task.py
# through `accelerate launch`. The script path is relative to the current
# directory, so run this from a directory where that path resolves.
set -euo pipefail

inference_file='/home/user/muxin/OpenManus-RL/data/webshop/webshop_inference.json'  # Path to the trainset file which contains idxs for the task.

num_processes='8'
main_process_port='8877'
weight_decay="0"  # NOTE: not forwarded to the evaluation command below.

### Default variables
task_name="webshop"  # change this to evaluate on a different task
output_dir="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/output"

# agent model
# model_path="/data1/models/openmanus_rl/Qwen/Qwen3-3b-sft/global_step_1"
model_path="/data1/models/Qwen/Qwen2.5-3B"
eval_batch_size="1"
num_workers="8"
seed="42"
do_sample="False"
temperature="1.0"

max_round="6"
env_server_base="http://127.0.0.1:36001"  # Set this to the base url of the EnvServer.
timeout="2400"

#########
mkdir -p -- "${output_dir}"

# You need to modify this as your agentgym/agentenv absolute path.
# The ${PYTHONPATH:+...} form appends the previous value only when there is
# one, which avoids a trailing ':' (an implicit "current directory" entry).
export PYTHONPATH="/home/user/muxin/OpenManus-RL/openmanus_rl/agentgym/agentenv${PYTHONPATH:+:${PYTHONPATH}}"

accelerate launch \
  --num_processes="${num_processes}" \
  --main_process_port="${main_process_port}" \
  ../../utils/distributed_eval_task.py \
  --model_path "${model_path}" \
  --output_file "${output_dir}/inference.jsonl" \
  --inference_file "${inference_file}" \
  --task_name "${task_name}" \
  --eval_batch_size "${eval_batch_size}" \
  --num_workers "${num_workers}" \
  --seed "${seed}" \
  --do_sample "${do_sample}" \
  --temperature "${temperature}" \
  --max_round "${max_round}" \
  --env_server_base "${env_server_base}" \
  --data_len 200 \
  --timeout "${timeout}"
Original file line number Diff line number Diff line change @@ -14,4 +14,4 @@ vllm<=0.6.3
1414wandb
1515IPython
1616matplotlib
17- omegaconf
17+ omegaconf
Original file line number Diff line number Diff line change 1+ CONFIG_FILE=" " # fulfill the config yaml file here
# Offline rollout evaluation launcher.
# Builds a `python traj_generation/rollout_eval.py` command line from the
# defaults below plus any command-line overrides, runs it, and copies its
# stdout into ./logs/offline_rollout_<timestamp>.log.
#
# Options: --config F  --model_path P  --output_dir D  --task_names "A B"
#          --data_len N  --timeout S  --do_sample B  --temperature T
#          --seed N  --debug
set -euo pipefail

CONFIG_FILE="${CONFIG_FILE:-}"  # config yaml path; fill in here or pass --config
MODEL_PATH=""
OUTPUT_DIR=""
TASK_NAMES=""
DATA_LEN=200
TIMEOUT=2400
DO_SAMPLE="False"
TEMPERATURE=1.0
SEED=42
DEBUG=false

# Exit with an error unless the flag named in $1 is followed by a value.
# $2 is the caller's current argument count ($#). Without this guard a
# trailing flag makes `shift 2` fail without shifting, and the parse loop
# below would spin forever.
require_value() {
  if (( $2 < 2 )); then
    printf 'Option %s requires a value\n' "$1" >&2
    exit 1
  fi
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --config)
      require_value "$1" "$#"
      CONFIG_FILE="$2"
      shift 2
      ;;
    --model_path)
      require_value "$1" "$#"
      MODEL_PATH="$2"
      shift 2
      ;;
    --output_dir)
      require_value "$1" "$#"
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --task_names)
      require_value "$1" "$#"
      TASK_NAMES="$2"
      shift 2
      ;;
    --data_len)
      require_value "$1" "$#"
      DATA_LEN="$2"
      shift 2
      ;;
    --timeout)
      require_value "$1" "$#"
      TIMEOUT="$2"
      shift 2
      ;;
    --do_sample)
      require_value "$1" "$#"
      DO_SAMPLE="$2"
      shift 2
      ;;
    --temperature)
      require_value "$1" "$#"
      TEMPERATURE="$2"
      shift 2
      ;;
    --seed)
      require_value "$1" "$#"
      SEED="$2"
      shift 2
      ;;
    --debug)
      DEBUG=true
      shift
      ;;
    *)
      printf 'Unknown option: %s\n' "$1" >&2
      exit 1
      ;;
  esac
done

# --config is always placed on the command line, so an empty value would hand
# Python a flag with nothing after it; stop here with a clear message instead.
if [[ -z "${CONFIG_FILE}" ]]; then
  printf 'No config file given; pass --config <yaml>\n' >&2
  exit 1
fi

# Build the command as an array so every value stays a single argument and no
# `eval` is needed.
cmd=(python traj_generation/rollout_eval.py --config "${CONFIG_FILE}")

if [[ -n "${MODEL_PATH}" ]]; then
  cmd+=(--model_path "${MODEL_PATH}")
fi

if [[ -n "${OUTPUT_DIR}" ]]; then
  cmd+=(--output_dir "${OUTPUT_DIR}")
fi

if [[ -n "${TASK_NAMES}" ]]; then
  # Split on whitespace so a quoted list such as "a b" still arrives as
  # separate arguments, exactly as it did when the command was a flat string.
  read -r -a task_list <<< "${TASK_NAMES}"
  cmd+=(--task_names "${task_list[@]}")
fi

cmd+=(
  --data_len "${DATA_LEN}"
  --timeout "${TIMEOUT}"
  --do_sample "${DO_SAMPLE}"
  --temperature "${TEMPERATURE}"
  --seed "${SEED}"
)

if [[ "${DEBUG}" == true ]]; then
  cmd+=(--debug)
fi

# Create log directory
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_DIR="./logs"
mkdir -p -- "${LOG_DIR}"
LOG_FILE="${LOG_DIR}/offline_rollout_${TIMESTAMP}.log"

# Print the command (shell-quoted, so it can be pasted back into a terminal)
printf 'Running:'
printf ' %q' "${cmd[@]}"
printf '\n'
echo "Logging to: ${LOG_FILE}"

# Execute with logging. With `pipefail` a failing Python process fails the
# pipeline, and `set -e` then stops before the success message below.
"${cmd[@]}" | tee "${LOG_FILE}"

echo "Evaluation complete! Results saved to the output directory."
Original file line number Diff line number Diff line change 1+ #! /bin/bash
# Single-node multi-turn SFT launcher: runs verl.trainer.fsdp_sft_trainer
# under torchrun.
# Usage: run_sft.sh <nproc_per_node> <save_path> [other_configs...]
set -x

if [[ "$#" -lt 2 ]]; then
  echo "Usage: run_sft.sh <nproc_per_node> <save_path> [other_configs...]"
  exit 1
fi

nproc_per_node=$1
save_path=$2

# Drop the two positional arguments so "$@" holds only the extra overrides.
shift 2

# "$@" is quoted so each extra override stays one argument even if it contains
# spaces or glob characters. trainer.logger is quoted as a whole because the
# brackets would otherwise be treated as a shell glob pattern.
torchrun --standalone --nnodes=1 --nproc_per_node="${nproc_per_node}" \
  -m verl.trainer.fsdp_sft_trainer \
  data.train_files=OpenManus-RL/data/train_split.parquet \
  data.val_files=OpenManus-RL/data/test_split.parquet \
  data.multiturn.enable=true \
  data.multiturn.messages_key=prompt \
  data.micro_batch_size=4 \
  model.partial_pretrain=/data1/models/Qwen/Qwen3-4B \
  trainer.default_local_dir="${save_path}" \
  trainer.project_name=multiturn-sft \
  trainer.experiment_name=multiturn-sft-qwen-3-4b \
  "trainer.logger=['console']" \
  trainer.total_training_steps=1 \
  trainer.default_hdfs_dir=null "$@" \
  ulysses_sequence_parallel_size=2 \
  use_remove_padding=true
You can’t perform that action at this time.
0 commit comments