Skip to content

Commit 58cd27e

Browse files
committed
delete absolute path and add data
1 parent b5b042b commit 58cd27e

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

56 files changed

+8009
-2390
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ outputs
128128

129129
data
130130

131-
*.json
131+
# *.json
132132

133133
docker
134134

cmd/sci_v2.sh renamed to cmd/alf.sh

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
final_folder="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/outputs_sci_easy"
1+
export REPO_HOME=$(pwd)
22

3-
bash_path=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/examples/grpo_trainer/sci_v2.sh
3+
final_folder="$REPO_HOME/verl/outputs_alf"
4+
5+
bash_path=$REPO_HOME/examples/grpo_trainer/alf.sh
46

57

68
project_name=$(basename "$bash_path" .sh)
@@ -23,24 +25,25 @@ tensorboard_folder="${res_folder}/tensorboard"
2325
export TENSORBOARD_DIR="$tensorboard_folder"
2426
mkdir -p "$tensorboard_folder"
2527
echo "tb saving in ${TENSORBOARD_DIR}"
28+
2629
source /opt/conda/etc/profile.d/conda.sh
2730
PORT=8000
2831
if ss -tuln | grep -q ":$PORT "; then
2932
echo "端口 $PORT 已被占用"
3033
else
3134
echo "$PORT 未被占用"
32-
conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld
33-
cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/scienceworld_server
34-
server_cmd="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld/bin/python start_server.py --num_servers 8"
35+
conda activate /path/to/alfworld-env
36+
cd $REPO_HOME/verl/alfworld_server/server
37+
server_cmd="python start_server.py --num_servers 8"
3538

3639
nohup $server_cmd > "${server_logging_folder}/run_stdout.log" 2> "${server_logging_folder}/run_stderr.log" &
3740
server_pid=$!
3841
echo "server Process ID: $server_pid Check logs in ${server_logging_folder}/"
3942
conda deactivate
4043
fi
4144

42-
cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod
43-
conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/verl_spmd
45+
cd $REPO_HOME
46+
conda activate /path/to/embodied-r1-env
4447
cmd="bash ${bash_path}"
4548
echo "Running $cmd"
4649

cmd/alf_v1.sh

Lines changed: 0 additions & 41 deletions
This file was deleted.

cmd/alf_v1_sft.sh

Lines changed: 0 additions & 41 deletions
This file was deleted.

cmd/alf_v2.sh

Lines changed: 0 additions & 47 deletions
This file was deleted.

cmd/sci_easy.sh

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
final_folder="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/outputs_sci_easy_v1"
1+
export REPO_HOME=$(pwd)
22

3-
bash_path=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/examples/grpo_trainer/sci_easy.sh
3+
final_folder="$REPO_HOME/verl/outputs_sci_easy"
4+
5+
bash_path=$REPO_HOME/examples/grpo_trainer/sci_easy.sh
46

57

68
project_name=$(basename "$bash_path" .sh)
@@ -23,24 +25,25 @@ tensorboard_folder="${res_folder}/tensorboard"
2325
export TENSORBOARD_DIR="$tensorboard_folder"
2426
mkdir -p "$tensorboard_folder"
2527
echo "tb saving in ${TENSORBOARD_DIR}"
28+
2629
source /opt/conda/etc/profile.d/conda.sh
2730
PORT=8000
2831
if ss -tuln | grep -q ":$PORT "; then
2932
echo "端口 $PORT 已被占用"
3033
else
3134
echo "$PORT 未被占用"
32-
conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld
33-
cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/scienceworld_server
34-
server_cmd="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld/bin/python start_server.py --num_servers 8"
35+
conda activate /path/to/sciworld-env
36+
cd $REPO_HOME/verl/scienceworld_server
37+
server_cmd="python start_server.py --num_servers 8"
3538

3639
nohup $server_cmd > "${server_logging_folder}/run_stdout.log" 2> "${server_logging_folder}/run_stderr.log" &
3740
server_pid=$!
3841
echo "server Process ID: $server_pid Check logs in ${server_logging_folder}/"
3942
conda deactivate
4043
fi
4144

42-
cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod
43-
conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/verl_spmd
45+
cd $REPO_HOME
46+
conda activate /path/to/embodied-r1-env
4447
cmd="bash ${bash_path}"
4548
echo "Running $cmd"
4649

cmd/sci_nornal.sh

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
final_folder="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/outputs_sci_normal_v1"
1+
export REPO_HOME=$(pwd)
22

3-
bash_path=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/examples/grpo_trainer/sci_normal.sh
3+
final_folder="$REPO_HOME/verl/outputs_sci_normal"
4+
5+
bash_path=$REPO_HOME/examples/grpo_trainer/sci_normal.sh
46

57

68
project_name=$(basename "$bash_path" .sh)
@@ -23,24 +25,25 @@ tensorboard_folder="${res_folder}/tensorboard"
2325
export TENSORBOARD_DIR="$tensorboard_folder"
2426
mkdir -p "$tensorboard_folder"
2527
echo "tb saving in ${TENSORBOARD_DIR}"
28+
2629
source /opt/conda/etc/profile.d/conda.sh
2730
PORT=8000
2831
if ss -tuln | grep -q ":$PORT "; then
2932
echo "端口 $PORT 已被占用"
3033
else
3134
echo "$PORT 未被占用"
32-
conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld
33-
cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/scienceworld_server
34-
server_cmd="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld/bin/python start_server.py --num_servers 8"
35+
conda activate /path/to/sciworld-env
36+
cd $REPO_HOME/verl/scienceworld_server
37+
server_cmd="python start_server.py --num_servers 8"
3538

3639
nohup $server_cmd > "${server_logging_folder}/run_stdout.log" 2> "${server_logging_folder}/run_stderr.log" &
3740
server_pid=$!
3841
echo "server Process ID: $server_pid Check logs in ${server_logging_folder}/"
3942
conda deactivate
4043
fi
4144

42-
cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod
43-
conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/verl_spmd
45+
cd $REPO_HOME
46+
conda activate /path/to/embodied-r1-env
4447
cmd="bash ${bash_path}"
4548
echo "Running $cmd"
4649

cmd/sci_v1.sh

Lines changed: 0 additions & 47 deletions
This file was deleted.
Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
#!/bin/sh
22
set -x
33

4-
# export VLLM_ATTENTION_BACKEND=XFORMERS
5-
# HOME='/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl'
6-
74
system_prompt='You are an intelligent agent in a household environment and your target is to perform actions to complete the task goal. At the beginning of your interactions, you will be given the detailed description of the current environment and your goal to accomplish. \nFor each of your turn, you will be given the observation of the last turn. You should first think about the current condition and plan for your future actions, and then output your action in this turn. Your output must strictly follow this format:Thought: your thoughts.\nAction: your next action.\n\nThe available actions are:\n1. `go to (receptacle)`\n2. `open (receptacle)`\n3. `close (receptacle)`\n4. `take (object) from (receptacle)`\n5. `move (object) to (receptacle)`\n6. `examine (something) with (object)`\n7. `use (object)`\n8. `heat (object) with (receptacle)`\n9. `clean (object) with (receptacle)`\n10. `cool (object) with (receptacle)`\n11. `slice (object) with (object)` - slice an object using a sharp object\n12. `look` - look around your current location\n13. `inventory` - check your current inventory\n14. `done` - Indicate that you believe the task is complete\nWhere `(object)` refers to manipulable objects and `(receptacle)` refers to receptacles or locations in the environment.\nAfter your each turn, the environment will give you immediate feedback based on which you plan your next few steps. if the environment output: Nothing happens, that means the previous action is invalid and you should try more options.\nYou can only hold one object at a time. 
Before taking a new object, make sure you have placed down any object you are currently holding.\nYou should not assume or anticipate the feedback.\nEven if you have planned multiple steps ahead, you should only execute one action at a time\nDo not proceed with any further exploration or actions until you receive the feedback from the environment after your action.\nYour response should use the following format:\n\nThought: <your thoughts>\nAction: <your next action>'
85
start_port=8000
9-
model_path='/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/data/Qwen/Qwen2.5-7B-Instruct'
6+
model_path='/path/to/Qwen2.5-7B-Instruct'
107

118
ray start --head
129
python -m verl.trainer.main_ppo_alf \
1310
algorithm.adv_estimator=grpo \
14-
data.train_files=/inspire/ssd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/public/zyfei/open-embodied-r1/data/alfworld-data/train_data.json \
15-
data.val_files=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/data/WKM-unseen/seen.json \
11+
data.train_files=get_data/rl/alf_train.json \
12+
data.val_files=get_data/rl/alf_seen.json \
1613
data.train_batch_size=128 \
1714
+data.max_length=4096 \
1815
+data.max_steps=30 \

0 commit comments

Comments
 (0)