OpenMOSS
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cmd/sci_v2.sh‎ ‎cmd/alf.sh‎cmd/sci_v2.sh renamed to cmd/alf.sh
Lines changed: 10 additions & 7 deletions b/‎cmd/sci_v2.sh‎ ‎cmd/alf.sh‎cmd/sci_v2.sh renamed to cmd/alf.sh
Lines changed: 10 additions & 7 deletions
diff --git a/‎cmd/alf_v1.sh‎
Lines changed: 0 additions & 41 deletions b/‎cmd/alf_v1.sh‎
Lines changed: 0 additions & 41 deletions
diff --git a/‎cmd/alf_v1_sft.sh‎
Lines changed: 0 additions & 41 deletions b/‎cmd/alf_v1_sft.sh‎
Lines changed: 0 additions & 41 deletions
diff --git a/‎cmd/alf_v2.sh‎
Lines changed: 0 additions & 47 deletions b/‎cmd/alf_v2.sh‎
Lines changed: 0 additions & 47 deletions
diff --git a/‎cmd/sci_easy.sh‎
Lines changed: 10 additions & 7 deletions b/‎cmd/sci_easy.sh‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎cmd/sci_nornal.sh‎
Lines changed: 10 additions & 7 deletions b/‎cmd/sci_nornal.sh‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎cmd/sci_v1.sh‎
Lines changed: 0 additions & 47 deletions b/‎cmd/sci_v1.sh‎
Lines changed: 0 additions & 47 deletions
diff --git a/‎examples/grpo_trainer/alf_v2.sh‎ ‎examples/grpo_trainer/alf.sh‎examples/grpo_trainer/alf_v2.sh renamed to examples/grpo_trainer/alf.sh
Lines changed: 3 additions & 6 deletions b/‎examples/grpo_trainer/alf_v2.sh‎ ‎examples/grpo_trainer/alf.sh‎examples/grpo_trainer/alf_v2.sh renamed to examples/grpo_trainer/alf.sh
Lines changed: 3 additions & 6 deletions
@@ -128,7 +128,7 @@ outputs
 
 data
 
-*.json
+# *.json
 
 docker
 
 
@@ -1,6 +1,8 @@
-final_folder="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/outputs_sci_easy"  
+export REPO_HOME=$(pwd)
 
-bash_path=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/examples/grpo_trainer/sci_v2.sh
+final_folder="$REPO_HOME/verl/outputs_alf"  
+
+bash_path=$REPO_HOME/examples/grpo_trainer/alf.sh
 
 
 project_name=$(basename "$bash_path" .sh)
@@ -23,24 +25,25 @@ tensorboard_folder="${res_folder}/tensorboard"
 export TENSORBOARD_DIR="$tensorboard_folder"
 mkdir -p "$tensorboard_folder"
 echo "tb saving in ${TENSORBOARD_DIR}"
+
 source /opt/conda/etc/profile.d/conda.sh
 PORT=8000
 if ss -tuln | grep -q ":$PORT "; then
     echo "端口 $PORT 已被占用"
 else
     echo "$PORT 未被占用"
-    conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld
-    cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/scienceworld_server
-    server_cmd="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld/bin/python start_server.py --num_servers 8"
+    conda activate /path/to/alfworld-env
+    cd $REPO_HOME/verl/alfworld_server/server
+    server_cmd="python start_server.py --num_servers 8"
 
     nohup $server_cmd > "${server_logging_folder}/run_stdout.log" 2> "${server_logging_folder}/run_stderr.log" &
     server_pid=$!
     echo "server Process ID: $server_pid Check logs in ${server_logging_folder}/"
     conda deactivate
 fi
 
-cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod
-conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/verl_spmd
+cd $REPO_HOME
+conda activate /path/to/embodied-r1-env
 cmd="bash ${bash_path}"
 echo "Running $cmd"
 
 
@@ -1,6 +1,8 @@
-final_folder="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/outputs_sci_easy_v1"  
+export REPO_HOME=$(pwd)
 
-bash_path=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/examples/grpo_trainer/sci_easy.sh
+final_folder="$REPO_HOME/verl/outputs_sci_easy"  
+
+bash_path=$REPO_HOME/examples/grpo_trainer/sci_easy.sh
 
 
 project_name=$(basename "$bash_path" .sh)
@@ -23,24 +25,25 @@ tensorboard_folder="${res_folder}/tensorboard"
 export TENSORBOARD_DIR="$tensorboard_folder"
 mkdir -p "$tensorboard_folder"
 echo "tb saving in ${TENSORBOARD_DIR}"
+
 source /opt/conda/etc/profile.d/conda.sh
 PORT=8000
 if ss -tuln | grep -q ":$PORT "; then
     echo "端口 $PORT 已被占用"
 else
     echo "$PORT 未被占用"
-    conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld
-    cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/scienceworld_server
-    server_cmd="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld/bin/python start_server.py --num_servers 8"
+    conda activate /path/to/sciworld-env
+    cd $REPO_HOME/verl/scienceworld_server
+    server_cmd="python start_server.py --num_servers 8"
 
     nohup $server_cmd > "${server_logging_folder}/run_stdout.log" 2> "${server_logging_folder}/run_stderr.log" &
     server_pid=$!
     echo "server Process ID: $server_pid Check logs in ${server_logging_folder}/"
     conda deactivate
 fi
 
-cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod
-conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/verl_spmd
+cd $REPO_HOME
+conda activate /path/to/embodied-r1-env
 cmd="bash ${bash_path}"
 echo "Running $cmd"
 
 
@@ -1,6 +1,8 @@
-final_folder="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/outputs_sci_normal_v1"  
+export REPO_HOME=$(pwd)  
 
-bash_path=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/examples/grpo_trainer/sci_normal.sh
+final_folder="$REPO_HOME/verl/outputs_sci_normal"  
+
+bash_path=$REPO_HOME/examples/grpo_trainer/sci_normal.sh
 
 
 project_name=$(basename "$bash_path" .sh)
@@ -23,24 +25,25 @@ tensorboard_folder="${res_folder}/tensorboard"
 export TENSORBOARD_DIR="$tensorboard_folder"
 mkdir -p "$tensorboard_folder"
 echo "tb saving in ${TENSORBOARD_DIR}"
+
 source /opt/conda/etc/profile.d/conda.sh
 PORT=8000
 if ss -tuln | grep -q ":$PORT "; then
     echo "端口 $PORT 已被占用"
 else
     echo "$PORT 未被占用"
-    conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld
-    cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod/verl/scienceworld_server
-    server_cmd="/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/scienceworld/bin/python start_server.py --num_servers 8"
+    conda activate /path/to/sciworld-env
+    cd $REPO_HOME/verl/scienceworld_server
+    server_cmd="python start_server.py --num_servers 8"
 
     nohup $server_cmd > "${server_logging_folder}/run_stdout.log" 2> "${server_logging_folder}/run_stderr.log" &
     server_pid=$!
     echo "server Process ID: $server_pid Check logs in ${server_logging_folder}/"
     conda deactivate
 fi
 
-cd /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl_mod
-conda activate /inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/lji_env/verl_spmd
+cd $REPO_HOME
+conda activate /path/to/embodied-r1-env
 cmd="bash ${bash_path}"
 echo "Running $cmd"
 
 
@@ -1,18 +1,15 @@
 #!/bin/sh
 set -x
 
-# export VLLM_ATTENTION_BACKEND=XFORMERS
-# HOME='/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/verl'
-
 system_prompt='You are an intelligent agent in a household environment and your target is to perform actions to complete the task goal. At the beginning of your interactions, you will be given the detailed description of the current environment and your goal to accomplish. \nFor each of your turn, you will be given the observation of the last turn. You should first think about the current condition and plan for your future actions, and then output your action in this turn. Your output must strictly follow this format:Thought: your thoughts.\nAction: your next action.\n\nThe available actions are:\n1. `go to (receptacle)`\n2. `open (receptacle)`\n3. `close (receptacle)`\n4. `take (object) from (receptacle)`\n5. `move (object) to (receptacle)`\n6. `examine (something) with (object)`\n7. `use (object)`\n8. `heat (object) with (receptacle)`\n9. `clean (object) with (receptacle)`\n10. `cool (object) with (receptacle)`\n11. `slice (object) with (object)` - slice an object using a sharp object\n12. `look` - look around your current location\n13. `inventory` - check your current inventory\n14. `done` - Indicate that you believe the task is complete\nWhere `(object)` refers to manipulable objects and `(receptacle)` refers to receptacles or locations in the environment.\nAfter your each turn, the environment will give you immediate feedback based on which you plan your next few steps. if the environment output: Nothing happens, that means the previous action is invalid and you should try more options.\nYou can only hold one object at a time. Before taking a new object, make sure you have placed down any object you are currently holding.\nYou should not assume or anticipate the feedback.\nEven if you have planned multiple steps ahead, you should only execute one action at a time\nDo not proceed with any further exploration or actions until you receive the feedback from the environment after your action.\nYour response should use the following format:\n\nThought: <your thoughts>\nAction: <your next action>'
 start_port=8000
-model_path='/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/data/Qwen/Qwen2.5-7B-Instruct'
+model_path='/path/to/Qwen2.5-7B-Instruct'
 
 ray start --head
 python -m verl.trainer.main_ppo_alf \
     algorithm.adv_estimator=grpo \
-    data.train_files=/inspire/ssd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/public/zyfei/open-embodied-r1/data/alfworld-data/train_data.json \
-    data.val_files=/inspire/hdd/ws-8207e9e2-e733-4eec-a475-cfa1c36480ba/embodied-multimodality/qiuxipeng-24028/xpqiu/lji/data/WKM-unseen/seen.json \
+    data.train_files=get_data/rl/alf_train.json \
+    data.val_files=get_data/rl/alf_seen.json \
     data.train_batch_size=128 \
     +data.max_length=4096 \
     +data.max_steps=30 \
-Original file line number
+Diff line change
 data
 -*.json
 +# *.json
 docker