fix bug, add relative path

HappyWaterXP · HappyWaterXP · commit 8942ca9a5686 · 2025-05-20T18:35:57.000+08:00
diff --git a/README.md b/README.md
@@ -16,6 +16,7 @@ pip3 install vllm==0.8.3
 
 # Install flash-attn
 pip3 install flash-attn --no-build-isolation
+pip3 install tensorboard
 ```
 
 2. Prepare environment for ALFWorld
@@ -25,7 +26,9 @@ conda activate alfworld
 
 # download task for training
 pip install alfworld
-alfworld-download
+pip install fastapi
+pip install uvicorn
+alfworld-download --data-dir ./get_data/alfworld
 ```
 
 3. Prepare environment for ScienceWorld
@@ -34,12 +37,15 @@ conda create --name scienceworld python=3.8
 conda activate scienceworld
 
 pip install scienceworld
+pip install fastapi
+pip install uvicorn
 ```
 
 ## 2. Prepare for data
 ```
 # get task data for rl training
-bash get_data/get_data_for_training.sh
+cd get_data
+bash get_data_for_training.sh
 ```
 
 ## 3. Start training
diff --git a/cmd/alf.sh b/cmd/alf.sh
@@ -32,7 +32,7 @@ if ss -tuln | grep -q ":$PORT "; then
     echo "端口 $PORT 已被占用"
 else
     echo "$PORT 未被占用"
-    conda activate /path/to/alfworld-env
+    conda activate alfworld
     cd $REPO_HOME/verl/alfworld_server/server
     server_cmd="python start_server.py --num_servers 8"
 
@@ -43,7 +43,7 @@ else
 fi
 
 cd $REPO_HOME
-conda activate /path/to/embodied-r1-env
+conda activate embodied-r1
 cmd="bash ${bash_path}"
 echo "Running $cmd"
 
diff --git a/cmd/sci_easy.sh b/cmd/sci_easy.sh
@@ -32,7 +32,7 @@ if ss -tuln | grep -q ":$PORT "; then
     echo "端口 $PORT 已被占用"
 else
     echo "$PORT 未被占用"
-    conda activate /path/to/sciworld-env
+    conda activate scienceworld
     cd $REPO_HOME/verl/scienceworld_server
     server_cmd="python start_server.py --num_servers 8"
 
@@ -43,7 +43,7 @@ else
 fi
 
 cd $REPO_HOME
-conda activate /path/to/embodied-r1-env
+conda activate embodied-r1
 cmd="bash ${bash_path}"
 echo "Running $cmd"
 
diff --git a/cmd/sci_nornal.sh b/cmd/sci_nornal.sh
@@ -32,7 +32,7 @@ if ss -tuln | grep -q ":$PORT "; then
     echo "端口 $PORT 已被占用"
 else
     echo "$PORT 未被占用"
-    conda activate /path/to/sciworld-env
+    conda activate scienceworld
     cd $REPO_HOME/verl/scienceworld_server
     server_cmd="python start_server.py --num_servers 8"
 
@@ -43,7 +43,7 @@ else
 fi
 
 cd $REPO_HOME
-conda activate /path/to/embodied-r1-env
+conda activate embodied-r1
 cmd="bash ${bash_path}"
 echo "Running $cmd"
 
diff --git a/examples/grpo_trainer/alf.sh b/examples/grpo_trainer/alf.sh
@@ -9,7 +9,7 @@ ray start --head
 python -m verl.trainer.main_ppo_alf \
     algorithm.adv_estimator=grpo \
     data.train_files=get_data/rl/alf_train.json \
-    data.val_files=get_data/rl/alf_seen.json \
+    data.val_files=get_data/rl/alf_valid_seen.json \
     data.train_batch_size=128 \
     +data.max_length=4096 \
     +data.max_steps=30 \
diff --git a/examples/grpo_trainer/sci_easy.sh b/examples/grpo_trainer/sci_easy.sh
@@ -9,7 +9,7 @@ ray start --head
 python -m verl.trainer.main_ppo_sci \
     algorithm.adv_estimator=grpo \
     data.train_files=get_data/rl/sci_train.json \
-    data.val_files=get_data/rl/sci_seen.json \
+    data.val_files=get_data/rl/sci_dev.json \
     data.train_batch_size=64 \
     +data.max_length=4096 \
     +data.max_steps=30 \
diff --git a/examples/grpo_trainer/sci_normal.sh b/examples/grpo_trainer/sci_normal.sh
@@ -3,13 +3,13 @@ set -x
 
 export system_prompt='You are a helpful assistant to do some scientific experiment in an environment.\nYou should explore the environment and find the items you need to complete the experiment.\n\nIn the environment, there are several rooms: kitchen, foundry, workshop, bathroom, outside, living room, bedroom, greenhouse, art studio, hallway.\nThe available actions are:\nactivate OBJ\nclose OBJ\nconnect OBJ to OBJ\ndeactivate OBJ\ndisconnect OBJ\ndunk OBJ in OBJ\neat OBJ\nflush OBJ\nfocus on OBJ\ngo LOC\ninventory\nlook around\nlook at OBJ\nlook in OBJ\nmix OBJ\nmove OBJ to OBJ\nopen OBJ\npick up OBJ\npour OBJ in OBJ\nput down OBJ\nread OBJ\nuse OBKJ on OBJ\nwait: wait 10 steps\nwait1: wait 1 step\ntask: check your task\ndone: indicate that you believe the task is complete\nWhen arrive a new location, you should use look around to check the OBj you can interact with.\nUse focus on OBJ only neccessary as incorrect use will cause environment ends.\nDo not proceed with any further exploration or actions until you receive the feedback from the environment after your action.\nYour response should use the following format:\n\nThought: <your thoughts>\nAction: <your next action>'
 start_port=8000
-model_path='/path/to/Qwen/Qwen2.5-7B-Instruct'
+model_path='/path/to/Qwen2.5-7B-Instruct'
 
 ray start --head
 python -m verl.trainer.main_ppo_sci \
     algorithm.adv_estimator=grpo \
     data.train_files=get_data/rl/sci_train.json \
-    data.val_files=get_data/rl/sci_seen.json \
+    data.val_files=get_data/rl/sci_dev.json \
     data.train_batch_size=64 \
     +data.max_length=4096 \
     +data.max_steps=30 \
diff --git a/get_data/get_data_for_training.sh b/get_data/get_data_for_training.sh
@@ -12,7 +12,7 @@ python -m utils.modify_alf_sft --input ${ORIGIN_SFT}/data/sciworld_sft.json --ou
 
 
 # 2. task for rl
-ALF_GAMEFILE_PATH='~/.cache/alfworld'
+ALF_GAMEFILE_PATH='./alfworld'
 python -m utils.generate_alf_indice --input "${ALF_GAMEFILE_PATH}/json_2.1.1/train" --output ./rl/alf_train.json
 python -m utils.generate_alf_indice --input "${ALF_GAMEFILE_PATH}/json_2.1.1/valid_seen" --output ./rl/alf_valid_seen.json
 python -m utils.generate_alf_indice --input "${ALF_GAMEFILE_PATH}/json_2.1.1/valid_unseen" --output ./rl/alf_valid_unseen.json
diff --git a/verl/workers/rollout/vllm_rollout/alf_rollout.py b/verl/workers/rollout/vllm_rollout/alf_rollout.py
@@ -129,6 +129,7 @@ def __init__(self, model_path: str, config: DictConfig, tokenizer, model_hf_conf
                 max_num_batched_tokens=max_num_batched_tokens,
                 enable_chunked_prefill=config.enable_chunked_prefill,
                 enable_prefix_caching=True,
+                seed=42,
             )
         else:
             raise NotImplementedError
diff --git a/verl/workers/rollout/vllm_rollout/sci_rollout.py b/verl/workers/rollout/vllm_rollout/sci_rollout.py
@@ -129,6 +129,7 @@ def __init__(self, model_path: str, config: DictConfig, tokenizer, model_hf_conf
                 max_num_batched_tokens=max_num_batched_tokens,
                 enable_chunked_prefill=config.enable_chunked_prefill,
                 enable_prefix_caching=True,
+                seed=42,
             )
         else:
             raise NotImplementedError

Original file line number	Diff line number	Diff line change
`@@ -129,6 +129,7 @@ def __init__(self, model_path: str, config: DictConfig, tokenizer, model_hf_conf`
`129`	`129`	`max_num_batched_tokens=max_num_batched_tokens,`
`130`	`130`	`enable_chunked_prefill=config.enable_chunked_prefill,`
`131`	`131`	`enable_prefix_caching=True,`
	`132`	`+ seed=42,`
`132`	`133`	`)`
`133`	`134`	`else:`
`134`	`135`	`raise NotImplementedError`