
Commit 6606656
fix typo
Parent: a76cff2

File tree: 4 files changed, 172 additions and 3 deletions


.gitignore

Lines changed: 2 additions & 1 deletion
Lines changed: 2 additions & 1 deletion

@@ -144,4 +144,5 @@ test/
 
 easy_*
 normal_*
-outputs_*
+outputs_*
+*_outputs
README.md

Lines changed: 24 additions & 1 deletion
@@ -4,7 +4,7 @@
 1. Embodied-Planner-R1 is based on verl with vLLM>=0.8
 ```
 # Create the conda environment
-conda create -n Embodied-Planner-R1 python==3.10
+conda create -n Embodied-Planner-R1 python=3.10
 conda activate Embodied-Planner-R1
 
 cd Embodied-Planner-R1
@@ -36,6 +36,7 @@ conda create --name scienceworld python=3.8
 conda activate scienceworld
 
 pip install scienceworld
+conda install -y -c conda-forge openjdk=11
 pip install fastapi
 pip install uvicorn
 ```
@@ -54,3 +55,25 @@ bash cmd/alf.sh
 
 bash cmd/sci_easy.sh
 ```
+
+## 4. Evaluation
+```
+# We follow the framework of MINT to evaluate models.
+cd verl/eval_agent
+conda create -n eval_agent python=3.10
+conda activate eval_agent
+bash setup.sh
+
+conda create -n vllm python=3.10
+conda activate vllm
+pip install vllm
+
+# Deploy the model
+python -m vllm.entrypoints.openai.api_server --served-model-name embodied_r1_alfworld --model /path/to/model --port 8000 --disable-frontend-multiprocessing --gpu-memory-utilization 0.99 --max-model-len 4096 --enforce-eager
+
+# Start evaluation
+conda activate eval_agent
+
+python -m eval_agent.main --agent_config er1_alfworld --exp_config alfworld_v2 --split dev --verbose  # you can find more examples in eval.sh
+```
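
Once the vLLM server from the evaluation snippet above is running, it can be sanity-checked before launching eval_agent by querying the OpenAI-compatible HTTP endpoints that vLLM exposes. A minimal sketch, assuming the port (8000) and served model name (embodied_r1_alfworld) from the command above and that the server runs on the same machine:

```
# List the models the server is serving; embodied_r1_alfworld should appear
curl http://localhost:8000/v1/models

# Send one chat-completion request to confirm the model responds
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "embodied_r1_alfworld",
       "messages": [{"role": "user", "content": "You are in a room with a table and a mug. Pick up the mug."}],
       "max_tokens": 64}'
```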
Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ else
 fi
 
 cd $REPO_HOME
-conda activate embodied-r1
+conda activate Embodied-Planner-R1
 cmd="bash ${bash_path}"
 echo "Running $cmd"
 
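The `conda activate Embodied-Planner-R1` call above runs inside a non-interactive bash script, where `conda activate` only works after conda's shell hook has been sourced. If the script does not already do this elsewhere, a minimal sketch of the usual workaround (it assumes only that the `conda` binary is on PATH, since the base prefix is resolved via `conda info --base`):

```
# Load conda's shell functions so `conda activate` works in a non-interactive script
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate Embodied-Planner-R1
```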
get_data/alfworld/base_config.yaml

Lines changed: 145 additions & 0 deletions
@@ -0,0 +1,145 @@
dataset:
  data_path: '$ALFWORLD_DATA/json_2.1.1/train'
  eval_id_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_seen'    # null/None to disable
  eval_ood_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_unseen' # null/None to disable
  num_train_games: -1   # max training games (<=0 indicates full dataset)
  num_eval_games: -1    # max evaluation games (<=0 indicates full dataset)

logic:
  domain: '$ALFWORLD_DATA/logic/alfred.pddl'   # PDDL domain file that defines the world dynamics
  grammar: '$ALFWORLD_DATA/logic/alfred.twl2'  # Grammar file that defines the text feedbacks

env:
  type: 'AlfredTWEnv'             # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid'
  regen_game_files: False         # check if game is solvable by expert and save to game.tw-pddl file
  domain_randomization: False     # shuffle Textworld print order and object id nums
  task_types: [1, 2, 3, 4, 5, 6]  # task-type ids: 1 - Pick & Place, 2 - Examine in Light, 3 - Clean & Place, 4 - Heat & Place, 5 - Cool & Place, 6 - Pick Two & Place
  expert_timeout_steps: 150       # max steps before timeout for expert to solve the task
  expert_type: "handcoded"        # 'handcoded' or 'downward'. Note: the downward planner is very slow for real-time use
  goal_desc_human_anns_prob: 0.0  # prob of using human-annotated goal language instead of templated goals (1.0 indicates all human annotations from ALFRED)

  hybrid:
    start_eps: 100000  # starting episode of hybrid training, tw-only training up to this point
    thor_prob: 0.5     # prob of AlfredThorEnv during hybrid training
    eval_mode: "tw"    # 'tw' or 'thor' - env used for evaluation during hybrid training

  thor:
    screen_width: 300            # width of THOR window
    screen_height: 300           # height of THOR window
    smooth_nav: False            # smooth rotations, looks, and translations during navigation (very slow)
    save_frames_to_disk: False   # save frame PNGs to disk (useful for making videos)
    save_frames_path: './videos/' # path to save frame PNGs

controller:
  type: 'oracle'     # 'oracle' or 'oracle_astar' or 'mrcnn' or 'mrcnn_astar' (aka BUTLER)
  debug: False
  load_receps: True  # load receptacle locations from precomputed dict (if available)

mask_rcnn:
  pretrained_model_path: '$ALFWORLD_DATA/detectors/mrcnn.pth'

general:
  random_seed: 42
  use_cuda: True                  # disable this when running on a machine without cuda
  visdom: False                   # plot training/eval curves, run with visdom server
  task: 'alfred'
  training_method: 'dagger'       # 'dqn' or 'dagger'
  save_path: './training/'        # path to save pytorch models
  observation_pool_capacity: 3    # k-size queue, 0 indicates no observation
  hide_init_receptacles: False    # remove initial observation containing navigable receptacles

  training:
    batch_size: 10
    max_episode: 50000
    smoothing_eps: 0.1
    optimizer:
      learning_rate: 0.001
      clip_grad_norm: 5

  evaluate:
    run_eval: True
    batch_size: 10
    env:
      type: "AlfredTWEnv"

  checkpoint:
    report_frequency: 1000                 # report every N episodes
    experiment_tag: 'test'                 # name of experiment
    load_pretrained: False                 # during test, enable this so that the agent loads your pretrained model
    load_from_tag: 'not loading anything'  # name of pre-trained model to load in save_path

  model:
    encoder_layers: 1
    decoder_layers: 1
    encoder_conv_num: 5
    block_hidden_dim: 64
    n_heads: 1
    dropout: 0.1
    block_dropout: 0.1
    recurrent: True

rl:
  action_space: "admissible"  # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'beam_search_choice' or 'exhaustive' (not working)
  max_target_length: 20       # max token length for seq2seq generation
  beam_width: 10              # 1 means greedy
  generate_top_k: 3

  training:
    max_nb_steps_per_episode: 50          # terminate after this many steps
    learn_start_from_this_episode: 0      # delay updates until this episode
    target_net_update_frequency: 500      # sync target net with online net per this many epochs

  replay:
    accumulate_reward_from_final: True
    count_reward_lambda: 0.0         # 0 to disable
    novel_object_reward_lambda: 0.0  # 0 to disable
    discount_gamma_game_reward: 0.9
    discount_gamma_count_reward: 0.5
    discount_gamma_novel_object_reward: 0.5
    replay_memory_capacity: 500000   # adjust this depending on your RAM size
    replay_memory_priority_fraction: 0.5
    update_per_k_game_steps: 5
    replay_batch_size: 64
    multi_step: 3
    replay_sample_history_length: 4
    replay_sample_update_from: 2

  epsilon_greedy:
    noisy_net: False                # if this is true, then epsilon greedy is disabled
    epsilon_anneal_episodes: 1000   # -1 if not annealing
    epsilon_anneal_from: 0.3
    epsilon_anneal_to: 0.1

dagger:
  action_space: "generation"        # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'exhaustive' (not working)
  max_target_length: 20             # max token length for seq2seq generation
  beam_width: 10                    # 1 means greedy
  generate_top_k: 5
  unstick_by_beam_search: False     # use beam-search for failed actions, set True during evaluation

  training:
    max_nb_steps_per_episode: 50    # terminate after this many steps

  fraction_assist:
    fraction_assist_anneal_episodes: 50000
    fraction_assist_anneal_from: 1.0
    fraction_assist_anneal_to: 0.01

  fraction_random:
    fraction_random_anneal_episodes: 0
    fraction_random_anneal_from: 0.0
    fraction_random_anneal_to: 0.0

  replay:
    replay_memory_capacity: 500000
    update_per_k_game_steps: 5
    replay_batch_size: 64
    replay_sample_history_length: 4
    replay_sample_update_from: 2

vision_dagger:
  model_type: "resnet"                  # 'resnet' (whole image features) or 'maskrcnn_whole' (whole image MaskRCNN feats) or 'maskrcnn' (top k MaskRCNN detection feats) or 'no_vision' (zero vision input)
  resnet_fc_dim: 64
  maskrcnn_top_k_boxes: 10              # top k box features
  use_exploration_frame_feats: False    # append feats from initial exploration (memory intensive!)
  sequence_aggregation_method: "average" # 'sum' or 'average' or 'rnn'