examples/embodiment/config: 8 files changed, +23 -28 lines

File 1 of 8:
@@ -38,9 +38,6 @@ runner:
   resume_dir: null

 algorithm:
-  auto_reset: False
-  ignore_terminations: False
-  use_fixed_reset_state_ids: True
   normalize_advantages: True
   kl_penalty: kl  # how to estimate kl divergence: kl or kl_penalty
   group_size: 8
@@ -52,7 +49,7 @@ algorithm:
   logprob_type: chunk_level
   entropy_type: token_level

-  update_epoch: 4
+  update_epoch: 2
   adv_type: grpo
   loss_type: actor
   loss_agg_func: "token-mean"
@@ -67,7 +64,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
@@ -97,18 +94,17 @@

   train:
     total_num_envs: 64
-    max_episode_steps: 480  # max episode steps for truncation
-    max_steps_per_rollout_epoch: 480
     reward_coef: ${algorithm.reward_coef}
     group_size: ${algorithm.group_size}
+    max_episode_steps: 480  # max episode steps for truncation
+    max_steps_per_rollout_epoch: 480
   eval:
     total_num_envs: 500
     auto_reset: True
     ignore_terminations: True
     max_episode_steps: 480
     max_steps_per_rollout_epoch: 480
     group_size: 1
-    use_fixed_reset_state_ids: True
     is_eval: True
     video_cfg:
       save_video: True
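For reference, a sketch of how the train block reads after this reorder, assembled only from the rows above (the parent section, any sibling keys the diff does not show, and the exact indentation are assumptions):

  train:
    total_num_envs: 64
    reward_coef: ${algorithm.reward_coef}
    group_size: ${algorithm.group_size}
    max_episode_steps: 480            # max episode steps for truncation
    max_steps_per_rollout_epoch: 480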
File 2 of 8:
@@ -63,7 +63,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
@@ -142,6 +142,7 @@ actor:
   # Override the default values in model/pi0_5
   model:
     model_path: "/path/to/model/RLinf-Pi05-SFT"
+    num_steps: 4

   optim:
     lr: 5.0e-6
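A sketch of the overridden actor model block after this addition (indentation is assumed from the hunk context, and the comment on num_steps is an assumption about its role, which the diff itself does not state):

  model:
    model_path: "/path/to/model/RLinf-Pi05-SFT"
    num_steps: 4  # assumed: number of denoising/integration steps used when sampling actions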
File 3 of 8:
@@ -49,7 +49,7 @@ algorithm:
   logprob_type: chunk_level
   entropy_type: token_level

-  update_epoch: 4
+  update_epoch: 2
   adv_type: grpo
   loss_type: actor
   loss_agg_func: "token-mean"
@@ -64,7 +64,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
@@ -105,7 +105,6 @@
     max_episode_steps: 320
     max_steps_per_rollout_epoch: 320
     group_size: 1
-    use_fixed_reset_state_ids: True
     is_eval: True
     video_cfg:
       save_video: True
@@ -126,7 +125,7 @@ rollout:
   model:
     model_path: "/path/to/model/RLinf-Pi0-SFT-Spatial-Object-Goal"
     precision: ${actor.model.precision}
-
+
 actor:
   group_name: "ActorGroup"
   channel:
File 4 of 8:
@@ -48,7 +48,7 @@ algorithm:
   reward_type: chunk_level
   logprob_type: chunk_level
   entropy_type: token_level
-  update_epoch: 4
+  update_epoch: 1

   adv_type: grpo
   loss_type: actor
@@ -64,7 +64,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
@@ -142,8 +142,11 @@ actor:
   # Override the default values in model/pi0_5
   model:
     model_path: "/path/to/model/RLinf-Pi05-SFT"
-    model_type: "openpi"
     num_action_chunks: 5
+    model_type: "openpi"
+    # openpi specific parameters
+    openpi:
+      noise_level: 0.3

   optim:
     lr: 5.0e-6
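After this hunk, the openpi-specific settings sit under their own key in the overridden actor model block; a sketch of the result (indentation assumed; the comment on noise_level is an assumption, since the diff only introduces the key):

  model:
    model_path: "/path/to/model/RLinf-Pi05-SFT"
    num_action_chunks: 5
    model_type: "openpi"
    # openpi specific parameters
    openpi:
      noise_level: 0.3  # assumed: noise scale applied by the openpi policy when sampling rollout actions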
File 5 of 8:
@@ -49,7 +49,7 @@ algorithm:
   logprob_type: chunk_level
   entropy_type: token_level

-  update_epoch: 4
+  update_epoch: 2
   adv_type: grpo
   loss_type: actor
   loss_agg_func: "token-mean"
@@ -64,7 +64,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
@@ -98,16 +98,13 @@
     group_size: ${algorithm.group_size}
     max_episode_steps: 240
     max_steps_per_rollout_epoch: 240
-    use_fixed_reset_state_ids: True
-    use_ordered_reset_state_ids: False
   eval:
     total_num_envs: 500
     auto_reset: True
     ignore_terminations: True
     max_episode_steps: 240
     max_steps_per_rollout_epoch: 240
     group_size: 1
-    use_fixed_reset_state_ids: True
     is_eval: True
     video_cfg:
       save_video: True
File 6 of 8:
@@ -64,7 +64,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
@@ -98,8 +98,6 @@
     group_size: ${algorithm.group_size}
     max_episode_steps: 240
     max_steps_per_rollout_epoch: 240
-    use_fixed_reset_state_ids: True
-    use_ordered_reset_state_ids: False
   eval:
     total_num_envs: 500
     auto_reset: True
@@ -147,7 +145,8 @@ actor:
     model_path: "/path/to/model/RLinf-Pi05-SFT"
     model_type: "openpi"
     num_action_chunks: 5  # interface for the env
-    num_steps: 3
+    openpi:
+      noise_level: 0.3

   optim:
     lr: 5.0e-6
File 7 of 8:
@@ -50,7 +50,7 @@ algorithm:
   logprob_type: chunk_level
   entropy_type: token_level

-  update_epoch: 4
+  update_epoch: 2
   adv_type: grpo
   loss_type: actor
   loss_agg_func: "token-mean"
@@ -65,7 +65,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
File 8 of 8:
@@ -64,7 +64,7 @@ algorithm:
   gamma: 0.99
   gae_lambda: 0.95

-  filter_rewards: False
+  filter_rewards: True
   rewards_lower_bound: 0.1
   rewards_upper_bound: 0.9
   # params for generation
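Taken together, filter_rewards flips from False to True in every one of the eight configs, and five of them also lower update_epoch from 4 (to 2, or to 1 in one case). A consolidated sketch of the shared reward-filter block after this change (the comment describes assumed semantics, which the diff itself does not state):

algorithm:
  filter_rewards: True     # assumed: discard rollouts whose reward falls outside the bounds below
  rewards_lower_bound: 0.1
  rewards_upper_bound: 0.9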