Commit e1f29da: training (gliding)
1 parent: 6a00b2a

File tree: 7 files changed (+251 / -128 lines)

isaacgymenvs/cfg/config.yaml

Lines changed: 7 additions & 5 deletions

@@ -39,15 +39,17 @@ checkpoint: ''
 # set to True to use multi-gpu horovod training
 multi_gpu: False
 
+experiment_dir: ''
+
 wandb_activate: False
 wandb_group: ''
 wandb_name: ${train.params.config.name}
 wandb_entity: ''
-wandb_project: 'isaacgymenvs'
-capture_video: False
-capture_video_freq: 7000
-capture_video_len: 100
-force_render: True
+wandb_project: ${task.wandb_project}
+capture_video: ${task.capture_video}
+capture_video_freq: ${task.capture_video_freq}
+capture_video_len: ${task.capture_video_len}
+force_render: ${task.force_render}
 
 # disables rendering
 headless: False
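The new `${task.*}` values are Hydra/OmegaConf interpolations: the top-level wandb and video-capture settings now resolve from whichever task config is loaded (Atlas.yaml below supplies them) instead of being hard-coded here. A minimal sketch of how that resolution behaves, using OmegaConf directly rather than the full Hydra launcher isaacgymenvs uses:

```python
# Minimal OmegaConf sketch of the ${task.*} interpolations introduced above.
# Illustrative only: isaacgymenvs composes the real configs through Hydra.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "task": {"wandb_project": "atlas", "capture_video": True},
    "wandb_project": "${task.wandb_project}",   # defers to the task config
    "capture_video": "${task.capture_video}",
})

print(cfg.wandb_project)  # "atlas" -- resolved on access
print(cfg.capture_video)  # True
```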

isaacgymenvs/cfg/task/Atlas.yaml

Lines changed: 17 additions & 8 deletions

@@ -1,6 +1,15 @@
 # used to create the object
 name: Atlas
 
+wandb_group: ''
+wandb_name: Atlas
+wandb_entity: ''
+wandb_project: 'atlas'
+capture_video: True
+capture_video_freq: 2500
+capture_video_len: 100
+force_render: True
+
 physics_engine: 'physx'
 
 env:
@@ -35,7 +44,7 @@ env:
 
   randomCommandVelocityRanges:
     # train
-    linear_x: [-1., 1.] # min max [m/s]
+    linear_x: [-2., 2.] # min max [m/s]
     linear_y: [0., 0.] # min max [m/s]
     yaw: [-1.57, 1.57] # min max [rad/s]
 
@@ -44,7 +53,7 @@ env:
     stiffness: 85.0 # [N*m/rad]
     damping: 4.0 # [N*m*s/rad]
     # action scale: target angle = actionScale * action + defaultAngle
-    actionScale: 0.5
+    actionScale: 0.75
     # decimation: Number of control action updates @ sim DT per policy DT
     decimation: 4
 
@@ -76,15 +85,15 @@ env:
     allowKneeContacts: true
     # rewards
     terminalReward: 0.0
-    linearVelocityXYRewardScale: 1.0
+    linearVelocityXYRewardScale: 5.0
     linearVelocityZRewardScale: -4.0
     angularVelocityXYRewardScale: -0.05
     angularVelocityZRewardScale: 0.5
-    orientationRewardScale: -0. #-1.
-    torqueRewardScale: -0.00002 # -0.000025
+    orientationRewardScale: -0.5 #-1.
+    torqueRewardScale: -0.000005 # -0.000025
     jointAccRewardScale: -0.0005 # -0.0025
     baseHeightRewardScale: -0.0 #5
-    feetAirTimeRewardScale: 1.0
+    feetAirTimeRewardScale: 2.0
     kneeCollisionRewardScale: -0.25
     feetStumbleRewardScale: -0. #-2.0
     actionRateRewardScale: -0.01
@@ -127,8 +136,8 @@ env:
   enableCameraSensors: False
 
 sim:
-  dt: 0.02
-  substeps: 2
+  dt: 0.005
+  substeps: 1
   up_axis: "z"
   use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
   gravity: [0.0, 0.0, -9.81]
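Two of these changes interact: physics now steps at 200 Hz (dt 0.005, substeps 1), and with decimation still 4 the policy acts every 0.005 * 4 = 0.02 s (50 Hz), up from 12.5 Hz under the old dt of 0.02. The config comment also spells out the action mapping (target angle = actionScale * action + defaultAngle). Below is a hedged sketch of that control path, assuming a standard PD law consistent with the stiffness/damping units; the joint values and array shapes are placeholders, not from the repo:

```python
import numpy as np

# Timing from the new config: 0.005 s sim steps, 4 control updates per policy
# step, so the policy acts every 0.005 * 4 = 0.02 s (50 Hz).
SIM_DT, DECIMATION = 0.005, 4
STIFFNESS, DAMPING, ACTION_SCALE = 85.0, 4.0, 0.75  # from the control section

def pd_torques(action, q, qd, default_angles):
    """Assumed PD law, matching the config comment:
    target angle = actionScale * action + defaultAngle."""
    target = ACTION_SCALE * action + default_angles
    return STIFFNESS * (target - q) - DAMPING * qd

# Placeholder state for two joints (illustrative only).
action = np.array([0.1, -0.2])
q, qd = np.zeros(2), np.zeros(2)
default_angles = np.array([0.0, 0.9])
for _ in range(DECIMATION):
    tau = pd_torques(action, q, qd, default_angles)
    # ...advance the simulator by SIM_DT with torque tau...
```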

isaacgymenvs/cfg/train/AtlasPPO.yaml

Lines changed: 3 additions & 11 deletions

@@ -9,7 +9,7 @@ params:
 
   network:
     name: actor_critic
-    separate: True
+    separate: False
 
     space:
       continuous:
@@ -23,22 +23,14 @@ params:
         fixed_sigma: True
 
     mlp:
-      units: [512] #, 256, 128]
+      units: [256, 128, 64]
       activation: elu
       d2rl: False
 
      initializer:
        name: default
      regularizer:
        name: None
-    rnn:
-      name: lstm
-      units: 256 #128
-      layers: 1
-      before_mlp: False #True
-      concat_input: True
-      layer_norm: False
-
 
   load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
   load_path: ${...checkpoint} # path to the checkpoint to load
@@ -75,7 +67,7 @@ params:
     bounds_loss_coef: 0.
 
     max_epochs: ${resolve_default:1000,${....max_iterations}}
-    save_best_after: 200
+    save_best_after: 0
     score_to_win: 20000
     save_frequency: 50
     print_stats: True
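The train config now asks for a single shared actor-critic trunk (`separate: False`) with a [256, 128, 64] ELU MLP, dropping the 512-unit MLP + LSTM. rl_games builds this network internally from these fields; the sketch below is only a rough PyTorch rendering of what they describe, with illustrative names throughout:

```python
import torch
import torch.nn as nn

# Rough sketch of the network the new config describes: one shared ELU trunk
# of [256, 128, 64] units feeding both the action mean and the value head.
class SharedActorCritic(nn.Module):
    def __init__(self, obs_dim: int, act_dim: int, units=(256, 128, 64)):
        super().__init__()
        layers, last = [], obs_dim
        for u in units:
            layers += [nn.Linear(last, u), nn.ELU()]
            last = u
        self.trunk = nn.Sequential(*layers)   # separate: False -> one trunk
        self.mu = nn.Linear(last, act_dim)    # mu_activation: None
        self.value = nn.Linear(last, 1)
        # fixed_sigma: True -> state-independent log-std, init 0 (std = 1)
        self.log_sigma = nn.Parameter(torch.zeros(act_dim))

    def forward(self, obs):
        h = self.trunk(obs)
        return self.mu(h), self.log_sigma.exp(), self.value(h)

# e.g. net = SharedActorCritic(obs_dim=48, act_dim=12)
```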
New file (filename not shown)

Lines changed: 81 additions & 0 deletions

@@ -0,0 +1,81 @@
+params:
+  seed: ${...seed}
+
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: True
+
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0. # std = 1.
+        fixed_sigma: True
+
+    mlp:
+      units: [512] #, 256, 128]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+    rnn:
+      name: lstm
+      units: 256 #128
+      layers: 1
+      before_mlp: False #True
+      concat_input: True
+      layer_norm: False
+
+
+  load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
+  load_path: ${...checkpoint} # path to the checkpoint to load
+
+  config:
+    name: ${resolve_default:Atlas,${....experiment}}
+    full_experiment_name: ${.name}
+    env_name: rlgpu
+    ppo: True
+    mixed_precision: True
+    normalize_input: True
+    normalize_value: True
+    normalize_advantage: True
+    value_bootstrap: True
+    clip_actions: False
+    num_actors: ${....task.env.numEnvs}
+    reward_shaper:
+      scale_value: 1.0
+    gamma: 0.99
+    tau: 0.95
+    e_clip: 0.2
+    entropy_coef: 0.001
+    learning_rate: 3.e-4 # overwritten by adaptive lr_schedule
+    lr_schedule: adaptive
+    kl_threshold: 0.008 # target kl for adaptive lr
+    truncate_grads: True
+    grad_norm: 1.
+    horizon_length: 24
+    minibatch_size: 512
+    mini_epochs: 5
+    critic_coef: 2
+    clip_value: True
+    seq_len: 4 # only for rnn
+    bounds_loss_coef: 0.
+
+    max_epochs: ${resolve_default:1000,${....max_iterations}}
+    save_best_after: 0
+    score_to_win: 20000
+    save_frequency: 50
+    print_stats: True
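This new file preserves the recurrent setup (separate trunks, 512-unit MLP, LSTM) that AtlasPPO.yaml just dropped, so either variant can be selected per run. Its `lr_schedule: adaptive` adjusts the learning rate from the measured policy KL against `kl_threshold: 0.008`; the sketch below shows the general shape of such a rule, where the 1.5 step factor and the lr bounds are illustrative assumptions, not rl_games' exact constants. Note also that rl_games expects horizon_length * num_actors to be divisible by minibatch_size (512 here).

```python
# Sketch of a KL-driven adaptive LR rule of the kind `lr_schedule: adaptive`
# selects. The 1.5 step factor and the lr bounds are illustrative assumptions;
# rl_games implements its own scheduler with its own constants.
def adaptive_lr(lr, kl, kl_threshold=0.008, min_lr=1e-6, max_lr=1e-2):
    if kl > 2.0 * kl_threshold:      # policy moved too far: damp the updates
        lr = max(lr / 1.5, min_lr)
    elif kl < 0.5 * kl_threshold:    # policy barely moved: allow bigger steps
        lr = min(lr * 1.5, max_lr)
    return lr

print(adaptive_lr(3e-4, kl=0.02))   # 0.0002 -- KL above 2 * threshold
```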
