78 changes: 39 additions & 39 deletions pufferlib/config/default.ini
@@ -29,18 +29,18 @@ optimizer = muon
anneal_lr = True
precision = float32
total_timesteps = 10_000_000
-learning_rate = 0.015
-gamma = 0.995
+learning_rate = 0.003
+gamma = 0.9650881439471051
gae_lambda = 0.90
update_epochs = 1
-clip_coef = 0.2
+clip_coef = 0.24983563859666713
vf_coef = 2.0
vf_clip_coef = 0.2
max_grad_norm = 1.5
-ent_coef = 0.001
-adam_beta1 = 0.95
-adam_beta2 = 0.999
-adam_eps = 1e-12
+ent_coef = 0.013899446116244659
+adam_beta1 = 0.9739575783018397
+adam_beta2 = 0.9983789918385146
+adam_eps = 2.112852785895044e-07

data_dir = experiments
checkpoint_interval = 200
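Note the sweep-tuned Adam constants above. A minimal sketch of how such values could be loaded and handed to a plain `torch.optim.Adam` (illustration only: the config actually selects `optimizer = muon`, and the trainer wires these values internally; the `[train]` section name is assumed):

```python
# Illustration only: load the tuned hyperparameters from default.ini and
# build a plain torch.optim.Adam as a stand-in for the real optimizer setup.
import configparser

import torch

cfg = configparser.ConfigParser(inline_comment_prefixes=("#",))
cfg.read("pufferlib/config/default.ini")
train = cfg["train"]  # assumed section name for these keys

model = torch.nn.Linear(8, 2)  # stand-in for the real policy network
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=train.getfloat("learning_rate"),  # 0.003 after this change
    betas=(
        train.getfloat("adam_beta1"),    # 0.9739575783018397
        train.getfloat("adam_beta2"),    # 0.9983789918385146
    ),
    eps=train.getfloat("adam_eps"),      # 2.112852785895044e-07
)
```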
@@ -74,67 +74,67 @@ downsample = 10
#scale = auto

# TODO: Elim from base
-[sweep.train.total_timesteps]
-distribution = log_normal
-min = 5e7
-max = 1e10
-mean = 1e8
-scale = time
-
-[sweep.train.bptt_horizon]
-distribution = uniform_pow2
-min = 16
-max = 64
-mean = 64
-scale = auto
+#[sweep.train.total_timesteps]
+#distribution = log_normal
+#min = 5e7
+#max = 1e10
+#mean = 1e8
+#scale = time
+
+#[sweep.train.bptt_horizon]
+#distribution = uniform_pow2
+#min = 16
+#max = 64
+#mean = 64
+#scale = auto

[sweep.train.minibatch_size]
distribution = uniform_pow2
-min = 8192
+min = 16384
max = 65536
mean = 32768
scale = auto

[sweep.train.learning_rate]
distribution = log_normal
min = 0.00001
-mean = 0.01
-max = 0.1
+mean = 0.00986346440817611
+max = 0.05
scale = 0.5

[sweep.train.ent_coef]
distribution = log_normal
min = 0.00001
-mean = 0.01
+mean = 0.01510492391719301
max = 0.2
scale = auto

[sweep.train.gamma]
distribution = logit_normal
min = 0.8
-mean = 0.98
+mean = 0.9785788872888401
max = 0.9999
scale = auto

[sweep.train.gae_lambda]
distribution = logit_normal
min = 0.6
-mean = 0.95
+mean = 0.9909980359361995
max = 0.995
scale = auto

[sweep.train.vtrace_rho_clip]
distribution = uniform
min = 0.0
max = 5.0
-mean = 1.0
+mean = 4.561253323222173
scale = auto

[sweep.train.vtrace_c_clip]
distribution = uniform
min = 0.0
max = 5.0
-mean = 1.0
+mean = 1.0511015002528337
scale = auto
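For readers skimming the specs above: each sweep entry names a sampling distribution plus `min`/`max`/`mean`/`scale` bounds. One plausible reading (a sketch only, not Protein's actual implementation) is to sample around `mean` in the distribution's natural space and clip to the bounds:

```python
# Rough illustration of how these sweep specs might be sampled. This is
# NOT pufferlib's Protein implementation -- just one plausible reading.
import math
import random


def sample(dist, lo, hi, mean, scale=0.5):
    if dist == "uniform":
        return random.uniform(lo, hi)
    if dist == "uniform_pow2":
        # Uniform over powers of two in [lo, hi] (bounds assumed powers of 2).
        exponents = range(int(math.log2(lo)), int(math.log2(hi)) + 1)
        return 2 ** random.choice(list(exponents))
    if dist == "log_normal":
        # Gaussian in log space, centered on log(mean).
        x = math.exp(random.gauss(math.log(mean), scale))
    else:  # "logit_normal": Gaussian in logit space, for params in (0, 1)
        logit = math.log(mean / (1.0 - mean))
        x = 1.0 / (1.0 + math.exp(-random.gauss(logit, scale)))
    return min(max(x, lo), hi)


# e.g. the learning_rate spec above: log_normal over [1e-5, 0.05]
print(sample("log_normal", 1e-5, 0.05, 0.00986346440817611, scale=0.5))
```

Under this reading, `uniform_pow2` keeps shape-like parameters on power-of-two values, while `logit_normal` suits parameters confined to (0, 1) such as gamma and the Adam betas.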

#[sweep.train.update_epochs]
@@ -148,7 +148,7 @@ scale = auto
distribution = uniform
min = 0.01
max = 1.0
-mean = 0.2
+mean = 0.38615719521473363
scale = auto

# Optimal vf clip can be lower than 0.1,
@@ -157,54 +157,54 @@ scale = auto
distribution = uniform
min = 0.1
max = 5.0
-mean = 0.2
+mean = 0.101
scale = auto

[sweep.train.vf_coef]
distribution = uniform
min = 0.0
max = 5.0
-mean = 2.0
+mean = 4.999
scale = auto

[sweep.train.max_grad_norm]
distribution = uniform
min = 0.0
-mean = 1.0
+mean = 2.3757898890403624
max = 5.0
scale = auto

[sweep.train.adam_beta1]
distribution = logit_normal
min = 0.5
-mean = 0.9
+mean = 0.9227290229121611
max = 0.999
scale = auto

[sweep.train.adam_beta2]
distribution = logit_normal
min = 0.9
-mean = 0.999
+mean = 0.9615271430090461
max = 0.99999
scale = auto

[sweep.train.adam_eps]
distribution = log_normal
min = 1e-14
-mean = 1e-8
+mean = 5.941675291037148e-05
max = 1e-4
scale = auto

[sweep.train.prio_alpha]
distribution = logit_normal
min = 0.1
-mean = 0.85
-max = 0.99
+mean = 0.9698884328392982
+max = 0.999
scale = auto

[sweep.train.prio_beta0]
distribution = logit_normal
min = 0.1
-mean = 0.85
-max = 0.99
+mean = 0.9876650505878536
+max = 0.999
scale = auto
196 changes: 196 additions & 0 deletions pufferlib/config/ocean/drone_delivery.ini
@@ -0,0 +1,196 @@
[base]
package = ocean
env_name = puffer_drone_delivery
policy_name = Policy
rnn_name = Recurrent

[policy]
hidden_size = 256

[rnn]
input_size = 256
hidden_size = 256
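The `[policy]` and `[rnn]` sizes must agree: the encoder's 256-dim output feeds `input_size = 256` of the recurrent core. A shape-check sketch, assuming the `Recurrent` wrapper is LSTM-like (the real `Policy`/`Recurrent` classes live in the ocean package, and the observation size here is a stand-in):

```python
# Shape check for the [policy]/[rnn] sizes above; encoder and obs size
# are stand-ins, the Recurrent wrapper is assumed LSTM-like.
import torch

encoder = torch.nn.Linear(64, 256)  # 64 = assumed observation size
rnn = torch.nn.LSTM(input_size=256, hidden_size=256, batch_first=True)

obs = torch.randn(16, 8, 64)         # (batch, time, obs)
features = torch.relu(encoder(obs))  # (16, 8, 256) -- matches rnn input_size
out, _ = rnn(features)               # (16, 8, 256)
print(out.shape)
```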

[vec]
num_envs = 16

[env]
num_envs = 16
num_drones = 64

ablation = 0.0
anneal_min = 0.9236471304397496

grip_k_max = 5.123485320457474

perfect_anneal = 275_000_000
perfect_deadline = 6.077482208019486e+07

pos_const = 0.8113024567778199
pos_penalty = 0.02588602599777176

reward_grip = 0.9760442313179252
reward_ho_drop = 0.5
reward_hover = 0.0

reward_max_dist = 65.0
reward_min_dist = 2.531838566923265

vel_penalty_clamp = 0.056085060041244056

w_approach = 1.5767291682972426
w_position = 1.0188128215656589
w_stability = 0.012739532721609687
w_velocity = 0.11573303880718092
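These `[env]` keys are reward-shaping weights and schedule knobs surfaced as sweep targets below. The authoritative formula lives in the drone_delivery environment source; the sketch below is a hypothetical composition meant only to show the role each weight plays (every term and argument name is an assumption):

```python
# Hypothetical composition of the reward-shaping weights above -- for
# illustration only; the real formula is defined by the environment.
def shaped_reward(prev_dist, dist, pos_err, speed, spin,
                  w_approach=1.5767291682972426,
                  w_position=1.0188128215656589,
                  w_velocity=0.11573303880718092,
                  w_stability=0.012739532721609687,
                  pos_const=0.8113024567778199,
                  pos_penalty=0.02588602599777176,
                  vel_penalty_clamp=0.056085060041244056):
    r = w_approach * (prev_dist - dist)                  # reward progress
    r += w_position * pos_const / (pos_const + pos_err)  # proximity bonus
    r -= w_velocity * min(speed, vel_penalty_clamp)      # clamped speed cost
    r -= w_stability * spin                              # damp angular rate
    r -= pos_penalty * pos_err                           # position penalty
    return r
```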

[train]
adam_beta1 = 0.9227290229121611
adam_beta2 = 0.9615271430090461
adam_eps = 5.941675291037148e-05
anneal_lr = true
batch_size = auto
bptt_horizon = 64
checkpoint_interval = 200
clip_coef = 0.38615719521473363
ent_coef = 0.01510492391719301
gae_lambda = 0.9909980359361995
gamma = 0.9785788872888401
learning_rate = 0.00986346440817611
#learning_rate = 0.005
max_grad_norm = 2.3757898890403624
max_minibatch_size = 65536
minibatch_size = 32768 #32768
prio_alpha = 0.9698884328392982
prio_beta0 = 0.9876650505878536
total_timesteps = 300_000_000
update_epochs = 1
#use_rnn = false
vf_clip_coef = 0.101
vf_coef = 4.999
vtrace_c_clip = 1.0511015002528337
vtrace_rho_clip = 4.561253323222173
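One practical note on this `[train]` block: it mixes underscore literals (`300_000_000`), inline comments (`32768 #32768`), and the sentinel `auto`. A sketch of reading it with the standard library, assuming the trainer resolves `auto` itself:

```python
# Sketch: parsing this [train] block. inline_comment_prefixes strips
# `#32768`-style trailing comments, and Python's int()/float() accept
# underscore digit grouping (PEP 515).
import configparser

cfg = configparser.ConfigParser(inline_comment_prefixes=("#",))
cfg.read("pufferlib/config/ocean/drone_delivery.ini")
train = cfg["train"]

total_timesteps = int(train["total_timesteps"])  # 300000000
learning_rate = train.getfloat("learning_rate")  # 0.00986346440817611
batch_size = train["batch_size"]                 # "auto": left to the trainer
print(total_timesteps, learning_rate, batch_size)
```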

[sweep]
method = Protein
metric = perfect_deliv
goal = maximize
downsample = 0

[sweep.env.ablation]
distribution = uniform
min = 0.0
max = 1.0
mean = 0.0
scale = auto

[sweep.env.anneal_min]
distribution = uniform
min = 0.0
max = 1.0
mean = 0.9236471304397496
scale = auto

[sweep.env.grip_k_max]
distribution = uniform
min = 5.0
max = 20.0
mean = 5.123485320457474
scale = auto

[sweep.env.perfect_anneal]
distribution = uniform
min = 200_000_000
max = 275_000_000
mean = 275_000_000
scale = auto

#[sweep.env.perfect_anneal_start]
#distribution = uniform
#min = 150_000_000
#max = 200_000_000
#mean = 175_000_000
#scale = auto

[sweep.env.perfect_deadline]
distribution = uniform
min = 50_000_000
max = 150_000_000
mean = 6.077482208019486e+07
scale = auto

[sweep.env.pos_const]
distribution = uniform
min = 0.001
max = 1.0
mean = 0.8113024567778199
scale = auto

[sweep.env.pos_penalty]
distribution = uniform
min = 0.0
max = 0.25
mean = 0.02588602599777176
scale = auto

[sweep.env.reward_grip]
distribution = uniform
min = 0.0
max = 1.0
mean = 0.9760442313179252
scale = auto

[sweep.env.reward_ho_drop]
distribution = uniform
min = 0.1
max = 0.5
mean = 0.5
scale = auto

[sweep.env.reward_hover]
distribution = uniform
min = 0.0
max = 0.5
mean = 0.01
scale = auto

[sweep.env.reward_min_dist]
distribution = uniform
min = 0.1
max = 5.0
mean = 2.531838566923265
scale = auto

[sweep.env.vel_penalty_clamp]
distribution = uniform
min = 0.0
max = 0.5
mean = 0.056085060041244056
scale = auto

[sweep.env.w_approach]
distribution = uniform
min = 0.0
max = 2.5
mean = 1.5767291682972426
scale = auto

[sweep.env.w_position]
distribution = uniform
min = 0.0
max = 1.5
mean = 1.0188128215656589
scale = auto

[sweep.env.w_stability]
distribution = uniform
min = 0.0
max = 2.5
mean = 0.012739532721609687
scale = auto

[sweep.env.w_velocity]
distribution = uniform
min = 0.0
max = 1.5
mean = 0.11573303880718092
scale = auto