Skip to content

Commit 69a7968

Browse files
committed
Swept parameters that solve 2048 in 600M steps (roughly 10 min of training).
1 parent 9307ce5 commit 69a7968

File tree

3 files changed

+244
-148
lines changed

3 files changed

+244
-148
lines changed

pufferlib/config/ocean/g2048.ini

Lines changed: 27 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,26 +2,38 @@
22
package = ocean
33
env_name = puffer_g2048
44
policy_name = G2048
5+
rnn_name = Recurrent
6+
7+
[policy]
8+
hidden_size = 256
9+
10+
[rnn]
11+
input_size = 256
12+
hidden_size = 256
13+
14+
[vec]
15+
num_envs = 4
516

617
[env]
718
num_envs = 4024
819

920
[train]
10-
total_timesteps = 10_000_000_000 #4.6B
11-
adam_beta1 = 0.8081024539479613
12-
adam_beta2 = 0.9978536811174212
13-
adam_eps = 1.4542006937471102e-9
21+
total_timesteps = 600_000_000
22+
adam_beta1 = 0.9529488439604378
23+
adam_beta2 = 0.9993901829477296
24+
adam_eps = 2.745365927413118e-7
1425
bptt_horizon = 64
15-
clip_coef = 0.095627870395359
16-
ent_coef = 0.08439222625935798
17-
gae_lambda = 0.9155041484587568
18-
gamma = 0.9661669903070148
19-
learning_rate = 0.0014756768275156805
20-
max_grad_norm = 0.8813109722891985
21-
minibatch_size = 32768
22-
prio_alpha = 0.565686548517019
23-
prio_beta0 = 0.811234742153397
26+
clip_coef = 0.596573170393339
27+
ent_coef = 0.02107417730003862
28+
gae_lambda = 0.9940613415815854
29+
gamma = 0.9889857974154952
30+
learning_rate = 0.0032402460796988127
31+
max_grad_norm = 1.0752406726589745
32+
minibatch_size = 16384
33+
prio_alpha = 0.25297099593586336
34+
prio_beta0 = 0.940606268942572
2435
vf_clip_coef = 0.1
25-
vf_coef = 2.2900867366155664
36+
vf_coef = 1.6362878279900643
2637
vtrace_c_clip = 0
27-
vtrace_rho_clip = 0.8647738667214979
38+
vtrace_rho_clip = 1.2917509971869054
39+
anneal_lr = False

0 commit comments

Comments
 (0)