File tree Expand file tree Collapse file tree 4 files changed +273
-160
lines changed
Expand file tree Collapse file tree 4 files changed +273
-160
lines changed Original file line number Diff line number Diff line change @@ -12,5 +12,26 @@ num_envs = 1024
1212size = 500
1313
1414[train]
15- total_timesteps = 150_000_000
16- minibatch_size = 32768
15+ adam_beta1 = 0.975493290069733
16+ adam_beta2 = 0.9999436458974764
17+ adam_eps = 6.915036275112011e-08
18+ anneal_lr = true
19+ batch_size = auto
20+ bptt_horizon = 64
21+ checkpoint_interval = 200
22+ clip_coef = 0.18588778503512546
23+ ent_coef = 0.0016620361911332262
24+ gae_lambda = 0.8400278040617952
25+ gamma = 0.9998708818940873
26+ learning_rate = 0.00502237062536979
27+ max_grad_norm = 0.7306435358436453
28+ max_minibatch_size = 32768
29+ minibatch_size = 8192
30+ prio_alpha = 0.9165093859993415
31+ prio_beta0 = 0.8869674411376214
32+ total_timesteps = 100_000_000
33+ update_epochs = 1
34+ vf_clip_coef = 0.1
35+ vf_coef = 2.960148388519086
36+ vtrace_c_clip = 1.0767718761515104
37+ vtrace_rho_clip = 4.132507367126342
Original file line number Diff line number Diff line change @@ -4,9 +4,36 @@ env_name = puffer_g2048
44policy_name = G2048
55rnn_name = Recurrent
66
7+ [policy]
8+ hidden_size = 256
9+
10+ [rnn]
11+ input_size = 256
12+ hidden_size = 256
13+
14+ [vec]
15+ num_envs = 4
16+
717[env]
8- num_envs = 4096
18+ num_envs = 4024
919
1020[train]
11- total_timesteps = 1_000_000_000
12- minibatch_size = 32768
21+ total_timesteps = 600_000_000
22+ adam_beta1 = 0.9529488439604378
23+ adam_beta2 = 0.9993901829477296
24+ adam_eps = 2.745365927413118e-7
25+ bptt_horizon = 64
26+ clip_coef = 0.596573170393339
27+ ent_coef = 0.02107417730003862
28+ gae_lambda = 0.9940613415815854
29+ gamma = 0.9889857974154952
30+ learning_rate = 0.0032402460796988127
31+ max_grad_norm = 1.0752406726589745
32+ minibatch_size = 16384
33+ prio_alpha = 0.25297099593586336
34+ prio_beta0 = 0.940606268942572
35+ vf_clip_coef = 0.1
36+ vf_coef = 1.6362878279900643
37+ vtrace_c_clip = 0
38+ vtrace_rho_clip = 1.2917509971869054
39+ anneal_lr = False
You can’t perform that action at this time.
0 commit comments