# NOTE(review): the section header for the four keys below lies above this
# excerpt -- confirm it is present in the full file.
package = ocean
env_name = puffer_g2048
policy_name = G2048
rnn_name = Recurrent

# Policy network settings.
[policy]
hidden_size = 256

# Recurrent module settings.
# NOTE(review): input_size presumably has to match [policy] hidden_size -- confirm.
[rnn]
input_size = 256
hidden_size = 256

# Vectorization settings; this num_envs is a separate key from [env] num_envs.
[vec]
num_envs = 4

# Environment settings.
[env]
num_envs = 4024

# Training hyperparameters.
[train]
total_timesteps = 600_000_000
adam_beta1 = 0.9529488439604378
adam_beta2 = 0.9993901829477296
adam_eps = 2.745365927413118e-7
bptt_horizon = 64
clip_coef = 0.596573170393339
ent_coef = 0.02107417730003862
gae_lambda = 0.9940613415815854
gamma = 0.9889857974154952
learning_rate = 0.0032402460796988127
max_grad_norm = 1.0752406726589745
minibatch_size = 16384
prio_alpha = 0.25297099593586336
prio_beta0 = 0.940606268942572
vf_clip_coef = 0.1
vf_coef = 1.6362878279900643
vtrace_c_clip = 0
vtrace_rho_clip = 1.2917509971869054
anneal_lr = False