Skip to content

Commit e87bff5

Browse files
author
Joseph Suarez
committed
Merge branch '3.0' of https://github.com/pufferai/pufferlib into 3.0
2 parents 2308bbd + 57ade97 commit e87bff5

File tree

7 files changed

+313
-176
lines changed

7 files changed

+313
-176
lines changed

pufferlib/config/ocean/asteroids.ini

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,5 +12,26 @@ num_envs = 1024
1212
size = 500
1313

1414
[train]
15-
total_timesteps = 150_000_000
16-
minibatch_size = 32768
15+
adam_beta1 = 0.975493290069733
16+
adam_beta2 = 0.9999436458974764
17+
adam_eps = 6.915036275112011e-08
18+
anneal_lr = true
19+
batch_size = auto
20+
bptt_horizon = 64
21+
checkpoint_interval = 200
22+
clip_coef = 0.18588778503512546
23+
ent_coef = 0.0016620361911332262
24+
gae_lambda = 0.8400278040617952
25+
gamma = 0.9998708818940873
26+
learning_rate = 0.00502237062536979
27+
max_grad_norm = 0.7306435358436453
28+
max_minibatch_size = 32768
29+
minibatch_size = 8192
30+
prio_alpha = 0.9165093859993415
31+
prio_beta0 = 0.8869674411376214
32+
total_timesteps = 100_000_000
33+
update_epochs = 1
34+
vf_clip_coef = 0.1
35+
vf_coef = 2.960148388519086
36+
vtrace_c_clip = 1.0767718761515104
37+
vtrace_rho_clip = 4.132507367126342

pufferlib/config/ocean/battle.ini

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,22 +12,39 @@ input_size = 512
1212
hidden_size = 512
1313

1414
[vec]
15-
num_envs = 8
15+
num_envs = 16
1616

1717
[env]
18-
num_envs = 8
18+
num_envs = 4
1919
num_agents = 128
2020
num_armies = 2
2121
size_x = 2
22-
size_y = 2
22+
size_y = 1.0
2323
size_z = 2
2424

2525
[train]
26-
total_timesteps = 100_000_000
26+
total_timesteps = 50_000_000
2727

28-
learning_rate = 0.0015534438005054883
29-
gamma = 0.9923382806478448
30-
minibatch_size = 32768
28+
#adam_beta1 = 0.9672322418397323
29+
#adam_beta2 = 0.9877607751795193
30+
#adam_eps = 3.1721115738865995e-12
31+
#clip_coef = 0.43568934504743784
32+
#ent_coef = 0.0009836417478975427
33+
#gae_lambda = 0.9668222538234107
34+
#gamma = 0.990709789440733
35+
#learning_rate = 0.006246420318636455
36+
#max_grad_norm = 1.7919049246329588
37+
#minibatch_size = 65536
38+
#prio_alpha = 0.09999999999999998
39+
#prio_beta0 = 0.7406397128300295
40+
#vf_clip_coef = 1.6190073090306314
41+
#vf_coef = 3.4918587292978454
42+
#vtrace_c_clip = 0.5344573247342275
43+
#vtrace_rho_clip = 1.2893540729776307
44+
45+
#learning_rate = 0.0015534438005054883
46+
#gamma = 0.9923382806478448
47+
#minibatch_size = 32768
3148

3249
#adam_beta1 = 0.5797997352318079
3350
#adam_beta2 = 0.9001752474216785

pufferlib/config/ocean/g2048.ini

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,36 @@ env_name = puffer_g2048
44
policy_name = G2048
55
rnn_name = Recurrent
66

7+
[policy]
8+
hidden_size = 256
9+
10+
[rnn]
11+
input_size = 256
12+
hidden_size = 256
13+
14+
[vec]
15+
num_envs = 4
16+
717
[env]
8-
num_envs = 4096
18+
num_envs = 4024
919

1020
[train]
11-
total_timesteps = 1_000_000_000
12-
minibatch_size = 32768
21+
total_timesteps = 600_000_000
22+
adam_beta1 = 0.9529488439604378
23+
adam_beta2 = 0.9993901829477296
24+
adam_eps = 2.745365927413118e-7
25+
bptt_horizon = 64
26+
clip_coef = 0.596573170393339
27+
ent_coef = 0.02107417730003862
28+
gae_lambda = 0.9940613415815854
29+
gamma = 0.9889857974154952
30+
learning_rate = 0.0032402460796988127
31+
max_grad_norm = 1.0752406726589745
32+
minibatch_size = 16384
33+
prio_alpha = 0.25297099593586336
34+
prio_beta0 = 0.940606268942572
35+
vf_clip_coef = 0.1
36+
vf_coef = 1.6362878279900643
37+
vtrace_c_clip = 0
38+
vtrace_rho_clip = 1.2917509971869054
39+
anneal_lr = False

pufferlib/ocean/battle/battle.h

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -477,6 +477,12 @@ void scripted_move(Battle* env, Entity* agent, bool is_air) {
477477
float dx = target->x - agent->x;
478478
float dy = target->y - agent->y;
479479
float dz = target->z - agent->z;
480+
481+
// Add some noise
482+
dx += randf(-0.1f, 0.1f);
483+
dy += randf(-0.1f, 0.1f);
484+
dz += randf(-0.1f, 0.1f);
485+
480486
float dd = dx*dx + dz*dz;
481487
if (is_air) {
482488
dd += dy*dy;
@@ -669,11 +675,12 @@ void compute_observations(Battle* env) {
669675
o->dx = dx;
670676
o->dy = dy;
671677
o->dz = dz;
672-
o->distance = distance;
673678
if (other->army == agent->army) {
674679
o->same_team = 1.0f;
680+
o->distance = 99999.0f;
675681
} else {
676682
o->same_team = 0.0f;
683+
o->distance = distance;
677684
}
678685
o->idx = i;
679686
}
@@ -817,13 +824,14 @@ void c_step(Battle* env) {
817824
if (i < env->num_agents/2) {
818825
env->rewards[i] = reward;
819826
env->terminals[i] = 1;
827+
env->log.score = env->log.episode_return;
828+
env->log.episode_length += agent->episode_length;
829+
env->log.episode_return += agent->episode_return;
830+
env->log.collision_rate += collision;
831+
env->log.oob_rate += oob;
832+
env->log.n++;
833+
820834
}
821-
env->log.score = (1.0f - collision) * env->log.episode_return;
822-
env->log.episode_length += agent->episode_length;
823-
env->log.episode_return += agent->episode_return;
824-
env->log.collision_rate += collision;
825-
env->log.oob_rate += oob;
826-
env->log.n++;
827835
agent->episode_length = 0;
828836
agent->episode_return = 0;
829837
}
@@ -870,8 +878,8 @@ void c_step(Battle* env) {
870878
agent->target = j;
871879
if (i < env->num_agents/2) {
872880
env->rewards[i] += 0.25f;
881+
agent->episode_return += 0.25f;
873882
}
874-
agent->episode_return += 0.25f;
875883
target->health -= agent->attack_damage;
876884
break;
877885
}

pufferlib/ocean/battle/battle.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ def __init__(self, num_envs=1, width=1920, height=1080, size_x=1.0,
1212
num_armies=4, render_mode=None, log_interval=128, buf=None, seed=0):
1313
self.single_observation_space = gymnasium.spaces.Box(low=0, high=1,
1414
shape=(num_armies*3 + 4*16 + 22 + 8,), dtype=np.float32)
15-
#self.single_action_space = gymnasium.spaces.MultiDiscrete([9, 9, 9])
1615
self.single_action_space = gymnasium.spaces.Box(
1716
low=-1, high=1, shape=(3,), dtype=np.float32)
1817
self.render_mode = render_mode

0 commit comments

Comments
 (0)