Skip to content

Commit bfd6245

Browse files
author
Joseph Suarez
committed
Battle env fixes
1 parent f0f952a commit bfd6245

File tree

3 files changed

+40
-16
lines changed

3 files changed

+40
-16
lines changed

pufferlib/config/ocean/battle.ini

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12,22 +12,39 @@ input_size = 512
1212
hidden_size = 512
1313

1414
[vec]
15-
num_envs = 8
15+
num_envs = 16
1616

1717
[env]
18-
num_envs = 8
18+
num_envs = 4
1919
num_agents = 128
2020
num_armies = 2
2121
size_x = 2
22-
size_y = 2
22+
size_y = 1.0
2323
size_z = 2
2424

2525
[train]
26-
total_timesteps = 100_000_000
26+
total_timesteps = 50_000_000
2727

28-
learning_rate = 0.0015534438005054883
29-
gamma = 0.9923382806478448
30-
minibatch_size = 32768
28+
#adam_beta1 = 0.9672322418397323
29+
#adam_beta2 = 0.9877607751795193
30+
#adam_eps = 3.1721115738865995e-12
31+
#clip_coef = 0.43568934504743784
32+
#ent_coef = 0.0009836417478975427
33+
#gae_lambda = 0.9668222538234107
34+
#gamma = 0.990709789440733
35+
#learning_rate = 0.006246420318636455
36+
#max_grad_norm = 1.7919049246329588
37+
#minibatch_size = 65536
38+
#prio_alpha = 0.09999999999999998
39+
#prio_beta0 = 0.7406397128300295
40+
#vf_clip_coef = 1.6190073090306314
41+
#vf_coef = 3.4918587292978454
42+
#vtrace_c_clip = 0.5344573247342275
43+
#vtrace_rho_clip = 1.2893540729776307
44+
45+
#learning_rate = 0.0015534438005054883
46+
#gamma = 0.9923382806478448
47+
#minibatch_size = 32768
3148

3249
#adam_beta1 = 0.5797997352318079
3350
#adam_beta2 = 0.9001752474216785

pufferlib/ocean/battle/battle.h

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -477,6 +477,12 @@ void scripted_move(Battle* env, Entity* agent, bool is_air) {
477477
float dx = target->x - agent->x;
478478
float dy = target->y - agent->y;
479479
float dz = target->z - agent->z;
480+
481+
// Add some noise
482+
dx += randf(-0.1f, 0.1f);
483+
dy += randf(-0.1f, 0.1f);
484+
dz += randf(-0.1f, 0.1f);
485+
480486
float dd = dx*dx + dz*dz;
481487
if (is_air) {
482488
dd += dy*dy;
@@ -669,11 +675,12 @@ void compute_observations(Battle* env) {
669675
o->dx = dx;
670676
o->dy = dy;
671677
o->dz = dz;
672-
o->distance = distance;
673678
if (other->army == agent->army) {
674679
o->same_team = 1.0f;
680+
o->distance = 99999.0f;
675681
} else {
676682
o->same_team = 0.0f;
683+
o->distance = distance;
677684
}
678685
o->idx = i;
679686
}
@@ -817,13 +824,14 @@ void c_step(Battle* env) {
817824
if (i < env->num_agents/2) {
818825
env->rewards[i] = reward;
819826
env->terminals[i] = 1;
827+
env->log.score = env->log.episode_return;
828+
env->log.episode_length += agent->episode_length;
829+
env->log.episode_return += agent->episode_return;
830+
env->log.collision_rate += collision;
831+
env->log.oob_rate += oob;
832+
env->log.n++;
833+
820834
}
821-
env->log.score = (1.0f - collision) * env->log.episode_return;
822-
env->log.episode_length += agent->episode_length;
823-
env->log.episode_return += agent->episode_return;
824-
env->log.collision_rate += collision;
825-
env->log.oob_rate += oob;
826-
env->log.n++;
827835
agent->episode_length = 0;
828836
agent->episode_return = 0;
829837
}
@@ -870,8 +878,8 @@ void c_step(Battle* env) {
870878
agent->target = j;
871879
if (i < env->num_agents/2) {
872880
env->rewards[i] += 0.25f;
881+
agent->episode_return += 0.25f;
873882
}
874-
agent->episode_return += 0.25f;
875883
target->health -= agent->attack_damage;
876884
break;
877885
}

pufferlib/ocean/battle/battle.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ def __init__(self, num_envs=1, width=1920, height=1080, size_x=1.0,
1212
num_armies=4, render_mode=None, log_interval=128, buf=None, seed=0):
1313
self.single_observation_space = gymnasium.spaces.Box(low=0, high=1,
1414
shape=(num_armies*3 + 4*16 + 22 + 8,), dtype=np.float32)
15-
#self.single_action_space = gymnasium.spaces.MultiDiscrete([9, 9, 9])
1615
self.single_action_space = gymnasium.spaces.Box(
1716
low=-1, high=1, shape=(3,), dtype=np.float32)
1817
self.render_mode = render_mode

0 commit comments

Comments
 (0)