
Commit da6e2f9

Merge branch 'PufferAI:3.0' into 3.0
2 parents: b071575 + 38c42c9


95 files changed: +5982 -1351 lines

.github/workflows/install.yml

Lines changed: 7 additions & 1 deletion
@@ -43,5 +43,11 @@ jobs:
       - name: Upgrade pip
         run: python -m pip install -U pip
 
+      - name: Install build dependencies
+        run: pip install --upgrade "setuptools>=69.0.0" "packaging>=24.2" "numpy<2.0" wheel
+
+      - name: Install PyTorch CPU
+        run: pip install torch --index-url https://download.pytorch.org/whl/cpu
+
       - name: Install pufferlib
-        run: pip install -e .
+        run: pip install -e . --no-build-isolation

pufferlib/config/nethack.ini

Lines changed: 22 additions & 7 deletions
@@ -1,16 +1,31 @@
 [base]
 package = nethack
 env_name = nethack
+policy_name = Policy
+rnn_name = Recurrent
 
 [vec]
-num_envs = 128
+num_envs = 8192
 num_workers = 16
-batch_size = 64
+batch_size = 4096
 
 [train]
-batch_size = 8192
-minibatch_size = 2048
-update_epochs = 1
+total_timesteps = 90_000_000
+adam_beta1 = 0.8946507418260217
+adam_beta2 = 0.9
+adam_eps = 0.0001
+batch_size = auto
 bptt_horizon = 64
-total_timesteps = 10_000_000
-anneal_lr = False
+clip_coef = 0.19696765958267629
+ent_coef = 0.0005690816545012474
+gae_lambda = 0.747650023961198
+gamma = 0.9997053654668936
+learning_rate = 0.044482546441415506
+max_grad_norm = 2.2356112188495723
+minibatch_size = 32768
+prio_alpha = 0.98967001208896
+prio_beta0 = 0.09999999999999998
+vf_clip_coef = 2.178492167689251
+vf_coef = 1.6832989594296321
+vtrace_c_clip = 2.878171091654008
+vtrace_rho_clip = 0.7876748061547312
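
For reference, the rewritten [train] section mixes plain floats, underscore-grouped integers, and the sentinel string auto for batch_size. A minimal sketch of reading it with the standard-library configparser; the path and the typing are assumptions, and pufferlib's own loader may coerce types and resolve "auto" differently:

import configparser

# Sketch: load the updated nethack [train] section with stdlib configparser.
cfg = configparser.ConfigParser()
cfg.read("pufferlib/config/nethack.ini")
train = cfg["train"]

total_timesteps = int(train["total_timesteps"])   # int() accepts "90_000_000"
learning_rate = float(train["learning_rate"])
minibatch_size = int(train["minibatch_size"])

# batch_size is the literal string "auto" here and must be special-cased
# before any arithmetic, however the trainer chooses to resolve it.
batch_size = train["batch_size"]
if batch_size != "auto":
    batch_size = int(batch_size)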

pufferlib/config/ocean/breakout.ini

Lines changed: 15 additions & 1 deletion
@@ -10,7 +10,21 @@ num_envs = 8
 [env]
 num_envs = 1024
 frameskip = 4
-
+width = 576
+height = 330
+paddle_width = 62
+paddle_height = 8
+ball_width = 32
+ball_height = 32
+brick_width = 32
+brick_height = 12
+brick_rows = 6
+brick_cols = 18
+initial_ball_speed = 256
+max_ball_speed = 448
+paddle_speed = 620
+continuous = 0
+
 [policy]
 hidden_size = 128
 
pufferlib/config/ocean/cartpole.ini

Lines changed: 6 additions & 0 deletions
@@ -6,6 +6,12 @@ rnn_name = Recurrent
 
 [env]
 num_envs = 4096
+cart_mass = 1.0
+pole_mass = 0.1
+pole_length = 0.5
+gravity = 9.8
+force_mag = 10.0
+dt = 0.02
 
 [train]
 total_timesteps = 20_000_000
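
The new [env] keys are the classic cart-pole physics constants. As a rough illustration of what they control, here is the standard Euler-integrated cart-pole update in those terms; the Ocean environment itself is implemented in C and may differ, and pole_length is assumed to be the half-length as in the usual formulation:

import math

# Standard cart-pole dynamics, shown only to illustrate the cartpole.ini keys.
cart_mass, pole_mass = 1.0, 0.1
pole_length = 0.5            # assumed half-length of the pole
gravity, force_mag, dt = 9.8, 10.0, 0.02

def step(x, x_dot, theta, theta_dot, action):
    force = force_mag if action == 1 else -force_mag
    total_mass = cart_mass + pole_mass
    polemass_length = pole_mass * pole_length

    temp = (force + polemass_length * theta_dot ** 2 * math.sin(theta)) / total_mass
    theta_acc = (gravity * math.sin(theta) - math.cos(theta) * temp) / (
        pole_length * (4.0 / 3.0 - pole_mass * math.cos(theta) ** 2 / total_mass))
    x_acc = temp - polemass_length * theta_acc * math.cos(theta) / total_mass

    # Forward Euler integration with the configured dt
    return (x + dt * x_dot, x_dot + dt * x_acc,
            theta + dt * theta_dot, theta_dot + dt * theta_acc)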
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+[base]
+package = ocean
+env_name = puffer_chain_mdp
+policy_name = Policy
+; rnn_name = Recurrent
+
+[vec]
+num_envs = 8
+
+[env]
+num_envs = 512
+size = 128
+
+[policy]
+hidden_size = 128
+
+; [rnn]
+; input_size = 128
+; hidden_size = 128
+
+[train]
+total_timesteps = 5_000_000
+bptt_horizon = 64
+entropy_coef = 0.1
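
puffer_chain_mdp with size = 128 reads like a chain-style exploration task. The environment lives in Ocean's C code, so the following is only a conceptual sketch of a chain MDP of that size, under the usual NChain-style assumptions (walk left or right along a row of states, reward only at the far end):

# Conceptual chain MDP, not the Ocean implementation: `size` states in a row,
# two actions (left/right), reward only for reaching the final state.
class ChainMDP:
    def __init__(self, size=128):
        self.size = size
        self.pos = 0

    def reset(self):
        self.pos = 0
        return self.pos

    def step(self, action):  # 0 = left, 1 = right
        self.pos = max(0, self.pos - 1) if action == 0 else min(self.size - 1, self.pos + 1)
        done = self.pos == self.size - 1
        return self.pos, float(done), done

env = ChainMDP(size=128)
obs = env.reset()
obs, reward, done = env.step(1)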
Lines changed: 9 additions & 9 deletions
@@ -1,7 +1,7 @@
 [base]
 package = ocean
-env_name = puffer_gpudrive
-policy_name = GPUDrive
+env_name = puffer_drive
+policy_name = Drive
 rnn_name = Recurrent
 
 [vec]
@@ -12,11 +12,11 @@ batch_size = 2
 
 [policy]
 input_size = 64
-hidden_size = 512
+hidden_size = 256
 
 [rnn]
-input_size = 512
-hidden_size = 512
+input_size = 256
+hidden_size = 256
 
 [env]
 num_agents = 1024
@@ -25,11 +25,11 @@ reward_offroad_collision = -0.2
 spawn_immunity_timer = 50
 reward_goal_post_respawn = 0.25
 reward_vehicle_collision_post_respawn = -0.5
-resample_frequency = 91
-num_maps = 1000
+resample_frequency = 910
+num_maps = 80000
 
 [train]
-total_timesteps = 1_000_000_000
+total_timesteps = 2_000_000_000
 #learning_rate = 0.02
 #gamma = 0.985
 anneal_lr = True
@@ -44,7 +44,7 @@ clip_coef = 0.2
 ent_coef = 0.001
 gae_lambda = 0.95
 gamma = 0.98
-learning_rate = 0.01
+learning_rate = 0.001
 max_grad_norm = 1
 prio_alpha = 0.8499999999999999
 prio_beta0 = 0.8499999999999999

pufferlib/config/ocean/impulse_wars.ini

Lines changed: 73 additions & 1 deletion
@@ -30,7 +30,7 @@ continuous = False
 is_training = True
 
 [train]
-total_timesteps = 100_000_000
+total_timesteps = 1_000_000_000
 checkpoint_interval = 250
 
 learning_rate = 0.005
@@ -47,6 +47,78 @@ max = 512
 mean = 128
 scale = auto
 
+# reward parameters
+[sweep.env.reward_win]
+distribution = uniform
+min = 0.0
+mean = 2.0
+max = 5.0
+scale = auto
+
+[sweep.env.reward_self_kill]
+distribution = uniform
+min = -3.0
+mean = -1.0
+max = 0.0
+scale = auto
+
+[sweep.env.reward_enemy_death]
+distribution = uniform
+min = 0.0
+mean = 1.0
+max = 3.0
+scale = auto
+
+[sweep.env.reward_kill]
+distribution = uniform
+min = 0.0
+mean = 1.0
+max = 3.0
+scale = auto
+
+[sweep.env.reward_death]
+distribution = uniform
+min = -1.0
+mean = -0.25
+max = 0.0
+scale = auto
+
+[sweep.env.reward_energy_emptied]
+distribution = uniform
+min = -2.0
+mean = -0.75
+max = 0.0
+scale = auto
+
+[sweep.env.reward_weapon_pickup]
+distribution = uniform
+min = 0.0
+mean = 0.5
+max = 3.0
+scale = auto
+
+[sweep.env.reward_shield_break]
+distribution = uniform
+min = 0.0
+mean = 0.5
+max = 3.0
+scale = auto
+
+[sweep.env.reward_shot_hit_coef]
+distribution = log_normal
+min = 0.0005
+mean = 0.005
+max = 0.05
+scale = auto
+
+[sweep.env.reward_explosion_hit_coef]
+distribution = log_normal
+min = 0.0005
+mean = 0.005
+max = 0.05
+scale = auto
+
+# hyperparameters
 [sweep.train.total_timesteps]
 distribution = log_normal
 min = 250_000_000
pufferlib/config/ocean/memory.ini

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+[base]
+package = ocean
+env_name = puffer_memory
+policy_name = Policy
+rnn_name = Recurrent
+
+[env]
+num_envs = 1024
+
+[vec]
+num_envs = 8
+
+[train]
+total_timesteps = 50_000_000
+minibatch_size = 32768
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+[base]
+package = ocean
+env_name = puffer_onestateworld
+policy_name = Policy
+rnn_name = None
+
+[vec]
+num_envs = 8
+
+[env]
+num_envs = 512
+mean_left = 0.1
+mean_right = 0.5
+var_right = 10
+
+[policy]
+hidden_size = 128
+
+[train]
+total_timesteps = 5_000_000
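
The mean_left / mean_right / var_right keys suggest a single-state, two-action reward problem: a small but safe left action versus a higher-mean, high-variance right action. The Ocean environment is implemented in C, so the following is only a conceptual sketch, with the added assumption that the left action pays its mean deterministically:

import random

# Conceptual one-state world, not the Ocean implementation: one observation,
# two actions; the right action has a higher mean reward but high variance.
mean_left, mean_right, var_right = 0.1, 0.5, 10.0

def step(action):  # 0 = left, 1 = right
    if action == 0:
        return mean_left                               # assumed deterministic
    return random.gauss(mean_right, var_right ** 0.5)  # std = sqrt(variance)

rewards = [step(1) for _ in range(5)]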
Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [base]
 package = ocean
-env_name = puffer_cpr
+env_name = puffer_shared_pool
 rnn_name = Recurrent
 
 [env]
