
Commit b071575
Merge branch 'PufferAI:3.0' into 3.0
2 parents: d04f47a + 82f4e7a

27 files changed: +3043, -901 lines


pufferlib/config/metta.ini

Lines changed: 2 additions & 1 deletion
```diff
@@ -5,7 +5,8 @@ policy_name = Policy
 rnn_name = Recurrent
 
 [vec]
-num_envs = 8
+num_envs = 64
+num_workers = 16
 
 [env]
 render_mode = auto
```
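For context on what the `[vec]` values control, here is a minimal sketch of how they would typically be passed to pufferlib's vectorization layer. This is not the repository's actual training entry point, and the exact `pufferlib.vector.make` signature and backend names may differ across pufferlib versions, so treat it as an assumption:

```python
# Hedged sketch: illustrates how [vec] num_envs / num_workers are conventionally
# consumed in pufferlib; not the project's training script.
import pufferlib.vector
from pufferlib.environments.metta.environment import env_creator

vecenv = pufferlib.vector.make(
    env_creator('metta'),                       # creator returned by the environment module
    backend=pufferlib.vector.Multiprocessing,   # assumed backend name for multi-worker runs
    num_envs=64,                                # [vec] num_envs after this change
    num_workers=16,                             # [vec] num_workers -> 4 envs per worker process
)
```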
Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 [base]
 package = ocean
-env_name = puffer_drone
+env_name = puffer_drone_race
 policy_name = Policy
 rnn_name = Recurrent
 
```
Lines changed: 57 additions & 0 deletions
```diff
@@ -0,0 +1,57 @@
+[base]
+package = ocean
+env_name = puffer_drone_swarm
+policy_name = Policy
+rnn_name = Recurrent
+
+[policy]
+hidden_size = 128
+
+[rnn]
+input_size = 128
+hidden_size = 128
+
+[vec]
+num_envs = 8
+
+[env]
+num_envs = 16
+num_drones = 64
+max_rings = 10
+
+[train]
+adam_beta1 = 0.9610890980775877
+adam_beta2 = 0.9999260775286266
+adam_eps = 7.782906079040132e-10
+anneal_lr = true
+batch_size = auto
+bptt_horizon = 64
+checkpoint_interval = 200
+clip_coef = 0.05982655642208556
+ent_coef = 0.002465076521024325
+gae_lambda = 0.9641173414828333
+gamma = 0.997472126425902
+learning_rate = 0.010933756713881205
+#learning_rate = 0.005
+max_grad_norm = 1.6317688647793107
+max_minibatch_size = 32768
+minibatch_size = 32768
+prio_alpha = 0.8968873016577552
+prio_beta0 = 0.8672928227817938
+total_timesteps = 500_000_000
+update_epochs = 1
+#use_rnn = false
+vf_clip_coef = 0.5869845581530236
+vf_coef = 2.1319065538539963
+vtrace_c_clip = 2.714930379733876
+vtrace_rho_clip = 3.8183814893708057
+
+[sweep]
+downsample = 0
+
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 2e8
+max = 4e8
+mean = 2e8
+scale = time
```
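Several of these values use Python-style underscore separators and scientific notation. A purely illustrative sketch of how such entries parse with the standard library's configparser follows; the path is hypothetical, since the new file's name is not shown on this page, and this is not pufferlib's own config loader:

```python
# Illustrative only; the path below is a guess and this is not pufferlib's loader.
from configparser import ConfigParser

cfg = ConfigParser()
cfg.read('pufferlib/config/ocean/drone_swarm.ini')  # hypothetical filename

# int() and float() accept underscore separators since Python 3.6
total_timesteps = int(cfg['train']['total_timesteps'])        # '500_000_000' -> 500000000
sweep_min = float(cfg['sweep.train.total_timesteps']['min'])  # '2e8' -> 200000000.0
print(total_timesteps, sweep_min)
```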

pufferlib/config/ocean/matsci.ini

Lines changed: 17 additions & 0 deletions
```diff
@@ -0,0 +1,17 @@
+[base]
+package = ocean
+env_name = puffer_matsci
+policy_name = Policy
+
+[vec]
+num_envs = 8
+
+[env]
+num_envs = 8
+num_atoms = 128
+
+[train]
+total_timesteps = 50_000_000
+minibatch_size = 32768
+
+
```
pufferlib/environments/metta/environment.py

Lines changed: 19 additions & 30 deletions
```diff
@@ -9,40 +9,24 @@
 from metta.mettagrid.curriculum.core import SingleTaskCurriculum
 from metta.mettagrid.replay_writer import ReplayWriter
 
-#from mettagrid.mettagrid_env import MettaGridEnv
-#from mettagrid.curriculum import SingleTaskCurriculum
-
 def env_creator(name='metta'):
     return functools.partial(make, name)
 
 def make(name, config='pufferlib/environments/metta/metta.yaml', render_mode='auto', buf=None, seed=0,
-        ore_reward=0.25, heart_reward=0.5, battery_reward=0.25):
-    '''Crafter creation function'''
-    #return MettaPuff(config, render_mode, buf)
-    #import mettagrid.mettagrid_env
-    #from omegaconf import OmegaConf
+        ore_reward=0.17088483842567775, battery_reward=0.9882859711234822, heart_reward=1.0):
+    '''Metta creation function'''
+
     OmegaConf.register_new_resolver("div", oc_divide, replace=True)
     cfg = OmegaConf.load(config)
-    reward_cfg = cfg['game']['agent']['rewards']
-    '''
-    env_overrides = {
-        'game': {
-            'agent': {
-                'rewards': {
-                    'ore.red': 0.25,
-                    'ore.blue': 0.25,
-                    'ore.green': 0.25,
-                    'heart': 0.5,
-                    'battery': 0.25,
-                }
-            }
-        }
-    '''
-    reward_cfg['ore.red'] = float(ore_reward)
-    reward_cfg['heart'] = float(heart_reward)
-    reward_cfg['battery.red'] = float(battery_reward)
-    cfg = SingleTaskCurriculum('puffer', cfg)
-    return MettaPuff(cfg, render_mode=render_mode, buf=buf)
+
+    # Update rewards under the new structure: agent.rewards.inventory
+    inventory_rewards = cfg['game']['agent']['rewards']['inventory']
+    inventory_rewards['ore_red'] = float(ore_reward)
+    inventory_rewards['heart'] = float(heart_reward)
+    inventory_rewards['battery_red'] = float(battery_reward)
+
+    curriculum = SingleTaskCurriculum('puffer', cfg)
+    return MettaPuff(curriculum, render_mode=render_mode, buf=buf, seed=seed)
 
 def oc_divide(a, b):
     """
```
```diff
@@ -56,12 +40,17 @@ def oc_divide(a, b):
     return result
 
 class MettaPuff(MettaGridEnv):
-    def __init__(self, config, render_mode='human', buf=None, seed=0):
+    def __init__(self, curriculum, render_mode='human', buf=None, seed=0):
         self.replay_writer = None
         #if render_mode == 'auto':
         #    self.replay_writer = ReplayWriter("metta/")
 
-        super().__init__(config, render_mode=render_mode, buf=buf, replay_writer=self.replay_writer)
+        super().__init__(
+            curriculum=curriculum,
+            render_mode=render_mode,
+            buf=buf,
+            replay_writer=self.replay_writer
+        )
         self.action_space = pufferlib.spaces.joint_space(self.single_action_space, self.num_agents)
         self.actions = self.actions.astype(np.int32)
 
```
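A usage sketch of the updated factory, assuming the metta dependencies and the metta.yaml referenced above are installed; the Gymnasium-style reset signature is an assumption about MettaPuff/MettaGridEnv rather than something shown in this diff:

```python
# Usage sketch only; assumes metta + pufferlib are installed and that the env
# follows the Gymnasium reset convention (obs, info) used elsewhere in pufferlib.
from pufferlib.environments.metta.environment import env_creator

creator = env_creator('metta')      # functools.partial(make, 'metta')
env = creator(render_mode='auto')   # loads metta.yaml, overrides inventory rewards, wraps in MettaPuff
obs, info = env.reset(seed=0)
print(env.num_agents, env.action_space)
```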