Commit f0f952a

Mani tuned

Author: Joseph Suarez
1 parent da247f2 commit f0f952a

File tree

3 files changed: +98 -6 lines changed


pufferlib/config/mani_skill.ini

Lines changed: 26 additions & 5 deletions
@@ -1,7 +1,7 @@
 
 [base]
 package = mani_skill
-env_name = mani_pickcube
+env_name = mani_pickcube mani_pushcube mani_peginsertion
 policy_name = Policy
 rnn_name = Recurrent

@@ -13,14 +13,35 @@ backend = PufferEnv
 num_envs = 1

 [train]
-total_timesteps = 10_000_000
-bptt_horizon = 32
-gamma = 0.8
-gae_lamda = 0.9
+total_timesteps = 100_000_000
+adam_beta1 = 0.9832254546070032
+adam_beta2 = 0.9996089758513379
+adam_eps = 0.0000024542110227211678
+bptt_horizon = 64
+clip_coef = 0.6609987983481933
+ent_coef = 0.001194131610607018
+gae_lambda = 0.968478898646462
+gamma = 0.8880001899050386
+learning_rate = 0.04729013902338006
+max_grad_norm = 1.9301595176438802
+minibatch_size = 32768
+prio_alpha = 0.9531362058849446
+prio_beta0 = 0.8285186322612919
+vf_clip_coef = 0.2581908677409054
+vf_coef = 2.6102252379894217
+vtrace_c_clip = 2.008516783867587
+vtrace_rho_clip = 0.7482202150166445

 [sweep]
 method = Protein
 metric = success_once
 downsample = 0

+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 5e6
+max = 15e6
+mean = 10e6
+scale = time
+

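For context on how these values group together, here is a minimal sketch (not part of the commit) that reads the tuned [train] block and the [sweep.train.total_timesteps] section with Python's stdlib configparser. The relative path and the blanket float() coercion are illustrative assumptions; PufferLib's own config loading may differ.

# Minimal sketch (not part of the commit): reading the tuned hyperparameters
# with Python's stdlib configparser. Path and float() coercion are assumptions.
import configparser

cfg = configparser.ConfigParser()
cfg.read('pufferlib/config/mani_skill.ini')

# Every key shown under [train] in this commit is numeric.
train = {k: float(v) for k, v in cfg['train'].items()}
print(int(train['total_timesteps']))   # 100000000
print(train['learning_rate'])          # 0.04729013902338006

# configparser treats 'sweep.train.total_timesteps' as an ordinary section name.
ts = cfg['sweep.train.total_timesteps']
print(ts['distribution'], ts['min'], ts['max'], ts['mean'])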
pufferlib/environments/mani_skill/environment.py

Lines changed: 3 additions & 1 deletion
@@ -12,6 +12,8 @@

 ALIASES = {
     'mani_pickcube': 'PickCube-v1',
+    'mani_pushcube': 'PushCube-v1',
+    'mani_peginsertion': 'PegInsertionSide-v1',
 }

 def env_creator(name='PickCube-v1'):

@@ -27,7 +29,7 @@ def make(name, num_envs=1, render_mode='rgb_array', buf=None, seed=0):

 class ManiPufferEnv(pufferlib.PufferEnv):
     def __init__(self, name, num_envs=1, render_mode='rgb_array', log_interval=16, buf=None, seed=0):
-        self.env = gym.make(name, num_envs=num_envs, render_mode=render_mode)
+        self.env = gym.make(name, reward_mode='delta', num_envs=num_envs, render_mode=render_mode)
         self.env = ManiSkillVectorEnv(self.env, auto_reset=True, ignore_terminations=False, record_metrics=True)
         self.agents_per_batch = num_envs

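The two new aliases and the reward_mode change can be exercised outside the PufferLib wrapper as well. Below is a minimal sketch, not part of the commit, that mirrors the construction path in the diff above for one of the newly aliased tasks. The mani_skill import paths reflect ManiSkill 3 and may differ by version; reward_mode='delta' is copied verbatim from the diff.

# Minimal sketch (not part of the commit): building one of the newly aliased
# tasks the same way the diff above does. Assumes ManiSkill 3 is installed;
# import paths may differ by version, reward_mode='delta' is from the diff.
import gymnasium as gym
import mani_skill.envs  # registers PickCube-v1, PushCube-v1, PegInsertionSide-v1, ...
from mani_skill.vector.wrappers.gymnasium import ManiSkillVectorEnv

ALIASES = {
    'mani_pickcube': 'PickCube-v1',
    'mani_pushcube': 'PushCube-v1',
    'mani_peginsertion': 'PegInsertionSide-v1',
}

name = ALIASES['mani_pushcube']
env = gym.make(name, reward_mode='delta', num_envs=4, render_mode='rgb_array')
env = ManiSkillVectorEnv(env, auto_reset=True, ignore_terminations=False, record_metrics=True)
obs, info = env.reset(seed=0)  # batched reset, one row per sub-environment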
Lines changed: 69 additions & 0 deletions
@@ -1,2 +1,71 @@
+import numpy as np
+
+import torch
+import torch.nn as nn
+
+import pufferlib
 from pufferlib.models import Default as Policy
 from pufferlib.models import LSTMWrapper as Recurrent
+
+class FakePolicy(nn.Module):
+    '''Default PyTorch policy. Flattens obs and applies a linear layer.
+
+    PufferLib is not a framework. It does not enforce a base class.
+    You can use any PyTorch policy that returns actions and values.
+    We structure our forward methods as encode_observations and decode_actions
+    to make it easier to wrap policies with LSTMs. You can do that and use
+    our LSTM wrapper or implement your own. To port an existing policy
+    for use with our LSTM wrapper, simply put everything from forward() before
+    the recurrent cell into encode_observations and put everything after
+    into decode_actions.
+    '''
+    def __init__(self, env, hidden_size=256):
+        super().__init__()
+        self.hidden_size = hidden_size
+
+        n_obs = np.prod(env.single_observation_space.shape)
+        n_atn = env.single_action_space.shape[0]
+        self.decoder_mean = nn.Sequential(
+            pufferlib.pytorch.layer_init(nn.Linear(n_obs, 256)),
+            nn.Tanh(),
+            pufferlib.pytorch.layer_init(nn.Linear(256, 256)),
+            nn.Tanh(),
+            pufferlib.pytorch.layer_init(nn.Linear(256, 256)),
+            nn.Tanh(),
+            pufferlib.pytorch.layer_init(nn.Linear(256, n_atn), std=0.01),
+        )
+        self.decoder_logstd = nn.Parameter(torch.zeros(
+            1, env.single_action_space.shape[0]))
+
+        self.value = nn.Sequential(
+            pufferlib.pytorch.layer_init(nn.Linear(n_obs, 256)),
+            nn.Tanh(),
+            pufferlib.pytorch.layer_init(nn.Linear(256, 256)),
+            nn.Tanh(),
+            pufferlib.pytorch.layer_init(nn.Linear(256, 256)),
+            nn.Tanh(),
+            pufferlib.pytorch.layer_init(nn.Linear(256, 1), std=1),
+        )
+
+    def forward_eval(self, observations, state=None):
+        hidden = self.encode_observations(observations, state=state)
+        logits, values = self.decode_actions(hidden)
+        return logits, values
+
+    def forward(self, observations, state=None):
+        return self.forward_eval(observations, state)
+
+    def encode_observations(self, observations, state=None):
+        '''Encodes a batch of observations into hidden states. Assumes
+        no time dimension (handled by LSTM wrappers).'''
+        return observations
+
+    def decode_actions(self, hidden):
+        '''Decodes a batch of hidden states into (multi)discrete actions.
+        Assumes no time dimension (handled by LSTM wrappers).'''
+        mean = self.decoder_mean(hidden)
+        logstd = self.decoder_logstd.expand_as(mean)
+        std = torch.exp(logstd)
+        logits = torch.distributions.Normal(mean, std)
+        values = self.value(hidden)
+        return logits, values

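A quick usage sketch of the new policy, not part of the commit, showing the shapes FakePolicy consumes and produces. The env object here is a stand-in exposing only the two attributes the constructor reads (a real run would pass the vectorized ManiSkill env), and the 42/8 observation and action sizes are made up for illustration. Note that despite the name, the returned "logits" is a torch.distributions.Normal over continuous actions.

# Usage sketch (not part of the commit), with FakePolicy defined as above.
# The env stand-in and the 42/8 sizes are illustrative assumptions.
from types import SimpleNamespace
import torch

env = SimpleNamespace(
    single_observation_space=SimpleNamespace(shape=(42,)),  # assumed obs dim
    single_action_space=SimpleNamespace(shape=(8,)),        # assumed action dim
)
policy = FakePolicy(env)

obs = torch.zeros(16, 42)                  # a batch of flat observations
dist, values = policy(obs)                 # Normal distribution and value estimates
actions = dist.sample()                    # (16, 8) continuous actions
logprob = dist.log_prob(actions).sum(-1)   # (16,) per-sample log-probability
print(actions.shape, values.shape, logprob.shape)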