Commit 2308bbd: Maniskill sweep
Author: Joseph Suarez
1 parent: f0f952a

4 files changed: +72, -22 lines

pufferlib/config/mani_skill.ini (27 additions, 5 deletions)
@@ -1,19 +1,22 @@
 
 [base]
 package = mani_skill
-env_name = mani_pickcube mani_pushcube mani_peginsertion
+env_name = mani_pickcube mani_pushcube mani_stackcube mani_peginsertion
 policy_name = Policy
 rnn_name = Recurrent
 
 [env]
 num_envs = 4096
+sim_steps_per_control = 5
+control_freq = 100
+solver_position_iterations = 15
 
 [vec]
 backend = PufferEnv
 num_envs = 1
 
 [train]
-total_timesteps = 100_000_000
+total_timesteps = 15_000_000
 adam_beta1 = 0.9832254546070032
 adam_beta2 = 0.9996089758513379
 adam_eps = 0.0000024542110227211678
@@ -39,9 +42,28 @@ downsample = 0
 
 [sweep.train.total_timesteps]
 distribution = log_normal
-min = 5e6
-max = 15e6
-mean = 10e6
+min = 2e7
+max = 5e7
+mean = 4e7
 scale = time
 
+[sweep.env.sim_steps_per_control]
+distribution = int_uniform
+min = 1
+max = 10
+mean = 5
+scale = auto
 
+[sweep.env.control_freq]
+distribution = int_uniform
+min = 10
+max = 100
+mean = 20
+scale = auto
+
+[sweep.env.solver_position_iterations]
+distribution = int_uniform
+min = 4
+max = 30
+mean = 15
+scale = auto

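The three new [env] keys jointly set the physics timing: the simulator is stepped sim_steps_per_control times per policy action, so the effective simulator frequency is sim_freq = sim_steps_per_control * control_freq (computed exactly this way in ManiPufferEnv.__init__ below). A quick, illustrative check of the ranges the sweep can reach (the helper name is ours, not part of the repo):

def sim_freq(sim_steps_per_control: int, control_freq: int) -> int:
    # Effective physics frequency, mirroring ManiPufferEnv.__init__.
    return int(sim_steps_per_control * control_freq)

assert sim_freq(5, 100) == 500    # [env] defaults: 500 Hz physics
assert sim_freq(1, 10) == 10      # sweep minimum: 10 Hz physics
assert sim_freq(10, 100) == 1000  # sweep maximum: 1000 Hz physics
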
pufferlib/environments/mani_skill/environment.py (26 additions, 7 deletions)
@@ -13,23 +13,35 @@
 ALIASES = {
     'mani_pickcube': 'PickCube-v1',
     'mani_pushcube': 'PushCube-v1',
+    'mani_stackcube': 'StackCube-v1',
     'mani_peginsertion': 'PegInsertionSide-v1',
 }
 
-def env_creator(name='PickCube-v1'):
+def env_creator(name='PickCube-v1', **kwargs):
     return functools.partial(make, name)
 
-def make(name, num_envs=1, render_mode='rgb_array', buf=None, seed=0):
+def make(name, num_envs=1, render_mode='rgb_array', buf=None, seed=0, **kwargs):
     '''Create an environment by name'''
 
     if name in ALIASES:
         name = ALIASES[name]
 
-    return ManiPufferEnv(name, num_envs=num_envs, render_mode=render_mode, buf=buf, seed=seed)
+    return ManiPufferEnv(name, num_envs=num_envs, render_mode=render_mode, buf=buf, seed=seed, **kwargs)
 
 class ManiPufferEnv(pufferlib.PufferEnv):
-    def __init__(self, name, num_envs=1, render_mode='rgb_array', log_interval=16, buf=None, seed=0):
-        self.env = gym.make(name, reward_mode='delta', num_envs=num_envs, render_mode=render_mode)
+    def __init__(self, name, num_envs=1, solver_position_iterations=15,
+            sim_steps_per_control=5, control_freq=20, render_mode='rgb_array',
+            log_interval=16, buf=None, seed=0):
+        sim_freq = int(sim_steps_per_control * control_freq)
+        sim_config = {
+            'scene_config': {
+                'solver_position_iterations': solver_position_iterations
+            },
+            'sim_freq': sim_freq,
+            'control_freq': control_freq
+        }
+        self.env = gym.make(name, reward_mode='delta', num_envs=num_envs,
+            render_mode=render_mode, sim_config=sim_config)
         self.env = ManiSkillVectorEnv(self.env, auto_reset=True, ignore_terminations=False, record_metrics=True)
         self.agents_per_batch = num_envs

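For reference, the sim_config dict assembled in __init__ can be exercised directly against ManiSkill's gym.make. A minimal sketch, assuming a ManiSkill 3 install with GPU simulation (task name and values taken from the defaults above):

import gymnasium as gym
import mani_skill.envs  # noqa: F401  (registers the ManiSkill tasks)

# Same structure ManiPufferEnv.__init__ builds: 5 substeps at 100 Hz
# control -> 500 Hz simulation, with 15 solver position iterations.
sim_config = {
    'scene_config': {'solver_position_iterations': 15},
    'sim_freq': 500,
    'control_freq': 100,
}
env = gym.make('PickCube-v1', num_envs=4, sim_config=sim_config)
obs, info = env.reset(seed=0)
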
@@ -68,13 +80,20 @@ def _flatten_info(self, info):
 
     def reset(self, seed=0):
         obs, info = self.env.reset()
-        self.observations = obs
+        #self.observations = torch.nan_to_num(obs)
+        self.observations = torch.clamp(torch.nan_to_num(obs), -5, 5)
+        self.observations = obs / 20.0
         self._flatten_info(info)
         return obs, []
 
     def step(self, actions):
         obs, reward, terminated, truncated, info = self.env.step(actions)
-        self.observations = obs
+        collapsed = torch.where(torch.isnan(obs).sum(1) > 0)[0]
+        if len(collapsed) > 0:
+            obs, _ = self.env.reset(options={'env_idx': collapsed})
+
+        self.observations = torch.clamp(torch.nan_to_num(obs), -5, 5)
+        #self.observations = obs / 20.0 #torch.nan_to_num(obs)
         self.rewards = reward
         self.terminated = terminated
         self.truncated = truncated

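The step() change guards against physics blow-ups: any sub-environment whose observation contains a NaN is reset in place, and the result is NaN-scrubbed and clamped before it reaches the policy. The same pattern isolated as a standalone sketch (the helper name is illustrative; the partial reset via options={'env_idx': ...} is the ManiSkillVectorEnv call used above):

import torch

def sanitize_observations(obs: torch.Tensor, env) -> torch.Tensor:
    # Indices of sub-envs whose observation row contains any NaN.
    collapsed = torch.where(torch.isnan(obs).sum(1) > 0)[0]
    if len(collapsed) > 0:
        # Reset only the collapsed sub-envs; the rest keep running.
        obs, _ = env.reset(options={'env_idx': collapsed})
    # Scrub any remaining NaN/inf and bound the range for the policy.
    return torch.clamp(torch.nan_to_num(obs), -5, 5)
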
pufferlib/models.py (2 additions, 0 deletions)
@@ -182,6 +182,8 @@ def forward(self, observations, state):
         hidden = hidden.transpose(0, 1)
         #hidden = self.pre_layernorm(hidden)
         hidden, (lstm_h, lstm_c) = self.lstm.forward(hidden, lstm_state)
+        hidden = hidden.float()
+
         #hidden = self.post_layernorm(hidden)
         hidden = hidden.transpose(0, 1)
 

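The added cast pins the LSTM output to float32 before the transpose and the heads that follow; under autocast or other mixed-precision setups, hidden can come out in reduced precision. A minimal sketch of the cast's effect (the bfloat16 input is an assumption for illustration):

import torch

# As hidden might arrive from a reduced-precision LSTM pass.
hidden = torch.randn(8, 4, 32, dtype=torch.bfloat16)
hidden = hidden.float()  # upcast to float32; a no-op if already float32
assert hidden.dtype == torch.float32
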
pufferlib/pufferl.py (17 additions, 10 deletions)
@@ -863,8 +863,6 @@ def download(self):
 
 def train(env_name, args=None, vecenv=None, policy=None, logger=None):
     args = args or load_config(env_name)
-    vecenv = vecenv or load_env(env_name, args)
-    policy = policy or load_policy(args, vecenv)
 
     # Assume TorchRun DDP is used if LOCAL_RANK is set
     if 'LOCAL_RANK' in os.environ:
@@ -875,6 +873,12 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None):
         local_rank = int(os.environ["LOCAL_RANK"])
         print(f"rank: {local_rank}, MASTER_ADDR={master_addr}, MASTER_PORT={master_port}")
         torch.cuda.set_device(local_rank)
+        os.environ["CUDA_VISIBLE_DEVICES"] = str(local_rank)
+
+    vecenv = vecenv or load_env(env_name, args)
+    policy = policy or load_policy(args, vecenv)
+
+    if 'LOCAL_RANK' in os.environ:
         args['train']['device'] = torch.cuda.current_device()
         torch.distributed.init_process_group(backend='nccl', world_size=world_size)
         policy = policy.to(local_rank)
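
The reordering matters: each rank now pins its GPU (torch.cuda.set_device plus CUDA_VISIBLE_DEVICES) before the environment and policy are constructed, so any CUDA allocations they trigger land on that rank's device. A condensed sketch of the pattern, assuming a torchrun launch (e.g. torchrun --nproc-per-node=2 ...):

import os
import torch

if 'LOCAL_RANK' in os.environ:  # set by torchrun
    local_rank = int(os.environ['LOCAL_RANK'])
    torch.cuda.set_device(local_rank)
    # Libraries that read CUDA_VISIBLE_DEVICES later (e.g. the GPU
    # simulator backing the envs) then see only this rank's device.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(local_rank)

# Only after the device is pinned:
#   vecenv = load_env(env_name, args)
#   policy = load_policy(args, vecenv)
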
@@ -925,10 +929,12 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None):
 
 def eval(env_name, args=None, vecenv=None, policy=None):
     args = args or load_config(env_name)
-    args['vec'] = dict(backend='Serial', num_envs=1)
+    backend = args['vec']['backend']
+    if backend != 'PufferEnv':
+        backend = 'Serial'
+
+    args['vec'] = dict(backend=backend, num_envs=1)
     vecenv = vecenv or load_env(env_name, args)
-    if not isinstance(vecenv, pufferlib.vector.Serial):
-        raise pufferlib.APIUsageError('eval requires Serial vector env')
 
     policy = policy or load_policy(args, vecenv)
     ob, info = vecenv.reset()
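
eval previously hard-coded a Serial backend and raised on anything else; it now preserves a PufferEnv backend (needed for natively vectorized envs like ManiSkill, which cannot run under Serial) and coerces everything else to Serial. The selection logic in isolation (the helper name is illustrative):

def eval_vec_config(args: dict) -> dict:
    # Keep a native PufferEnv backend; coerce all others to Serial.
    backend = args['vec']['backend']
    if backend != 'PufferEnv':
        backend = 'Serial'
    return dict(backend=backend, num_envs=1)

assert eval_vec_config({'vec': {'backend': 'PufferEnv'}})['backend'] == 'PufferEnv'
assert eval_vec_config({'vec': {'backend': 'Multiprocessing'}})['backend'] == 'Serial'
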
@@ -954,11 +960,12 @@ def eval(env_name, args=None, vecenv=None, policy=None):
             print('\033[0;0H' + render + '\n')
             time.sleep(1/args['fps'])
         elif driver.render_mode == 'rgb_array':
-            import cv2
-            render = cv2.cvtColor(render, cv2.COLOR_RGB2BGR)
-            cv2.imshow('frame', render)
-            cv2.waitKey(1)
-            time.sleep(1/args['fps'])
+            pass
+            #import cv2
+            #render = cv2.cvtColor(render, cv2.COLOR_RGB2BGR)
+            #cv2.imshow('frame', render)
+            #cv2.waitKey(1)
+            #time.sleep(1/args['fps'])
 
         with torch.no_grad():
             ob = torch.as_tensor(ob).to(device)
