Skip to content

Commit d9a8a99

Browse files
authored
Add isaac sim env (#164)
* Rename OmniIsaacGym Env * Update environment and vectorized_env to ArrayBackend * Change to only one recording * Add IsaacSim Environment * Add modified version of ppo similiar to rudins * Add CartPole environment * Add example for CartPole environment * Add environment for A1 * Add example for a1 environment * Add environment for HoneyBadger * Add example for HoneyBadger environment * Add environment for SilverBadger * Add example for SilverBadger environment * Add tests for all isaacsim envs * Fix Bug where final last not True after flatten * Fix torch load for newer torch versions * Fix save attr * Remove change of final last flag in dataset * Fix function parameters
1 parent 6c10c85 commit d9a8a99

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+4143
-38
lines changed

examples/isaacsim/a1_rudin_ppo.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import torch
2+
import torch.nn as nn
3+
import torch.nn.functional as F
4+
import torch.optim as optim
5+
6+
from tqdm import trange
7+
8+
from mushroom_rl.core import VectorCore, Logger
9+
from mushroom_rl.algorithms.actor_critic import RudinPPO
10+
11+
from mushroom_rl.policy import GaussianTorchPolicy
12+
from mushroom_rl.environments.isaacsim_envs import A1Walking
13+
from mushroom_rl.utils import TorchUtils
14+
15+
class Network(nn.Module):
    """Four-layer MLP used as actor/critic body for the A1 walking task.

    ``n_features`` is a sequence of three hidden-layer widths; hidden layers
    are Xavier-initialized with the relu gain, the output layer with the
    linear gain.
    """

    def __init__(self, input_shape, output_shape, n_features, **kwargs):
        super().__init__()

        in_size = input_shape[-1]
        out_size = output_shape[0]

        # Layers are created and initialized in a fixed order so that
        # seeded runs stay reproducible.
        self._h1 = nn.Linear(in_size, n_features[0])
        self._h2 = nn.Linear(n_features[0], n_features[1])
        self._h3 = nn.Linear(n_features[1], n_features[2])
        self._h4 = nn.Linear(n_features[2], out_size)

        hidden_gain = nn.init.calculate_gain('relu')
        for layer in (self._h1, self._h2, self._h3):
            nn.init.xavier_uniform_(layer.weight, gain=hidden_gain)
        nn.init.xavier_uniform_(self._h4.weight,
                                gain=nn.init.calculate_gain('linear'))

    def forward(self, state, **kwargs):
        # Drop a possible singleton axis 1 before the dense stack.
        h = torch.squeeze(state, 1).float()
        for layer in (self._h1, self._h2, self._h3):
            h = F.relu(layer(h))
        return self._h4(h)
43+
44+
def experiment(alg, n_epochs, n_steps, n_steps_per_fit, n_episodes_test,
               alg_params, policy_params):
    """Train ``alg`` on the A1 walking task and log J/R/entropy/V per epoch.

    Args:
        alg: actor-critic agent class (e.g. ``RudinPPO``);
        n_epochs (int): number of learn/evaluate epochs;
        n_steps (int): environment steps collected per epoch;
        n_steps_per_fit (int): steps between agent fits;
        n_episodes_test (int): episodes used for each evaluation;
        alg_params (dict): agent constructor parameters (``critic_params``
            is injected here before the agent is built);
        policy_params (dict): ``GaussianTorchPolicy`` parameters.
    """
    logger = Logger(alg.__name__ + "_1_legged_gym", results_dir="./logs/",
                    log_console=True, use_timestamp=True)
    logger.strong_line()
    logger.info('Experiment Algorithm: ' + alg.__name__)

    # 4096 parallel envs, 1000-step horizon; the two boolean flags are
    # passed positionally -- confirm meaning against the env constructor.
    mdp = A1Walking(4096, 1000, True, True)

    critic_params = dict(network=Network,
                         optimizer={'class': optim.Adam,
                                    'params': {'lr': 1e-3}},
                         loss=F.mse_loss,
                         n_features=[512, 256, 128],
                         batch_size=int((4096 * 24) / 16),
                         use_cuda=True,
                         input_shape=mdp.info.observation_space.shape,
                         output_shape=(1,))

    policy = GaussianTorchPolicy(Network,
                                 mdp.info.observation_space.shape,
                                 mdp.info.action_space.shape,
                                 **policy_params)

    alg_params['critic_params'] = critic_params

    agent = alg(mdp.info, policy, **alg_params)

    core = VectorCore(agent, mdp)

    dataset = core.evaluate(n_episodes=n_episodes_test, render=True, record=True)

    # ``.item()`` already synchronizes and returns a Python scalar, so the
    # explicit ``.to("cpu")`` transfers (mixed in three different styles in
    # the original) are unnecessary and have been unified away.
    J = torch.mean(dataset.discounted_return).item()
    R = torch.mean(dataset.undiscounted_return).item()
    E = agent.policy.entropy().item()
    V = torch.mean(agent._V(dataset.get_init_states())).detach().item()

    logger.epoch_info(0, J=J, R=R, entropy=E, V=V)

    for it in trange(n_epochs, leave=False):
        core.learn(n_steps=n_steps, n_steps_per_fit=n_steps_per_fit)
        # Render/record a video only every fifth epoch and on the last one.
        record = (it + 1) % 5 == 0 or it == n_epochs - 1
        dataset = core.evaluate(n_episodes=n_episodes_test, render=record,
                                record=record)

        J = torch.mean(dataset.discounted_return).item()
        R = torch.mean(dataset.undiscounted_return).item()
        E = agent.policy.entropy().item()
        V = torch.mean(agent._V(dataset.get_init_states())).detach().item()

        logger.epoch_info(it + 1, J=J, R=R, entropy=E, V=V)

        # Free the (large) evaluation dataset before the next rollout.
        del dataset
98+
99+
100+
if __name__ == '__main__':
    TorchUtils.set_default_device('cuda:0')

    # One rollout is 24 steps across 4096 envs, split into 16 minibatches.
    rollout_size = 4096 * 24
    minibatch_size = int(rollout_size / 16)

    ppo_params = dict(actor_optimizer={'class': optim.Adam,
                                       'params': {'lr': 1e-3}},
                      n_epochs_policy=5,
                      batch_size=minibatch_size,
                      eps_ppo=.2,
                      lam=.95,
                      ent_coeff=0.01)

    policy_params = dict(std_0=1.,
                         n_features=[512, 256, 128],
                         use_cuda=True)

    experiment(alg=RudinPPO, n_epochs=40, n_steps=rollout_size * 50,
               n_steps_per_fit=rollout_size, n_episodes_test=256,
               alg_params=ppo_params, policy_params=policy_params)

examples/isaacsim/cartpole_ppo.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import torch
2+
import torch.nn as nn
3+
import torch.nn.functional as F
4+
import torch.optim as optim
5+
6+
import numpy as np
7+
from tqdm import trange
8+
9+
from mushroom_rl.core import VectorCore, Logger
10+
from mushroom_rl.algorithms.actor_critic import TRPO, PPO
11+
12+
from mushroom_rl.policy import GaussianTorchPolicy
13+
from mushroom_rl.environments.isaacsim_envs.cartpole import CartPole
14+
from mushroom_rl.utils import TorchUtils
15+
16+
17+
class Network(nn.Module):
    """Three-layer MLP with a single scalar hidden width ``n_features``."""

    def __init__(self, input_shape, output_shape, n_features, **kwargs):
        super().__init__()

        in_size = input_shape[-1]
        out_size = output_shape[0]

        self._h1 = nn.Linear(in_size, n_features)
        self._h2 = nn.Linear(n_features, n_features)
        self._h3 = nn.Linear(n_features, out_size)

        # Hidden layers use the relu gain; the output layer the linear one.
        relu_gain = nn.init.calculate_gain('relu')
        nn.init.xavier_uniform_(self._h1.weight, gain=relu_gain)
        nn.init.xavier_uniform_(self._h2.weight, gain=relu_gain)
        nn.init.xavier_uniform_(self._h3.weight,
                                gain=nn.init.calculate_gain('linear'))

    def forward(self, state, **kwargs):
        # Squeeze a possible singleton axis 1, then run the dense stack.
        h = torch.squeeze(state, 1).float()
        h = F.relu(self._h1(h))
        h = F.relu(self._h2(h))
        return self._h3(h)
41+
42+
43+
def experiment(alg, n_epochs, n_steps, n_steps_per_fit, n_episodes_test,
               alg_params, policy_params):
    """Run ``alg`` on the IsaacSim cart-pole task, logging per-epoch metrics.

    Args:
        alg: actor-critic agent class (e.g. ``PPO``);
        n_epochs (int): number of learn/evaluate epochs;
        n_steps (int): environment steps collected per epoch;
        n_steps_per_fit (int): steps between agent fits;
        n_episodes_test (int): episodes used for each evaluation;
        alg_params (dict): agent constructor parameters;
        policy_params (dict): ``GaussianTorchPolicy`` parameters.
    """
    logger = Logger(alg.__name__, results_dir=None)
    logger.strong_line()
    logger.info('Experiment Algorithm: ' + alg.__name__)

    # 64 parallel cart-pole envs; the boolean flag is passed positionally
    # -- confirm its meaning against the env constructor.
    mdp = CartPole(64, True)

    critic_params = dict(network=Network,
                         optimizer={'class': optim.Adam,
                                    'params': {'lr': 3e-4}},
                         loss=F.mse_loss,
                         n_features=32,
                         batch_size=100,
                         use_cuda=True,
                         input_shape=mdp.info.observation_space.shape,
                         output_shape=(1,))

    policy = GaussianTorchPolicy(Network,
                                 mdp.info.observation_space.shape,
                                 mdp.info.action_space.shape,
                                 **policy_params)

    alg_params['critic_params'] = critic_params
    agent = alg(mdp.info, policy, **alg_params)

    core = VectorCore(agent, mdp)

    def log_metrics(epoch, dataset):
        # Mean discounted/undiscounted return, policy entropy, and the
        # number of absorbing states seen during this evaluation.
        logger.epoch_info(epoch,
                          J=torch.mean(dataset.discounted_return).item(),
                          R=torch.mean(dataset.undiscounted_return).item(),
                          entropy=agent.policy.entropy().item(),
                          absorbing=torch.sum(dataset.absorbing).item())

    log_metrics(0, core.evaluate(n_episodes=n_episodes_test, render=True,
                                 record=True))

    for it in trange(n_epochs, leave=False):
        core.learn(n_steps=n_steps, n_steps_per_fit=n_steps_per_fit)
        log_metrics(it + 1, core.evaluate(n_episodes=n_episodes_test,
                                          render=True, record=True))

    logger.info('Press a button to visualize')
    input()
    core.evaluate(n_episodes=5, render=True, record=True)
97+
98+
99+
if __name__ == '__main__':
    ppo_params = dict(actor_optimizer={'class': optim.Adam,
                                       'params': {'lr': 3e-4}},
                      n_epochs_policy=4,
                      batch_size=100,
                      eps_ppo=.2,
                      lam=.95)

    policy_params = dict(std_0=1.,
                         n_features=32,
                         use_cuda=True)

    experiment(alg=PPO, n_epochs=20, n_steps=30000, n_steps_per_fit=3000,
               n_episodes_test=64, alg_params=ppo_params,
               policy_params=policy_params)
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import torch
2+
import torch.nn as nn
3+
import torch.nn.functional as F
4+
import torch.optim as optim
5+
6+
import numpy as np
7+
from tqdm import trange
8+
9+
from mushroom_rl.core import VectorCore, Logger
10+
from mushroom_rl.algorithms.actor_critic import RudinPPO
11+
12+
from mushroom_rl.policy import GaussianTorchPolicy
13+
from mushroom_rl.environments.isaacsim_envs import HoneyBadgerWalking
14+
from mushroom_rl.utils import TorchUtils
15+
16+
17+
class Network(nn.Module):
    """Four-layer MLP body for the HoneyBadger walking task.

    ``n_features`` lists the three hidden-layer widths; the layer sizes are
    chained from the flattened observation size to the action size.
    """

    def __init__(self, input_shape, output_shape, n_features, **kwargs):
        super(Network, self).__init__()

        dims = [input_shape[-1]] + list(n_features) + [output_shape[0]]

        self._h1 = nn.Linear(dims[0], dims[1])
        self._h2 = nn.Linear(dims[1], dims[2])
        self._h3 = nn.Linear(dims[2], dims[3])
        self._h4 = nn.Linear(dims[3], dims[4])

        # Hidden layers get the relu gain, the output layer the linear one;
        # initialization order matches creation order for reproducibility.
        layers = [self._h1, self._h2, self._h3, self._h4]
        gains = ['relu', 'relu', 'relu', 'linear']
        for layer, gain in zip(layers, gains):
            nn.init.xavier_uniform_(layer.weight,
                                    gain=nn.init.calculate_gain(gain))

    def forward(self, state, **kwargs):
        # Squeeze a possible singleton axis 1 before the dense stack.
        h = torch.squeeze(state, 1).float()
        h = F.relu(self._h1(h))
        h = F.relu(self._h2(h))
        h = F.relu(self._h3(h))
        return self._h4(h)
45+
46+
47+
def experiment(alg, n_epochs, n_steps, n_steps_per_fit, n_episodes_test,
               alg_params, policy_params):
    """Train ``alg`` on the HoneyBadger walking task, logging J/R/entropy.

    Args:
        alg: actor-critic agent class (e.g. ``RudinPPO``);
        n_epochs (int): number of learn/evaluate epochs;
        n_steps (int): environment steps collected per epoch;
        n_steps_per_fit (int): steps between agent fits;
        n_episodes_test (int): episodes used for each evaluation;
        alg_params (dict): agent constructor parameters (``critic_params``
            is injected here before the agent is built);
        policy_params (dict): ``GaussianTorchPolicy`` parameters.
    """
    # Log-directory suffix fixed: "_honey_batcher" was a typo for the
    # HoneyBadger robot this experiment actually runs.
    logger = Logger(alg.__name__ + "_honey_badger", results_dir="./logs/",
                    log_console=True, use_timestamp=True)
    logger.strong_line()
    logger.info('Experiment Algorithm: ' + alg.__name__)

    # 4096 parallel envs, 1000-step horizon; the two boolean flags are
    # passed positionally -- confirm meaning against the env constructor.
    mdp = HoneyBadgerWalking(4096, 1000, True, True)

    critic_params = dict(network=Network,
                         optimizer={'class': optim.Adam,
                                    'params': {'lr': 1e-3}},
                         loss=F.mse_loss,
                         n_features=[512, 256, 128],
                         batch_size=int((4096 * 24) / 32),
                         use_cuda=True,
                         input_shape=mdp.info.observation_space.shape,
                         output_shape=(1,))

    policy = GaussianTorchPolicy(Network,
                                 mdp.info.observation_space.shape,
                                 mdp.info.action_space.shape,
                                 **policy_params)

    alg_params['critic_params'] = critic_params

    agent = alg(mdp.info, policy, **alg_params)

    core = VectorCore(agent, mdp)

    dataset = core.evaluate(n_episodes=n_episodes_test, render=True, record=True)

    J = torch.mean(dataset.discounted_return).item()
    R = torch.mean(dataset.undiscounted_return).item()
    E = agent.policy.entropy().item()

    logger.epoch_info(0, J=J, R=R, entropy=E)
    # Free the (large) evaluation dataset before the training loop.
    del dataset

    for it in trange(n_epochs, leave=False):
        core.learn(n_steps=n_steps, n_steps_per_fit=n_steps_per_fit)
        dataset = core.evaluate(n_episodes=n_episodes_test, render=True)

        J = torch.mean(dataset.discounted_return).item()
        R = torch.mean(dataset.undiscounted_return).item()
        E = agent.policy.entropy().item()

        logger.epoch_info(it + 1, J=J, R=R, entropy=E)
        del dataset
96+
97+
if __name__ == '__main__':
    TorchUtils.set_default_device('cuda:0')

    # One rollout is 24 steps across 4096 envs, split into 32 minibatches.
    rollout_size = 4096 * 24
    minibatch_size = int(rollout_size / 32)

    ppo_params = dict(actor_optimizer={'class': optim.Adam,
                                       'params': {'lr': 1e-3}},
                      n_epochs_policy=5,
                      batch_size=minibatch_size,
                      eps_ppo=.2,
                      lam=.95,
                      ent_coeff=0.00)

    policy_params = dict(std_0=1.,
                         n_features=[512, 256, 128],
                         use_cuda=True)

    experiment(alg=RudinPPO, n_epochs=30, n_steps=rollout_size * 50 * 2,
               n_steps_per_fit=rollout_size, n_episodes_test=256,
               alg_params=ppo_params, policy_params=policy_params)

0 commit comments

Comments
 (0)