Skip to content

Commit 886285d

Browse files
authored
feature(nyz): add PPOF new interface support (#567)
* feature(nyz): add basic ppof in discrete/continuous action space
* feature(nyz): add ppof hybrid implementation
* demo(nyz): add ppof rocket landing demo
* fix(nyz): fix compatibility bugs
* demo(nyz): add drone fly demo
* fix(nyz): fix typo and add more comments
* fix(nyz): fix CI and demo bugs
* fix(nyz): fix rocket context bug
* style(nyz): fix flake8 style
1 parent 9c689d2 commit 886285d

34 files changed

+1189
-96
lines changed

ding/bonus/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .ppof import PPOF

ding/bonus/config.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from easydict import EasyDict
2+
import gym
3+
from ding.envs import BaseEnv, DingEnvWrapper
4+
from ding.policy import PPOFPolicy
5+
6+
7+
def get_instance_config(env: str) -> EasyDict:
    """Return the PPOF policy config tuned for a named demo environment.

    The config starts from ``PPOFPolicy.default_config()`` and then has the
    environment-specific fields listed below assigned on top of it.

    Args:
        env: One of ``'lunarlander_discrete'``, ``'lunarlander_continuous'``,
            ``'rocket_landing'``, ``'drone_fly'`` or ``'hybrid_moving'``.

    Returns:
        The default config object with the per-environment overrides applied.

    Raises:
        KeyError: If ``env`` is not one of the supported names.
    """
    cfg = PPOFPolicy.default_config()

    # Per-environment overrides, listed in the order they are assigned.
    overrides_by_env = {
        'lunarlander_discrete': dict(n_sample=400),
        'lunarlander_continuous': dict(action_space='continuous', n_sample=400),
        'rocket_landing': dict(
            n_sample=2048,
            adv_norm=False,
            model=dict(
                encoder_hidden_size_list=[64, 64, 128],
                actor_head_hidden_size=128,
                critic_head_hidden_size=128,
            ),
        ),
        'drone_fly': dict(
            action_space='continuous',
            adv_norm=False,
            epoch_per_collect=5,
            learning_rate=5e-5,
            n_sample=640,
        ),
        'hybrid_moving': dict(
            action_space='hybrid',
            n_sample=3200,
            entropy_weight=0.03,
            batch_size=320,
            adv_norm=False,
            model=dict(
                encoder_hidden_size_list=[256, 128, 64, 64],
                sigma_type='fixed',
                fixed_sigma_value=0.3,
                bound_type='tanh',
            ),
        ),
    }

    if env not in overrides_by_env:
        raise KeyError("not supported env type: {}".format(env))

    # Attribute assignment (not item assignment) mirrors ``cfg.<field> = value``.
    for field_name, field_value in overrides_by_env[env].items():
        setattr(cfg, field_name, field_value)
    return cfg
43+
44+
45+
def get_instance_env(env: str) -> BaseEnv:
    """Create the environment instance for a named demo environment.

    Environment-specific packages are imported lazily inside their branch, so
    they are only required when that environment is actually requested.

    Args:
        env: One of ``'lunarlander_discrete'``, ``'lunarlander_continuous'``,
            ``'rocket_landing'``, ``'drone_fly'`` or ``'hybrid_moving'``.

    Returns:
        The constructed environment object.

    Raises:
        KeyError: If ``env`` is not one of the supported names.
    """
    if env == 'lunarlander_discrete':
        lander = gym.make('LunarLander-v2')
        return DingEnvWrapper(lander)

    if env == 'lunarlander_continuous':
        lander = gym.make('LunarLander-v2', continuous=True)
        return DingEnvWrapper(lander)

    if env == 'rocket_landing':
        from dizoo.rocket.envs import RocketEnv
        rocket_cfg = EasyDict(dict(task='landing', max_steps=800))
        return RocketEnv(rocket_cfg)

    if env == 'drone_fly':
        from dizoo.gym_pybullet_drones.envs import GymPybulletDronesEnv
        drone_cfg = EasyDict(dict(env_id='flythrugate-aviary-v0', action_type='VEL'))
        return GymPybulletDronesEnv(drone_cfg)

    if env == 'hybrid_moving':
        # The module is not referenced by name; presumably importing it registers
        # 'Moving-v0' with gym -- TODO confirm.
        import gym_hybrid  # noqa: F401
        return DingEnvWrapper(gym.make('Moving-v0'))

    raise KeyError("not supported env type: {}".format(env))
69+
70+
71+
def get_hybrid_shape(action_space) -> EasyDict:
    """Extract the action-shape description of a two-part hybrid action space.

    Args:
        action_space: An indexable object whose element 0 exposes ``.n`` and
            whose element 1 exposes ``.shape`` (presumably a gym
            ``Tuple(Discrete, Box)`` space -- TODO confirm against callers).

    Returns:
        An ``EasyDict`` with ``action_type_shape`` (from ``action_space[0].n``)
        and ``action_args_shape`` (from ``action_space[1].shape``).
    """
    type_dim = action_space[0].n
    args_dim = action_space[1].shape
    return EasyDict(dict(action_type_shape=type_dim, action_args_shape=args_dim))

ding/bonus/model.py

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
from typing import Union, Optional
2+
from easydict import EasyDict
3+
import torch
4+
import torch.nn as nn
5+
import treetensor.torch as ttorch
6+
from copy import deepcopy
7+
from ding.utils import SequenceType, squeeze
8+
from ding.model.common import ReparameterizationHead, RegressionHead, MultiHead, \
9+
FCEncoder, ConvEncoder, IMPALAConvEncoder
10+
from ding.torch_utils import MLP, fc_block
11+
12+
13+
class DiscretePolicyHead(nn.Module):
    """Policy head made of an ``MLP`` followed by a final ``fc_block``.

    The ``MLP`` is built with ``hidden_size`` for all three of its size
    arguments and ``layer_num`` layers; the trailing ``fc_block`` maps
    ``hidden_size`` to ``output_size``.
    """

    def __init__(
            self,
            hidden_size: int,
            output_size: int,
            layer_num: int = 1,
            activation: Optional[nn.Module] = nn.ReLU(),
            norm_type: Optional[str] = None,
    ) -> None:
        """Build the head.

        Args:
            hidden_size: Size passed to the ``MLP`` and used as the input size
                of the final ``fc_block``.
            output_size: Output size of the final ``fc_block``.
            layer_num: Layer count forwarded to the ``MLP``.
            activation: Activation module forwarded to the ``MLP``.
            norm_type: Normalization type forwarded to the ``MLP``.
        """
        super().__init__()
        # The MLP is constructed before the output block, as in the original
        # nested expression, so parameter initialization order is unchanged.
        trunk = MLP(
            hidden_size,
            hidden_size,
            hidden_size,
            layer_num,
            layer_fn=nn.Linear,
            activation=activation,
            norm_type=norm_type,
        )
        projection = fc_block(hidden_size, output_size)
        self.main = nn.Sequential(trunk, projection)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the MLP and the output block and return the result."""
        return self.main(x)
38+
39+
40+
class PPOFModel(nn.Module):
    """Actor-critic network for PPOF with discrete, continuous or hybrid actions.

    The model is an encoder (shared, or one per role) followed by a critic head
    and an action-space-specific actor head. ``forward`` dispatches to one of
    the methods named in ``mode``.
    """
    mode = ['compute_actor', 'compute_critic', 'compute_actor_critic']

    def __init__(
            self,
            obs_shape: Union[int, SequenceType],
            action_shape: Union[int, SequenceType, EasyDict],
            action_space: str = 'discrete',
            share_encoder: bool = True,
            encoder_hidden_size_list: SequenceType = [128, 128, 64],
            actor_head_hidden_size: int = 64,
            actor_head_layer_num: int = 1,
            critic_head_hidden_size: int = 64,
            critic_head_layer_num: int = 1,
            activation: Optional[nn.Module] = nn.ReLU(),
            norm_type: Optional[str] = None,
            sigma_type: Optional[str] = 'independent',
            fixed_sigma_value: Optional[float] = 0.3,
            bound_type: Optional[str] = None,
            encoder: Optional[torch.nn.Module] = None,
    ) -> None:
        """Build encoder(s), critic head and actor head.

        Args:
            obs_shape: Observation shape. An int or length-1 shape selects
                ``FCEncoder``; a length-3 shape selects ``ConvEncoder``.
            action_shape: Action shape. For ``'hybrid'`` it must expose
                ``action_type_shape`` and ``action_args_shape``.
            action_space: ``'discrete'``, ``'continuous'`` or ``'hybrid'``.
            share_encoder: If True, actor and critic use one encoder and their
                head hidden sizes must be equal.
            encoder_hidden_size_list: Hidden sizes for the pre-defined encoders.
            actor_head_hidden_size: Hidden size of the actor head.
            actor_head_layer_num: Layer count of the actor head.
            critic_head_hidden_size: Hidden size of the critic head.
            critic_head_layer_num: Layer count of the critic head.
            activation: Activation module passed to encoders and heads.
            norm_type: Normalization type passed to encoders and heads.
            sigma_type: Sigma type passed to ``ReparameterizationHead``.
            fixed_sigma_value: Fixed sigma passed to ``ReparameterizationHead``
                for both continuous and hybrid action spaces.
            bound_type: Bound type passed to ``ReparameterizationHead``.
            encoder: Optional user-supplied encoder used instead of the
                pre-defined ones. Without encoder sharing, the critic gets a
                deep copy of it.

        Raises:
            RuntimeError: If no encoder is supplied and ``obs_shape`` is neither
                an int, a length-1 shape nor a length-3 shape.
            ValueError: If ``encoder`` is given but is not a ``torch.nn.Module``.
            AssertionError: If ``share_encoder`` is set with different head
                hidden sizes, or ``action_space`` is not supported.
        """
        super(PPOFModel, self).__init__()
        obs_shape = squeeze(obs_shape)
        action_shape = squeeze(action_shape)
        self.obs_shape, self.action_shape = obs_shape, action_shape
        self.share_encoder = share_encoder

        # Encoder Type
        def new_encoder() -> nn.Module:
            # Pick a pre-defined encoder from the observation shape alone.
            if isinstance(obs_shape, int) or len(obs_shape) == 1:
                return FCEncoder(
                    obs_shape=obs_shape,
                    hidden_size_list=encoder_hidden_size_list,
                    activation=activation,
                    norm_type=norm_type
                )
            elif len(obs_shape) == 3:
                return ConvEncoder(
                    obs_shape=obs_shape,
                    hidden_size_list=encoder_hidden_size_list,
                    activation=activation,
                    norm_type=norm_type
                )
            else:
                raise RuntimeError(
                    "not support obs_shape for pre-defined encoder: {}, please customize your own encoder".
                    format(obs_shape)
                )

        # Compare against None explicitly: container modules such as an empty
        # nn.Sequential define __len__ and would be falsy in a truthiness test.
        if self.share_encoder:
            assert actor_head_hidden_size == critic_head_hidden_size, \
                "actor and critic network head should have same size."
            if encoder is not None:
                if not isinstance(encoder, torch.nn.Module):
                    raise ValueError("illegal encoder instance.")
                self.encoder = encoder
            else:
                self.encoder = new_encoder()
        else:
            if encoder is not None:
                if not isinstance(encoder, torch.nn.Module):
                    raise ValueError("illegal encoder instance.")
                self.actor_encoder = encoder
                # The critic gets its own copy so the two encoders do not share weights.
                self.critic_encoder = deepcopy(encoder)
            else:
                self.actor_encoder = new_encoder()
                self.critic_encoder = new_encoder()

        # Head Type
        self.critic_head = RegressionHead(
            critic_head_hidden_size, 1, critic_head_layer_num, activation=activation, norm_type=norm_type
        )
        self.action_space = action_space
        assert self.action_space in ['discrete', 'continuous', 'hybrid'], self.action_space
        if self.action_space == 'continuous':
            self.multi_head = False
            # fixed_sigma_value is forwarded here as in the hybrid branch;
            # previously it was accepted by __init__ but dropped for continuous actions.
            self.actor_head = ReparameterizationHead(
                actor_head_hidden_size,
                action_shape,
                actor_head_layer_num,
                sigma_type=sigma_type,
                fixed_sigma_value=fixed_sigma_value,
                activation=activation,
                norm_type=norm_type,
                bound_type=bound_type
            )
        elif self.action_space == 'discrete':
            actor_head_cls = DiscretePolicyHead
            # A non-int action shape selects one head per entry via MultiHead.
            multi_head = not isinstance(action_shape, int)
            self.multi_head = multi_head
            if multi_head:
                self.actor_head = MultiHead(
                    actor_head_cls,
                    actor_head_hidden_size,
                    action_shape,
                    layer_num=actor_head_layer_num,
                    activation=activation,
                    norm_type=norm_type
                )
            else:
                self.actor_head = actor_head_cls(
                    actor_head_hidden_size,
                    action_shape,
                    actor_head_layer_num,
                    activation=activation,
                    norm_type=norm_type
                )
        elif self.action_space == 'hybrid':  # HPPO
            # hybrid action space: action_type(discrete) + action_args(continuous).
            # action_shape must expose `action_type_shape` and `action_args_shape`;
            # both are squeezed in place on the action_shape object.
            action_shape.action_args_shape = squeeze(action_shape.action_args_shape)
            action_shape.action_type_shape = squeeze(action_shape.action_type_shape)
            actor_action_args = ReparameterizationHead(
                actor_head_hidden_size,
                action_shape.action_args_shape,
                actor_head_layer_num,
                sigma_type=sigma_type,
                fixed_sigma_value=fixed_sigma_value,
                activation=activation,
                norm_type=norm_type,
                bound_type=bound_type,
            )
            actor_action_type = DiscretePolicyHead(
                actor_head_hidden_size,
                action_shape.action_type_shape,
                actor_head_layer_num,
                activation=activation,
                norm_type=norm_type,
            )
            # Index 0 is the discrete action-type head, index 1 the continuous action-args head.
            self.actor_head = nn.ModuleList([actor_action_type, actor_action_args])

        # Group the modules of each role so that role-level APIs such as
        # `self.critic.parameters()` are available. The grouped modules are the
        # same objects registered above, so they show up more than once in
        # `print(self)`.
        if self.share_encoder:
            actor_modules = [self.encoder, self.actor_head]
            critic_modules = [self.encoder, self.critic_head]
        else:
            actor_modules = [self.actor_encoder, self.actor_head]
            critic_modules = [self.critic_encoder, self.critic_head]
        self.actor = nn.ModuleList(actor_modules)
        self.critic = nn.ModuleList(critic_modules)

    def forward(self, inputs: ttorch.Tensor, mode: str) -> ttorch.Tensor:
        """Dispatch ``inputs`` to the method named by ``mode``.

        Args:
            inputs: Observation passed unchanged to the selected method.
            mode: One of the names listed in ``PPOFModel.mode``.

        Returns:
            Whatever the selected ``compute_*`` method returns.

        Raises:
            AssertionError: If ``mode`` is not listed in ``PPOFModel.mode``.
        """
        assert mode in self.mode, "not support forward mode: {}/{}".format(mode, self.mode)
        return getattr(self, mode)(inputs)

    def compute_actor(self, x: ttorch.Tensor) -> ttorch.Tensor:
        """Encode ``x`` and return the actor head output.

        Returns:
            For ``'discrete'``, the actor head output as-is. For
            ``'continuous'``, the head output wrapped by ``ttorch.as_tensor``.
            For ``'hybrid'``, a tree with ``action_type`` and ``action_args``.
        """
        if self.share_encoder:
            x = self.encoder(x)
        else:
            x = self.actor_encoder(x)

        if self.action_space == 'discrete':
            return self.actor_head(x)
        elif self.action_space == 'continuous':
            x = self.actor_head(x)  # mu, sigma
            return ttorch.as_tensor(x)
        elif self.action_space == 'hybrid':
            action_type = self.actor_head[0](x)
            action_args = self.actor_head[1](x)
            return ttorch.as_tensor({'action_type': action_type, 'action_args': action_args})

    def compute_critic(self, x: ttorch.Tensor) -> ttorch.Tensor:
        """Encode ``x`` and return the ``'pred'`` entry of the critic head output."""
        if self.share_encoder:
            x = self.encoder(x)
        else:
            x = self.critic_encoder(x)
        x = self.critic_head(x)
        return x['pred']

    def compute_actor_critic(self, x: ttorch.Tensor) -> ttorch.Tensor:
        """Compute actor output and value estimate in one pass.

        With a shared encoder the observation is encoded once and the embedding
        is reused by both heads.

        Returns:
            A tree with ``logit`` (the actor head output; for ``'hybrid'`` a
            nested tree with ``action_type`` and ``action_args``) and ``value``
            (the ``'pred'`` entry of the critic head output).
        """
        if self.share_encoder:
            actor_embedding = critic_embedding = self.encoder(x)
        else:
            actor_embedding = self.actor_encoder(x)
            critic_embedding = self.critic_encoder(x)

        value = self.critic_head(critic_embedding)['pred']

        if self.action_space == 'discrete':
            logit = self.actor_head(actor_embedding)
            return ttorch.as_tensor({'logit': logit, 'value': value})
        elif self.action_space == 'continuous':
            x = self.actor_head(actor_embedding)
            return ttorch.as_tensor({'logit': x, 'value': value})
        elif self.action_space == 'hybrid':
            action_type = self.actor_head[0](actor_embedding)
            action_args = self.actor_head[1](actor_embedding)
            return ttorch.as_tensor({'logit': {'action_type': action_type, 'action_args': action_args}, 'value': value})

0 commit comments

Comments
 (0)