@@ -41,45 +41,6 @@ def _reverse_action(self, action):
 
         return action
 
-def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
-    torch.nn.init.orthogonal_(layer.weight, std)
-    torch.nn.init.constant_(layer.bias, bias_const)
-    return layer
-
-# class PPO(nn.Module):
-#     def __init__(self, num_inputs, num_actions, hidden_size, action_range = 1.):
-#         super(PPO, self).__init__()
-#         self.data = []
-#         self.action_range = action_range
-#         self.v_linear = nn.Sequential(
-#             layer_init(nn.Linear(num_inputs, 64)),
-#             nn.Tanh(),
-#             layer_init(nn.Linear(64, 64)),
-#             nn.Tanh(),
-#             layer_init(nn.Linear(64, 1), std=1.0),
-#         )
-#         self.mean_linear = nn.Sequential(
-#             layer_init(nn.Linear(num_inputs, 64)),
-#             nn.Tanh(),
-#             layer_init(nn.Linear(64, 64)),
-#             nn.Tanh(),
-#             layer_init(nn.Linear(64, num_actions), std=0.01),
-#         )
-#         self.log_std_param = nn.Parameter(torch.zeros(num_actions))
-
-#         self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
-
-#     def pi(self, x):
-#         mean = self.mean_linear(x)
-#         # log_std = self.log_std_linear(x)
-#         log_std = self.log_std_param.expand_as(mean)
-
-#         return mean, log_std
-
-#     def v(self, x):
-#         v = self.v_linear(x)
-#         return v
-
 class PPO(nn.Module):
     def __init__(self, num_inputs, num_actions, hidden_size, action_range = 1.):
         super(PPO, self).__init__()
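
For context on the deleted helper: layer_init is the standard PPO orthogonal-initialization recipe. Below is a minimal, self-contained sketch of how it is conventionally wired up (PyTorch assumed; num_inputs, num_actions, and the head sizes are illustrative, not from this commit). Hidden layers take the default gain sqrt(2), the value head gain 1.0, and the policy head gain 0.01 so the initial policy output stays near zero, matching the std= arguments in the commented-out class removed above.

import numpy as np
import torch
import torch.nn as nn

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    # Orthogonal weights with the given gain, constant bias (PPO convention).
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer

num_inputs, num_actions = 8, 2  # illustrative sizes, not from this commit
value_head = nn.Sequential(
    layer_init(nn.Linear(num_inputs, 64)),             # hidden: gain sqrt(2)
    nn.Tanh(),
    layer_init(nn.Linear(64, 1), std=1.0),             # value head: gain 1.0
)
mean_head = nn.Sequential(
    layer_init(nn.Linear(num_inputs, 64)),
    nn.Tanh(),
    layer_init(nn.Linear(64, num_actions), std=0.01),  # policy head: gain 0.01
)
log_std = nn.Parameter(torch.zeros(num_actions))       # state-independent log-std

x = torch.randn(4, num_inputs)
mean = mean_head(x)
print(value_head(x).shape, mean.shape, log_std.expand_as(mean).shape)
# -> torch.Size([4, 1]) torch.Size([4, 2]) torch.Size([4, 2])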