 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
 from tensorboardX import SummaryWriter
+from rltorch.log import Logger

 #
 ## Networks
 #
 class Policy(nn.Module):
-  def __init__(self, state_size, action_size):
-    super(Policy, self).__init__()
-    self.state_size = state_size
-    self.action_size = action_size
-
-    self.fc1 = nn.Linear(state_size, 125)
-    self.fc_norm = nn.LayerNorm(125)
-
-    self.fc2 = nn.Linear(125, 125)
-    self.fc2_norm = nn.LayerNorm(125)
-
-    self.action_prob = nn.Linear(125, action_size)
-
-  def forward(self, x):
-    x = F.relu(self.fc_norm(self.fc1(x)))
-    x = F.relu(self.fc2_norm(self.fc2(x)))
-    x = F.softmax(self.action_prob(x), dim=1)
-    return x
+    def __init__(self, state_size, action_size):
+        super(Policy, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size
+        self.fc1 = nn.Linear(state_size, 125)
+        self.fc_norm = nn.LayerNorm(125)
+
+        self.fc2 = nn.Linear(125, 125)
+        self.fc2_norm = nn.LayerNorm(125)
+        self.action_prob = nn.Linear(125, action_size)
+
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
+        x = F.relu(self.fc2_norm(self.fc2(x)))
+        x = F.softmax(self.action_prob(x), dim=1)
+        return x

 #
 ## Configuration
 #
@@ -50,75 +49,67 @@ def forward(self, x):
 #
 ## Training Loop
 #
-def train(runner, net, config, logger=None, logwriter=None):
-  finished = False
-  while not finished:
-    runner.run()
-    net.calc_gradients()
-    net.step()
-    if logwriter is not None:
-      net.log_named_parameters()
-      logwriter.write(logger)
-    finished = runner.episode_num > config['total_training_episodes']
+def train(runner, net, config, logwriter=None):
+    finished = False
+    while not finished:
+        runner.run()
+        net.calc_gradients()
+        net.step()
+        if logwriter is not None:
+            net.log_named_parameters()
+            logwriter.write(Logger)
+        finished = runner.episode_num > config['total_training_episodes']

 #
 ## Loss function
 #
 def fitness(model):
-  env = gym.make("Acrobot-v1")
-  state = torch.from_numpy(env.reset()).float().unsqueeze(0)
-  total_reward = 0
-  done = False
-  while not done:
-    action_probabilities = model(state)
-    distribution = Categorical(action_probabilities)
-    action = distribution.sample().item()
-    next_state, reward, done, _ = env.step(action)
-    total_reward += reward
-    state = torch.from_numpy(next_state).float().unsqueeze(0)
-  return -total_reward
+    env = gym.make("Acrobot-v1")
+    state = torch.from_numpy(env.reset()).float().unsqueeze(0)
+    total_reward = 0
+    done = False
+    while not done:
+        action_probabilities = model(state)
+        distribution = Categorical(action_probabilities)
+        action = distribution.sample().item()
+        next_state, reward, done, _ = env.step(action)
+        total_reward += reward
+        state = torch.from_numpy(next_state).float().unsqueeze(0)
+    return -total_reward

 if __name__ == "__main__":
-  # Hide internal gym warnings
-  gym.logger.set_level(40)
-
-  # Setting up the environment
-  rltorch.set_seed(config['seed'])
-  print("Setting up environment...", end=" ")
-  env = E.TorchWrap(gym.make(config['environment_name']))
-  env.seed(config['seed'])
-  print("Done.")
-
-  state_size = env.observation_space.shape[0]
-  action_size = env.action_space.n
-
-  # Logging
-  logger = rltorch.log.Logger()
-  logwriter = rltorch.log.LogWriter(SummaryWriter())
-
-  # Setting up the networks
-  device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-  net = rn.ESNetwork(Policy(state_size, action_size),
-    torch.optim.Adam, 100, fitness, config, device=device, name="ES", logger=logger)
-
-  # Actor takes a net and uses it to produce actions from given states
-  actor = StochasticSelector(net, action_size, device=device)
-
-  # Runner performs an episode of the environment
-  runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", logwriter=logwriter)
-
-  print("Training...")
-  train(runner, net, config, logger=logger, logwriter=logwriter)
-
-  # For profiling...
-  # import cProfile
-  # cProfile.run('train(runner, agent, config, logger=logger, logwriter=logwriter)')
-  # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
-
-  print("Training Finished.")
-
-  print("Evaluating...")
-  rltorch.env.simulateEnvEps(env, actor, config, total_episodes=config['total_evaluation_episodes'], logger=logger, name="Evaluation")
-  print("Evaluations Done.")
-
-  logwriter.close()  # We don't need to write anything out to disk anymore
+    # Hide internal gym warnings
+    gym.logger.set_level(40)
+
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")
+
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n
+
+    # Logging
+    logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    net = rn.ESNetwork(Policy(state_size, action_size),
+        torch.optim.Adam, 100, fitness, config, device=device, name="ES")
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(net, action_size, device=device)
+    # Runner performs an episode of the environment
+    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", logwriter=logwriter)
+    print("Training...")
+    train(runner, net, config, logwriter=logwriter)
+    # For profiling...
+    # import cProfile
+    # cProfile.run('train(runner, agent, config, logwriter=logwriter)')
+    # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
+    print("Training Finished.")
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes=config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaluations Done.")
+
+    logwriter.close()  # We don't need to write anything out to disk anymore
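
The ## Configuration block falls between the two hunks and is not shown here. Going only by the keys the visible code reads, the config dictionary would need at least the entries sketched below; the values are placeholders rather than the ones in the actual file, and rltorch's ESNetwork may expect further entries that the visible lines never touch.

# Hypothetical sketch of the elided config block: keys taken from the code
# shown above, values are illustrative placeholders only.
config = {
    'seed': 901,                       # used by rltorch.set_seed and env.seed
    'environment_name': 'Acrobot-v1',  # passed to gym.make
    'disable_cuda': False,             # checked when choosing the torch device
    'total_training_episodes': 100,    # stop condition in train()
    'total_evaluation_episodes': 5,    # passed to simulateEnvEps
}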
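For context on how the fitness value gets used: fitness(model) plays one Acrobot episode with the candidate policy and returns the negative episode return, so lower is better, and ESNetwork is constructed above with 100 as what appears to be the population size. The snippet below is a generic evolution-strategies update built around such a fitness function; it is a sketch of the general technique under those assumptions, not rltorch's ESNetwork internals, and the function name and sigma value are invented for illustration.

import torch

def es_gradient_sketch(params, fitness_fn, population=100, sigma=0.05):
    # Generic ES gradient estimate: perturb the flat parameter vector with
    # Gaussian noise, score each perturbed candidate with the fitness
    # function, and weight the noise by the normalized scores.
    noise = [torch.randn_like(params) for _ in range(population)]
    scores = torch.tensor([fitness_fn(params + sigma * eps) for eps in noise])
    scores = (scores - scores.mean()) / (scores.std() + 1e-8)
    # This estimates the gradient of the expected fitness; an optimizer such
    # as torch.optim.Adam can then step the parameters to drive the
    # (negative reward) fitness down.
    return sum(s * eps for s, eps in zip(scores, noise)) / (population * sigma)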