Commit c6172f3

Made Logger global
1 parent 1f7c6f1

21 files changed: 529 additions, 543 deletions

Readme.md

Lines changed: 1 addition & 6 deletions
@@ -14,13 +14,8 @@ This is a dictionary that is shared around the different components. Contains hy
 ### Environment
 This component needs to support the standard openai functions reset and step.
 
-### Logger
-For Tensorboard to work, you need to define a logger that will (optionally) later go into the network, runner, and agent/trainer.
-
-Due to issues with multiprocessing, the Logger is a shared dictionary of lists that get appended to and the LogWriter writes on the main thread.
-
 ### Network
-A network takes a PyTorch nn.Module, PyTorch optimizer, configuration, and the optional logger.
+A network takes a PyTorch nn.Module, PyTorch optimizer, and configuration.
 
 ### Target Network
 Takes in a network and provides methods to sync a copy of the original network.
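For reference, the Network constructor described in the updated Readme is the same one the example scripts in this commit use. A minimal sketch of wiring one up, assuming (as in those scripts) that rn is rltorch.network, config is the shared hyperparameter dictionary, and Policy is an nn.Module:

import torch
import rltorch.network as rn  # assumed alias, matching the example scripts below

# Policy, state_size, action_size, and config come from the surrounding script;
# device selection mirrors the examples in this commit.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
policy_net = rn.Network(Policy(state_size, action_size),
                        torch.optim.Adam, config, device=device, name="Policy")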

examples/acrobot_a2c.py

Lines changed: 43 additions & 52 deletions
@@ -8,6 +8,7 @@
 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
 from tensorboardX import SummaryWriter
+from rltorch.log import Logger
 
 #
 ## Networks
@@ -68,65 +69,55 @@ def forward(self, x):
 #
 ## Training Loop
 #
-def train(runner, agent, config, logger = None, logwriter = None):
+def train(runner, agent, config, logwriter=None):
     finished = False
     while not finished:
         runner.run()
         agent.learn()
         if logwriter is not None:
-            agent.value_net.log_named_parameters()
-            agent.policy_net.log_named_parameters()
-            logwriter.write(logger)
+            agent.value_net.log_named_parameters()
+            agent.policy_net.log_named_parameters()
+            logwriter.write(Logger)
         finished = runner.episode_num > config['total_training_episodes']
 
 
 if __name__ == "__main__":
-    # Setting up the environment
-    rltorch.set_seed(config['seed'])
-    print("Setting up environment...", end = " ")
-    env = E.TorchWrap(gym.make(config['environment_name']))
-    env.seed(config['seed'])
-    print("Done.")
-
-    state_size = env.observation_space.shape[0]
-    action_size = env.action_space.n
-
-    # Logging
-    logger = rltorch.log.Logger()
-    logwriter = rltorch.log.LogWriter(SummaryWriter())
-
-    # Setting up the networks
-    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-    policy_net = rn.Network(Policy(state_size, action_size),
-                            torch.optim.Adam, config, device = device, name = "Policy")
-    value_net = rn.Network(Value(state_size),
-                           torch.optim.Adam, config, device = device, name = "DQN")
-
-
-    # Memory stores experiences for later training
-    memory = M.EpisodeMemory()
-
-    # Actor takes a net and uses it to produce actions from given states
-    actor = StochasticSelector(policy_net, action_size, memory, device = device)
-
-    # Agent is what performs the training
-    agent = rltorch.agents.A2CSingleAgent(policy_net, value_net, memory, config, logger = logger)
-
-    # Runner performs one episode in the environment
-    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name = "Training", memory = memory, logwriter = logwriter)
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")
+
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n
+    # Logging
+    logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    policy_net = rn.Network(Policy(state_size, action_size),
+                            torch.optim.Adam, config, device=device, name="Policy")
+    value_net = rn.Network(Value(state_size),
+                           torch.optim.Adam, config, device=device, name="DQN")
+    # Memory stores experiences for later training
+    memory = M.EpisodeMemory()
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(policy_net, action_size, memory, device = device)
+    # Agent is what performs the training
+    agent = rltorch.agents.A2CSingleAgent(policy_net, value_net, memory, config)
+    # Runner performs one episode in the environment
+    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", memory=memory, logwriter=logwriter)
 
-    print("Training...")
-    train(runner, agent, config, logger = logger, logwriter = logwriter)
-
-    # For profiling...
-    # import cProfile
-    # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
-    # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
-
-    print("Training Finished.")
-
-    print("Evaluating...")
-    rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-    print("Evaulations Done.")
-
-    logwriter.close() # We don't need to write anything out to disk anymore
+    print("Training...")
+    train(runner, agent, config, logwriter=logwriter)
+
+    # For profiling...
+    # import cProfile
+    # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
+    # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
+
+    print("Training Finished.")
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")
+    logwriter.close() # We don't need to write anything out to disk anymore
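After this change the logging flow reduces to the sketch below, using only the names visible in the diff above: the module-level Logger from rltorch.log collects entries from the components, and the LogWriter flushes it to TensorBoard on the main thread instead of a per-script Logger instance being passed around.

from tensorboardX import SummaryWriter
import rltorch
from rltorch.log import Logger  # shared, module-level logger (no local rltorch.log.Logger() instance)

logwriter = rltorch.log.LogWriter(SummaryWriter())
# ... run an episode and a learning step; components record into the global Logger ...
logwriter.write(Logger)  # flush the accumulated entries to TensorBoard
logwriter.close()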

examples/acrobot_es.py

Lines changed: 74 additions & 83 deletions
@@ -9,29 +9,28 @@
 import rltorch.env as E
 from rltorch.action_selector import StochasticSelector
 from tensorboardX import SummaryWriter
+from rltorch.log import Logger
 
 #
 ## Networks
 #
 class Policy(nn.Module):
-    def __init__(self, state_size, action_size):
-        super(Policy, self).__init__()
-        self.state_size = state_size
-        self.action_size = action_size
-
-        self.fc1 = nn.Linear(state_size, 125)
-        self.fc_norm = nn.LayerNorm(125)
-
-        self.fc2 = nn.Linear(125, 125)
-        self.fc2_norm = nn.LayerNorm(125)
-
-        self.action_prob = nn.Linear(125, action_size)
-
-    def forward(self, x):
-        x = F.relu(self.fc_norm(self.fc1(x)))
-        x = F.relu(self.fc2_norm(self.fc2(x)))
-        x = F.softmax(self.action_prob(x), dim = 1)
-        return x
+    def __init__(self, state_size, action_size):
+        super(Policy, self).__init__()
+        self.state_size = state_size
+        self.action_size = action_size
+        self.fc1 = nn.Linear(state_size, 125)
+        self.fc_norm = nn.LayerNorm(125)
+
+        self.fc2 = nn.Linear(125, 125)
+        self.fc2_norm = nn.LayerNorm(125)
+        self.action_prob = nn.Linear(125, action_size)
+
+    def forward(self, x):
+        x = F.relu(self.fc_norm(self.fc1(x)))
+        x = F.relu(self.fc2_norm(self.fc2(x)))
+        x = F.softmax(self.action_prob(x), dim = 1)
+        return x
 
 #
 ## Configuration
@@ -50,75 +49,67 @@ def forward(self, x):
 #
 ## Training Loop
 #
-def train(runner, net, config, logger = None, logwriter = None):
-    finished = False
-    while not finished:
-        runner.run()
-        net.calc_gradients()
-        net.step()
-        if logwriter is not None:
-            net.log_named_parameters()
-            logwriter.write(logger)
-        finished = runner.episode_num > config['total_training_episodes']
+def train(runner, net, config, logwriter=None):
+    finished = False
+    while not finished:
+        runner.run()
+        net.calc_gradients()
+        net.step()
+        if logwriter is not None:
+            net.log_named_parameters()
+            logwriter.write(Logger)
+        finished = runner.episode_num > config['total_training_episodes']
 
 #
 ## Loss function
 #
 def fitness(model):
-    env = gym.make("Acrobot-v1")
-    state = torch.from_numpy(env.reset()).float().unsqueeze(0)
-    total_reward = 0
-    done = False
-    while not done:
-        action_probabilities = model(state)
-        distribution = Categorical(action_probabilities)
-        action = distribution.sample().item()
-        next_state, reward, done, _ = env.step(action)
-        total_reward += reward
-        state = torch.from_numpy(next_state).float().unsqueeze(0)
-    return -total_reward
+    env = gym.make("Acrobot-v1")
+    state = torch.from_numpy(env.reset()).float().unsqueeze(0)
+    total_reward = 0
+    done = False
+    while not done:
+        action_probabilities = model(state)
+        distribution = Categorical(action_probabilities)
+        action = distribution.sample().item()
+        next_state, reward, done, _ = env.step(action)
+        total_reward += reward
+        state = torch.from_numpy(next_state).float().unsqueeze(0)
+    return -total_reward
 
 if __name__ == "__main__":
-    # Hide internal gym warnings
-    gym.logger.set_level(40)
-
-    # Setting up the environment
-    rltorch.set_seed(config['seed'])
-    print("Setting up environment...", end = " ")
-    env = E.TorchWrap(gym.make(config['environment_name']))
-    env.seed(config['seed'])
-    print("Done.")
-
-    state_size = env.observation_space.shape[0]
-    action_size = env.action_space.n
-
-    # Logging
-    logger = rltorch.log.Logger()
-    logwriter = rltorch.log.LogWriter(SummaryWriter())
-
-    # Setting up the networks
-    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
-    net = rn.ESNetwork(Policy(state_size, action_size),
-                       torch.optim.Adam, 100, fitness, config, device = device, name = "ES", logger = logger)
-
-    # Actor takes a net and uses it to produce actions from given states
-    actor = StochasticSelector(net, action_size, device = device)
-
-    # Runner performs an episode of the environment
-    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name = "Training", logwriter = logwriter)
-
-    print("Training...")
-    train(runner, net, config, logger = logger, logwriter = logwriter)
-
-    # For profiling...
-    # import cProfile
-    # cProfile.run('train(runner, agent, config, logger = logger, logwriter = logwriter )')
-    # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
-
-    print("Training Finished.")
-
-    print("Evaluating...")
-    rltorch.env.simulateEnvEps(env, actor, config, total_episodes = config['total_evaluation_episodes'], logger = logger, name = "Evaluation")
-    print("Evaulations Done.")
-
-    logwriter.close() # We don't need to write anything out to disk anymore
+    # Hide internal gym warnings
+    gym.logger.set_level(40)
+
+    # Setting up the environment
+    rltorch.set_seed(config['seed'])
+    print("Setting up environment...", end=" ")
+    env = E.TorchWrap(gym.make(config['environment_name']))
+    env.seed(config['seed'])
+    print("Done.")
+
+    state_size = env.observation_space.shape[0]
+    action_size = env.action_space.n
+
+    # Logging
+    logwriter = rltorch.log.LogWriter(SummaryWriter())
+    # Setting up the networks
+    device = torch.device("cuda:0" if torch.cuda.is_available() and not config['disable_cuda'] else "cpu")
+    net = rn.ESNetwork(Policy(state_size, action_size),
+                       torch.optim.Adam, 100, fitness, config, device=device, name="ES")
+    # Actor takes a net and uses it to produce actions from given states
+    actor = StochasticSelector(net, action_size, device=device)
+    # Runner performs an episode of the environment
+    runner = rltorch.env.EnvironmentEpisodeSync(env, actor, config, name="Training", logwriter=logwriter)
+    print("Training...")
+    train(runner, net, config, logwriter=logwriter)
+    # For profiling...
+    # import cProfile
+    # cProfile.run('train(runner, agent, config, logwriter = logwriter )')
+    # python -m torch.utils.bottleneck /path/to/source/script.py [args] is also a good solution...
+    print("Training Finished.")
+    print("Evaluating...")
+    rltorch.env.simulateEnvEps(env, actor, config, total_episodes=config['total_evaluation_episodes'], name="Evaluation")
+    print("Evaulations Done.")
+
+    logwriter.close() # We don't need to write anything out to disk anymore
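The fitness function in this script returns the negative episode reward, so lower values are better. As a rough, generic sketch of how such a callback plugs into an evolution-strategies update (this is not rltorch's ESNetwork implementation; the population size of 100 and the sigma/lr values only mirror or illustrate the constructor arguments above):

import torch

def es_update_sketch(model, fitness, population_size=100, sigma=0.05, lr=0.01):
    # Flatten the current parameters, sample Gaussian perturbations, score each
    # perturbed copy with the fitness callback, then step the parameters against
    # the estimated gradient of fitness (lower fitness = higher reward).
    params = torch.nn.utils.parameters_to_vector(model.parameters()).detach()
    noise = torch.randn(population_size, params.numel())
    scores = []
    with torch.no_grad():
        for eps in noise:
            torch.nn.utils.vector_to_parameters(params + sigma * eps, model.parameters())
            scores.append(fitness(model))
    scores = torch.tensor(scores, dtype=torch.float32)
    # Normalize scores and form the ES gradient estimate of the fitness objective
    weights = (scores - scores.mean()) / (scores.std() + 1e-8)
    grad_estimate = (weights.unsqueeze(1) * noise).mean(dim=0) / sigma
    # Descend the fitness estimate and restore the updated parameters into the model
    torch.nn.utils.vector_to_parameters(params - lr * grad_estimate, model.parameters())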
