
Commit d939f47

Add Tensorboard visualisation to quad training file
1 parent d95b8a6 commit d939f47

File tree

2 files changed: +58 -19 lines changed


model_RL/PPO_model_pers.py

Lines changed: 10 additions & 1 deletion
@@ -199,14 +199,16 @@ def train(self, dataset):
             p.requires_grad = True
             start = stop + 1
 
-    def learn(self):
+    def learn(self, writer):
         """
         Train the PPO mesh_model
         :return: the actor policy, training rewards, training wins, len of episodes
         """
         rewards = []
         wins = []
         len_ep = []
+        global_step = 0
+        nb_episodes = 0
 
         try:
             for iteration in tqdm(range(self.nb_iterations)):
@@ -217,6 +219,8 @@ def learn(self):
                     next_obs, info = self.env.reset()
                     trajectory = []
                     ep_reward = 0
+                    ep_mesh_reward = 0
+                    ideal_reward = info["mesh_ideal_rewards"]
                     done = False
                     step = 0
                     while step < 40:
@@ -229,6 +233,7 @@ def learn(self):
                         gym_action = [action[2],int(action[0]/3)]
                         next_obs, reward, terminated, truncated, info = self.env.step(gym_action)
                         ep_reward += reward
+                        ep_mesh_reward += info["mesh_reward"]
                         if terminated:
                             if truncated:
                                 wins.append(0)
@@ -245,6 +250,10 @@ def learn(self):
                     rollouts.append(trajectory)
                     dataset.extend(trajectory)
                     len_ep.append(len(trajectory))
+                    nb_episodes += 1
+                    writer.add_scalar("episode_reward", ep_reward, nb_episodes)
+                    writer.add_scalar("normalized return", (ep_reward/ideal_reward), nb_episodes)
+                    writer.add_scalar("len_episodes", len(trajectory), nb_episodes)
 
                 self.train(dataset)
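
For reference, the per-episode logging added above uses the standard torch.utils.tensorboard API: writer.add_scalar(tag, value, step) writes one point per finished episode under the tags "episode_reward", "normalized return" and "len_episodes". Below is a minimal, self-contained sketch of that pattern; the log directory and the dummy episode values are illustrative only and do not come from this repository.

from torch.utils.tensorboard import SummaryWriter

# Hypothetical log directory, mirroring the results/runs/<run_name> layout used in train_quadmesh.py
writer = SummaryWriter("results/runs/demo")

nb_episodes = 0
for ep_reward, ep_len, ideal_reward in [(3.0, 12, 10.0), (7.5, 18, 10.0)]:  # dummy episodes
    nb_episodes += 1
    # Same three tags as in learn(); the x-axis is the episode counter
    writer.add_scalar("episode_reward", ep_reward, nb_episodes)
    writer.add_scalar("normalized return", ep_reward / ideal_reward, nb_episodes)
    writer.add_scalar("len_episodes", ep_len, nb_episodes)

writer.close()

Note that ep_mesh_reward and global_step are introduced in this commit but not yet passed to the writer; only the three scalars above are logged per episode.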

training/train_quadmesh.py

Lines changed: 48 additions & 18 deletions
@@ -10,20 +10,26 @@
 from model_RL.PPO_model_pers import PPO
 
 import gymnasium as gym
+from torch.utils.tensorboard import SummaryWriter
+import random
+import torch
+import numpy as np
+import time
+import wandb
 import json
+import os
 
+if __name__ == '__main__':
 
-def train():
-    mesh_size = 30
-    lr = 0.0001
-    gamma = 0.9
-
-    #dataset = [random_mesh() for _ in range(9)]
-    #plot_dataset(dataset)
-
+    with open("model_RL/parameters/ppo_config.json", "r") as f:
+        ppo_config = json.load(f)
     with open("environment/environment_config.json", "r") as f:
         env_config = json.load(f)
 
+    # Create log dir
+    log_dir = ppo_config["tensorboard_log"]
+    os.makedirs(log_dir, exist_ok=True)
+
     # Create the environment
     env = gym.make(
         env_config["env_name"],
@@ -35,16 +41,40 @@ def train():
         with_degree_obs=env_config["with_degree_observation"]
     )
 
-    model = PPO(env, lr, gamma, nb_iterations=15, nb_episodes_per_iteration=100, nb_epochs=5, batch_size=8)
-    actor, rewards, wins, steps = model.learn()
-    if rewards is not None:
-        plot_training_results(rewards, wins, steps)
+    model = PPO(
+        env=env,
+        lr=ppo_config["learning_rate"],
+        gamma=ppo_config["gamma"],
+        nb_iterations=20,
+        nb_episodes_per_iteration=100,
+        nb_epochs=5,
+        batch_size=8
+    )
 
-    """
-    # torch.save(actor.state_dict(), 'policy_saved/actor_network.pth')
-    avg_steps, avg_wins, avg_rewards, final_meshes = testPolicy(actor, 5, dataset, 60)
+    run_name = f"{env_config['env_name']}__{1}__{int(time.time())}"
+    # Create log dir
+    log_dir = ppo_config["tensorboard_log"]
+    os.makedirs(log_dir, exist_ok=True)
 
+    # SEEDING
+    seed = 1
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+
+    writer = SummaryWriter(f"results/runs/{run_name}")
+    writer.add_text(
+        "Environment config",
+        "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in env_config.items()])),
+    )
+    writer.add_text(
+        "PPO config",
+        "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in ppo_config.items()])),
+    )
+
+    actor, rewards, wins, steps = model.learn(writer)
+    writer.close()
     if rewards is not None:
-        plot_test_results(avg_rewards, avg_wins, avg_steps, avg_rewards)
-        plot_dataset(final_meshes)
-    """
+        plot_training_results(rewards, wins, steps)
+    # torch.save(actor.state_dict(), 'policy_saved/actor_network.pth')
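
The two add_text calls above publish the environment and PPO hyperparameters to TensorBoard's "Text" tab as Markdown tables. A small sketch of how that table string is built, using placeholder config values rather than the real JSON files:

# Illustrative config; in the script the values come from ppo_config.json / environment_config.json
ppo_config = {"learning_rate": 0.0001, "gamma": 0.9}

# add_text accepts Markdown, so the dict is flattened into a |param|value| table
table = "|param|value|\n|-|-|\n%s" % ("\n".join(f"|{key}|{value}|" for key, value in ppo_config.items()))
print(table)
# |param|value|
# |-|-|
# |learning_rate|0.0001|
# |gamma|0.9|

Once a run has been written under results/runs/, it can be inspected with tensorboard --logdir results/runs, which shows these tables alongside the per-episode scalars logged in PPO_model_pers.py.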
