 from model_RL.PPO_model_pers import PPO
 
 import gymnasium as gym
+from torch.utils.tensorboard import SummaryWriter
+import random
+import torch
+import numpy as np
+import time
+import wandb
 import json
+import os
 
+if __name__ == '__main__':
 
-def train():
-    mesh_size = 30
-    lr = 0.0001
-    gamma = 0.9
-
-    #dataset = [random_mesh() for _ in range(9)]
-    #plot_dataset(dataset)
-
+    with open("model_RL/parameters/ppo_config.json", "r") as f:
+        ppo_config = json.load(f)
     with open("environment/environment_config.json", "r") as f:
         env_config = json.load(f)
 
+    # Create log dir
+    log_dir = ppo_config["tensorboard_log"]
+    os.makedirs(log_dir, exist_ok=True)
+
     # Create the environment
     env = gym.make(
         env_config["env_name"],
@@ -35,16 +41,40 @@ def train():
         with_degree_obs=env_config["with_degree_observation"]
     )
 
-    model = PPO(env, lr, gamma, nb_iterations=15, nb_episodes_per_iteration=100, nb_epochs=5, batch_size=8)
-    actor, rewards, wins, steps = model.learn()
-    if rewards is not None:
-        plot_training_results(rewards, wins, steps)
+    model = PPO(
+        env=env,
+        lr=ppo_config["learning_rate"],
+        gamma=ppo_config["gamma"],
+        nb_iterations=20,
+        nb_episodes_per_iteration=100,
+        nb_epochs=5,
+        batch_size=8
+    )
 
-    """
-    # torch.save(actor.state_dict(), 'policy_saved/actor_network.pth')
-    avg_steps, avg_wins, avg_rewards, final_meshes = testPolicy(actor, 5, dataset, 60)
+    run_name = f"{env_config['env_name']}__{1}__{int(time.time())}"
+    # Create log dir
+    log_dir = ppo_config["tensorboard_log"]
+    os.makedirs(log_dir, exist_ok=True)
 
+    # SEEDING
+    seed = 1
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+
+    writer = SummaryWriter(f"results/runs/{run_name}")
+    writer.add_text(
+        "Environment config",
+        "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in env_config.items()])),
+    )
+    writer.add_text(
+        "PPO config",
+        "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in ppo_config.items()])),
+    )
+
+    actor, rewards, wins, steps = model.learn(writer)
+    writer.close()
     if rewards is not None:
-        plot_test_results(avg_rewards, avg_wins, avg_steps, avg_rewards)
-        plot_dataset(final_meshes)
-    """
+        plot_training_results(rewards, wins, steps)
+    # torch.save(actor.state_dict(), 'policy_saved/actor_network.pth')
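
The new entry point assumes a model_RL/parameters/ppo_config.json providing at least the three keys the script reads: learning_rate, gamma and tensorboard_log. A minimal sketch of such a file, generated from Python; the learning_rate and gamma values mirror the constants removed from train(), while the tensorboard_log path is an assumption, not taken from the PR.

# Hypothetical helper, not part of the PR: writes a ppo_config.json with
# the keys the new __main__ block expects.
import json
import os

ppo_config = {
    "learning_rate": 0.0001,            # passed to PPO as lr
    "gamma": 0.9,                       # discount factor passed to PPO
    "tensorboard_log": "results/runs"   # directory created with os.makedirs
}
os.makedirs("model_RL/parameters", exist_ok=True)
with open("model_RL/parameters/ppo_config.json", "w") as f:
    json.dump(ppo_config, f, indent=4)

With that file in place, each run logs to results/runs/<run_name> through SummaryWriter and can be inspected with tensorboard --logdir results/runs.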