
Commit 79c3522

Fix issue with exploit file
1 parent 49663c6 commit 79c3522

File tree

2 files changed: +65 −38 lines changed

training/exploit_PPO_perso.py

Lines changed: 65 additions & 37 deletions
@@ -1,35 +1,42 @@
-from numpy import ndarray
-
-import gymnasium as gym
 import json
 import torch
+import copy
+import numpy as np
+import gymnasium as gym
+import yaml
+
+from tqdm import tqdm
+from numpy import ndarray
 from torch.distributions import Categorical
-from model_RL.PPO_model_pers import Actor
 
+from mesh_model.mesh_analysis.quadmesh_analysis import QuadMeshOldAnalysis
+from mesh_model.mesh_analysis.trimesh_analysis import TriMeshOldAnalysis, TriMeshQualityAnalysis
+from mesh_model.mesh_struct.mesh_elements import Dart
 from mesh_model.mesh_struct.mesh import Mesh
 from mesh_model.reader import read_gmsh
+from model_RL.PPO_model_pers import Actor
+
 from view.mesh_plotter.create_plots import plot_test_results
 from view.mesh_plotter.mesh_plots import plot_dataset
 from environment.actions.smoothing import smoothing_mean
-import mesh_model.random_quadmesh as QM
+
 from environment.gymnasium_envs.quadmesh_env.envs.quadmesh import QuadMeshEnv
-import numpy as np
-import copy
-from tqdm import tqdm
+from environment.gymnasium_envs.trimesh_full_env.envs.trimesh import TriMeshEnvFull
 
+import mesh_model.random_quadmesh as QM
 
 def testPolicy(
         actor,
         n_eval_episodes: int,
-        env_config,
+        config,
         dataset: list[Mesh]
 ) -> tuple[ndarray, ndarray, ndarray, ndarray, list[Mesh]]:
     """
     Tests policy on each mesh of a dataset with n_eval_episodes.
-    :param policy: the policy to test
+    :param actor: the policy to test
     :param n_eval_episodes: number of evaluation episodes on each mesh
+    :param config: configuration
     :param dataset: list of mesh objects
-    :param max_steps: max steps to evaluate
     :return: average length of evaluation episodes, number of wins, average reward per mesh, dataset with the modified meshes
     """
     print('Testing policy')
@@ -41,14 +48,15 @@ def testPolicy(
     for i, mesh in tqdm(enumerate(dataset, 1)):
         best_mesh = mesh
         env = gym.make(
-            env_config["env_name"],
-            max_episode_steps=30,
-            mesh = mesh,
-            n_darts_selected=env_config["n_darts_selected"],
-            deep= env_config["deep"],
-            action_restriction=env_config["action_restriction"],
-            with_degree_obs=env_config["with_degree_observation"],
-            render_mode="human"
+            config["env"]["env_id"],
+            max_episode_steps=config["env"]["max_episode_steps"],
+            mesh=mesh,
+            #mesh_size = 30,
+            n_darts_selected=config["env"]["n_darts_selected"],
+            deep=config["env"]["deep"],
+            action_restriction=config["env"]["action_restriction"],
+            with_quality_obs=config["env"]["with_quality_observation"],
+            render_mode=config["env"]["render_mode"],
         )
         for _ in range(n_eval_episodes):
             terminated = False
@@ -62,8 +70,8 @@ def testPolicy(
                 dist = Categorical(pmf)
                 action = dist.sample()
                 action = action.tolist()
-                action_dart = int(action / 4)
-                action_type = action % 4
+                action_dart = int(action / config["ppo"]["n_actions"])
+                action_type = action % config["ppo"]["n_actions"]
                 gymnasium_action = [action_type, action_dart]
                 if action is None:
                     env.terminal = True
@@ -73,7 +81,7 @@ def testPolicy(
                 ep_length += 1
             if terminated:
                 nb_wins[i-1] += 1
-                if isBetterMesh(best_mesh, info['mesh']):
+                if isBetterMesh(best_mesh, info['mesh'], config["env"]["analysis_type"]):
                     best_mesh = copy.deepcopy(info['mesh'])
             avg_length[i-1] += ep_length
             avg_mesh_rewards[i-1] += ep_mesh_rewards
@@ -89,37 +97,57 @@ def isBetterPolicy(actual_best_policy, policy_to_test):
     if actual_best_policy is None:
         return True
 
-def isBetterMesh(best_mesh, actual_mesh):
-    if best_mesh is None or global_score(best_mesh)[1] > global_score(actual_mesh)[1]:
+def isBetterMesh(best_mesh, actual_mesh, analysis_type):
+    tri = False
+    for d_info in actual_mesh.dart_info:
+        if d_info[0]>=0:
+            d = Dart(actual_mesh, d_info[0])
+            if d == ((d.get_beta(1)).get_beta(1)).get_beta(1):
+                tri = True
+            else:
+                tri = False
+            break
+    if tri:
+        if analysis_type == "old":
+            ma_best_mesh = TriMeshOldAnalysis(best_mesh)
+            ma_actual_mesh = TriMeshOldAnalysis(actual_mesh)
+        else:
+            ma_best_mesh = TriMeshQualityAnalysis(best_mesh)
+            ma_actual_mesh = TriMeshQualityAnalysis(actual_mesh)
+    else:
+        ma_best_mesh = QuadMeshOldAnalysis(best_mesh)
+        ma_actual_mesh = QuadMeshOldAnalysis(actual_mesh)
+    if best_mesh is None or ma_best_mesh.global_score()[1] > ma_actual_mesh.global_score()[1]:
         return True
     else:
         return False
 
 
 if __name__ == '__main__':
 
-
     #Create a dataset of 9 meshes
-    mesh = read_gmsh("../mesh_files/medium_quad.msh")
+    mesh = read_gmsh("../mesh_files/t1_tri.msh")
     dataset = [mesh for _ in range(9)]
-    with open("../environment/old_files/environment_config.json", "r") as f:
-        env_config = json.load(f)
+    with open("../training/config/trimesh_config_PPO_perso.yaml", "r") as f:
+        config = yaml.safe_load(f)
     plot_dataset(dataset)
 
     env = gym.make(
-        env_config["env_name"],
+        config["env"]["env_id"],
+        max_episode_steps=config["env"]["max_episode_steps"],
         mesh=mesh,
-        max_episode_steps=env_config["max_episode_steps"],
-        n_darts_selected=env_config["n_darts_selected"],
-        deep=env_config["deep"],
-        action_restriction=env_config["action_restriction"],
-        with_degree_obs=env_config["with_degree_observation"]
+        # mesh_size = 30,
+        n_darts_selected=config["env"]["n_darts_selected"],
+        deep=config["env"]["deep"],
+        action_restriction=config["env"]["action_restriction"],
+        with_quality_obs=config["env"]["with_quality_observation"],
+        render_mode=config["env"]["render_mode"],
     )
 
     #Load the model
-    actor = Actor(env, 10*8, 4*10, lr=0.0001)
-    actor.load_state_dict(torch.load('policy_saved/quad-perso/medium_quad_perso-2.pth'))
-    avg_steps, avg_wins, avg_rewards, normalized_return, final_meshes = testPolicy(actor, 15, env_config, dataset)
+    actor = Actor(env, config["env"]["obs_size"], config["ppo"]["n_actions"], n_darts_observed=config["env"]["n_darts_selected"], lr=0.0001)
+    actor.load_state_dict(torch.load('policy_saved/tri-perso/TEST-Exploit.pth'))
+    avg_steps, avg_wins, avg_rewards, normalized_return, final_meshes = testPolicy(actor, 15, config, dataset)
 
     plot_test_results(avg_rewards, avg_wins, avg_steps, normalized_return)
     plot_dataset(final_meshes)
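
Note: the script now reads its settings from training/config/trimesh_config_PPO_perso.yaml (loaded with yaml.safe_load) instead of the old environment_config.json. Below is a minimal sketch of the dict shape the script expects; the key names are the ones referenced in this diff, but the values and the env id are illustrative assumptions, not the repository's actual configuration.

# Hypothetical shape of the dict returned by yaml.safe_load for
# trimesh_config_PPO_perso.yaml; only the key names come from the diff,
# the values are placeholders.
config = {
    "env": {
        "env_id": "TriMeshEnvFull-v0",      # assumed gym registration id
        "max_episode_steps": 30,
        "n_darts_selected": 10,
        "deep": 6,
        "action_restriction": False,
        "with_quality_observation": True,
        "render_mode": None,
        "analysis_type": "quality",         # "old" selects the *OldAnalysis classes
        "obs_size": 60,                     # actor input size
    },
    "ppo": {
        "n_actions": 4,                     # actions per dart; the flat action index
                                            # splits as dart = idx // n_actions,
                                            # type = idx % n_actions
    },
}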

training/exploit_SB3_policy.py

Lines changed: 0 additions & 1 deletion
@@ -118,7 +118,6 @@ def isBetterMesh(best_mesh, actual_mesh, analysis_type):
 
 if __name__ == '__main__':
 
-
     #Create a dataset of 9 meshes
     mesh = read_gmsh("../mesh_files/tri-star.msh")
     # ma = TriMeshQualityAnalysis(mesh)
