Fix policy evaluation: copy mesh per episode, guard zero ideal reward, unpack 4-value global_score

ArzhelaR · ArzhelaR · commit 018b51771079 · 2025-01-31T10:54:18.000+01:00
diff --git a/environment/gymnasium_envs/trimesh_full_env/envs/trimesh.py b/environment/gymnasium_envs/trimesh_full_env/envs/trimesh.py
@@ -69,7 +69,7 @@ def reset(self, seed=None, options=None):
         # We need the following line to seed self.np_random
         super().reset(seed=seed)
         if options is not None:
-            self.mesh = options.get("mesh", self.mesh)
+            self.mesh = options['mesh']
         else:
             self.mesh = random_mesh(self.mesh_size)
         self.nb_darts = len(self.mesh.dart_info)
diff --git a/exploit_SB3_policy.py b/exploit_SB3_policy.py
@@ -1,6 +1,7 @@
 from numpy import ndarray
 
 import gymnasium as gym
+import json
 
 from environment.gymnasium_envs.trimesh_flip_env import TriMeshEnvFlip
 from environment.gymnasium_envs.trimesh_full_env import TriMeshEnvFull
@@ -50,9 +51,9 @@ def testPolicy(
             truncated = False
             ep_mesh_rewards: int = 0
             ep_length: int = 0
-            obs, info = env.reset(options={"mesh": mesh})
+            obs, info = env.reset(options={"mesh": copy.deepcopy(mesh)})
             while terminated == False and truncated == False:
-                action, _states = model.predict(obs, deterministic=True)
+                action, _states = model.predict(obs, deterministic=False)
                 if action is None:
                     env.terminal = True
                     break
@@ -65,7 +66,7 @@ def testPolicy(
                 best_mesh = copy.deepcopy(info['mesh'])
             avg_length[i-1] += ep_length
             avg_mesh_rewards[i-1] += ep_mesh_rewards
-            avg_normalized_return[i-1] += ep_mesh_rewards/info['mesh_ideal_rewards']
+            avg_normalized_return[i-1] += 0 if info['mesh_ideal_rewards'] == 0 else ep_mesh_rewards/info['mesh_ideal_rewards']
         final_meshes.append(best_mesh)
         avg_length[i-1] = avg_length[i-1]/n_eval_episodes
         avg_mesh_rewards[i-1] = avg_mesh_rewards[i-1]/n_eval_episodes
@@ -83,13 +84,14 @@ def isBetterMesh(best_mesh, actual_mesh):
     else:
         return False
 
+
 """
 dataset = [TM.random_mesh(30) for _ in range(9)]
 with open("environment/parameters/environment_config.json", "r") as f:
     env_config = json.load(f)
 plot_dataset(dataset)
-model = PPO.load("policy_saved/final-2.zip")
-avg_steps, avg_wins, avg_rewards, avg_normalized_return, final_meshes = testPolicy(model, 5, env_config, dataset)
+model = PPO.load("policy_saved/final/final-PPO-3.zip")
+avg_steps, avg_wins, avg_rewards, avg_normalized_return, final_meshes = testPolicy(model, 10, env_config, dataset)
 
 plot_test_results(avg_rewards, avg_wins, avg_steps, avg_normalized_return)
 plot_dataset(final_meshes)
diff --git a/mesh_display.py b/mesh_display.py
@@ -35,5 +35,5 @@ def get_scores(self):
         Calculates the irregularities of each node and the real and ideal score of the mesh
         :return: a list of three elements (nodes_score, mesh_score, ideal_mesh_score)
         """
-        nodes_score, mesh_score, ideal_mesh_score = global_score(self.mesh)
+        nodes_score, mesh_score, ideal_mesh_score, adjacency = global_score(self.mesh)
         return [nodes_score, mesh_score, ideal_mesh_score]
diff --git a/policy_saved/final/final-PPO-3.zip b/policy_saved/final/final-PPO-3.zip