Commit e118d27 (1 parent: e999f81)

Better handling of learning parameters.
New file called "exploit_PPO_perso.py" to evaluate PPO learning.

8 files changed: +357 −35 lines

environment/gymnasium_envs/quadmesh_env/envs/mesh_conv.py

Lines changed: 81 additions & 14 deletions
@@ -7,7 +7,8 @@
 def get_x(state: Mesh, n_darts_selected: int, deep :int, degree: bool, restricted:bool, nodes_scores: list[int], nodes_adjacency: list[int]):
     mesh = state
     if degree:
-        template, darts_id = get_template_deg(mesh, deep, nodes_scores, nodes_adjacency)
+        deep = int(deep / 2)
+        template, darts_id = get_template_boundary(mesh, deep, nodes_scores, nodes_adjacency)
     else:
         template, darts_id = get_template(mesh, deep, nodes_scores)
 
@@ -126,19 +127,85 @@ def get_template_deg(mesh: Mesh, deep: int, nodes_scores, nodes_adjacency):
     if deep > 4:
         while len(E) < deep:
             df = F.pop(0)
-            df1 = df.get_beta(1)
-            df11 = df1.get_beta(1)
-            df111 = df11.get_beta(1)
-            F.append(df1)
-            F.append(df11)
-            F.append(df111)
-            N1, N2 = df11.get_node(), df111.get_node()
-            E.append(N1)
-            template[n_darts - 1, len(E)] = nodes_scores[N1.id]
-            template[n_darts - 1, deep + len(E)] = nodes_adjacency[N1.id]
-            E.append(N2)
-            template[n_darts - 1, len(E)] = nodes_scores[N2.id]
-            template[n_darts - 1, deep + len(E)] = nodes_adjacency[N2.id]
+            if df is not None:
+                df1 = df.get_beta(1)
+                df11 = df1.get_beta(1)
+                df111 = df11.get_beta(1)
+                F.append(df1)
+                F.append(df11)
+                F.append(df111)
+                N1, N2 = df11.get_node(), df111.get_node()
+                E.append(N1)
+                template[n_darts-1, len(E)-1] = nodes_scores[N1.id]
+                template[n_darts-1, deep + len(E)-1] = nodes_adjacency[N1.id]
+                E.append(N2)
+                template[n_darts - 1, len(E)-1] = nodes_scores[N2.id]
+                template[n_darts - 1, deep + len(E)-1] = nodes_adjacency[N2.id]
+            else:
+                E.extend([None, None])
+                #template[n_darts - 1, len(E) - 1] = -500 # dummy vertices are assigned to -500
+                #template[n_darts - 1, len(E) - 2] = -500 # dummy vertices are assigned to -500
+
+    template = template[:n_darts, :]
+    return template, dart_ids
+
+def get_template_boundary(mesh: Mesh, deep: int, nodes_scores, nodes_adjacency):
+    size = len(mesh.dart_info)
+    template = np.zeros((size, deep*2), dtype=np.int64)
+    dart_ids = []
+    n_darts = 0
+
+    for d_info in mesh.active_darts():
+        n_darts += 1
+        d_id = d_info[0]
+        dart_ids.append(d_id)
+        d = Dart(mesh, d_id)
+        A = d.get_node()
+        d1 = d.get_beta(1)
+        B = d1.get_node()
+        d11 = d1.get_beta(1)
+        C = d11.get_node()
+        d111 = d11.get_beta(1)
+        D = d111.get_node()
+
+        # Level-1 template
+        template[n_darts - 1, 0] = nodes_scores[A.id]
+        template[n_darts - 1, deep] = 1
+        template[n_darts - 1, 1] = nodes_scores[B.id]
+        template[n_darts - 1, deep+1] = 1
+        template[n_darts - 1, 2] = nodes_scores[C.id]
+        template[n_darts - 1, deep+2] = 1
+        template[n_darts - 1, 3] = nodes_scores[D.id]
+        template[n_darts - 1, deep + 3] = 1
+
+        E = [A, B, C, D]
+        deep_captured = len(E)
+        d2 = d.get_beta(2)
+        d12 = d1.get_beta(2)
+        d112 = d11.get_beta(2)
+        d1112 = d111.get_beta(2)
+        F = [d2, d12, d112, d1112]
+        if deep > 4:
+            while len(E) < deep:
+                df = F.pop(0)
+                if df is not None:
+                    df1 = df.get_beta(1)
+                    df11 = df1.get_beta(1)
+                    df111 = df11.get_beta(1)
+                    F.append(df1)
+                    F.append(df11)
+                    F.append(df111)
+                    N1, N2 = df11.get_node(), df111.get_node()
+                    E.append(N1)
+                    template[n_darts-1, len(E)-1] = nodes_scores[N1.id]
+                    template[n_darts-1, deep + len(E)-1] = 1
+                    E.append(N2)
+                    template[n_darts - 1, len(E)-1] = nodes_scores[N2.id]
+                    template[n_darts - 1, deep + len(E)-1] = 1
+                else:
+                    E.extend([None, None])
+                    #template[n_darts - 1, len(E) - 1] = -500 # dummy vertices are assigned to -500
+                    #template[n_darts - 1, len(E) - 2] = -500 # dummy vertices are assigned to -500
 
     template = template[:n_darts, :]
     return template, dart_ids
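
For context, a minimal sketch (not code from the repository) of the per-dart row layout that get_template_boundary appears to build: the first `deep` columns hold node scores and the next `deep` columns hold a constant flag of 1 (presumably a boundary/presence indicator, where get_template_deg stored node adjacency), with get_x now halving `deep` before the call. The values and helper names below are assumptions.

import numpy as np

deep = 8                       # "deep" as passed to get_x
half = int(deep / 2)           # mirrors `deep = int(deep / 2)` in get_x
nodes_scores = [2, -1, 0, 1]   # hypothetical scores for the level-1 nodes A, B, C, D

row = np.zeros(half * 2, dtype=np.int64)
row[0:4] = nodes_scores        # columns [0, half): node scores
row[half:half + 4] = 1         # columns [half, 2*half): flag set to 1 per captured node
print(row)                     # [ 2 -1  0  1  1  1  1  1]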

environment/gymnasium_envs/quadmesh_env/envs/quadmesh.py

Lines changed: 8 additions & 4 deletions
@@ -145,18 +145,22 @@ def step(self, action: np.ndarray):
             # An episode is done if the actual score is the same as the ideal
             next_nodes_score, self.next_mesh_score, _, next_nodes_adjacency = global_score(self.mesh)
             terminated = np.array_equal(self._ideal_score, self.next_mesh_score)
-            mesh_reward = (self._mesh_score - self.next_mesh_score)*10
-            reward = mesh_reward
+            if terminated:
+                mesh_reward = (self._mesh_score - self.next_mesh_score)*10
+                reward = mesh_reward
+            else:
+                mesh_reward = (self._mesh_score - self.next_mesh_score)*10
+                reward = mesh_reward
             self._nodes_scores, self._mesh_score, self._nodes_adjacency = next_nodes_score, self.next_mesh_score, next_nodes_adjacency
             self.observation = self._get_obs()
             self.nb_invalid_actions = 0
         elif not valid_topo:
-            reward = -10
+            reward = -3
             mesh_reward = 0
             terminated = False
             self.nb_invalid_actions += 1
         elif not valid_geo:
-            mesh_reward = 0
+            mesh_reward = -1
             terminated = False
             reward = 0
             self.nb_invalid_actions += 1
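
A minimal standalone sketch of the reward scheme after this change (not the environment code; the first branch is assumed to be the valid-action case, and prev_score / next_score stand in for self._mesh_score / self.next_mesh_score; the new if terminated / else branches currently assign the same value, so the sketch collapses them): a valid action earns ten times the score improvement, a topologically invalid action is now penalised with -3 instead of -10, and a geometrically invalid action keeps reward 0 but records a mesh_reward of -1.

def step_reward(valid_topo: bool, valid_geo: bool, prev_score: int, next_score: int):
    """Return (reward, mesh_reward) for one step, per the hunk above."""
    if valid_topo and valid_geo:
        mesh_reward = (prev_score - next_score) * 10
        return mesh_reward, mesh_reward      # reward == mesh_reward for valid actions
    if not valid_topo:
        return -3, 0                         # penalty softened from -10 to -3
    return 0, -1                             # invalid geometry: reward 0, mesh_reward -1

print(step_reward(True, True, 5, 2))    # (30, 30)
print(step_reward(False, True, 5, 2))   # (-3, 0)
print(step_reward(True, False, 5, 2))   # (0, -1)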

model_RL/PPO_model_pers.py

Lines changed: 16 additions & 6 deletions
@@ -137,10 +137,11 @@ def learn(self, critic_loss):
 
 
 class PPO:
-    def __init__(self, env, lr, gamma, nb_iterations, nb_episodes_per_iteration, nb_epochs, batch_size):
+    def __init__(self, env, obs_size, max_steps, lr, gamma, nb_iterations, nb_episodes_per_iteration, nb_epochs, batch_size):
         self.env = env
-        self.actor = Actor(env, 10*8, 4*10, lr=0.0001)
-        self.critic = Critic(8*10, lr=0.0001)
+        self.max_steps = max_steps
+        self.actor = Actor(self.env, obs_size, 4*10, lr=lr)
+        self.critic = Critic(obs_size, lr=lr)
         self.lr = lr
         self.gamma = gamma
         self.nb_iterations = nb_iterations
@@ -205,6 +206,7 @@ def learn(self, writer):
         rewards = []
         wins = []
         len_ep = []
+        valid_actions = []
         global_step = 0
         nb_episodes = 0
 
@@ -218,11 +220,12 @@
                 trajectory = []
                 ep_reward = 0
                 ep_mesh_reward = 0
+                ep_valid_actions = 0
                 ideal_reward = info["mesh_ideal_rewards"]
                 G = 0
                 done = False
                 step = 0
-                while step < 40:
+                while step < self.max_steps:
                     state = copy.deepcopy(info["mesh"])
                     obs = next_obs
                     action, prob = self.actor.select_action(obs, info)
@@ -233,6 +236,7 @@
                     next_obs, reward, terminated, truncated, info = self.env.step(gym_action)
                     ep_reward += reward
                     ep_mesh_reward += info["mesh_reward"]
+                    ep_valid_actions += info["valid_action"]
                     G = info["mesh_reward"] + 0.9 * G
                     if terminated:
                         if truncated:
@@ -247,14 +251,20 @@
                     step += 1
                 if len(trajectory) != 0:
                     rewards.append(ep_reward)
+                    valid_actions.append(ep_valid_actions)
                     rollouts.append(trajectory)
                     dataset.extend(trajectory)
                     len_ep.append(len(trajectory))
                 nb_episodes += 1
                 writer.add_scalar("episode_reward", ep_reward, nb_episodes)
                 writer.add_scalar("episode_mesh_reward", ep_mesh_reward, nb_episodes)
-                writer.add_scalar("normalized return", (ep_mesh_reward/ideal_reward), nb_episodes)
-                writer.add_scalar("len_episodes", len(trajectory), nb_episodes)
+                if ideal_reward != 0:
+                    writer.add_scalar("normalized return", (ep_mesh_reward/ideal_reward), nb_episodes)
+                else:
+                    writer.add_scalar("normalized return", ep_mesh_reward, nb_episodes)
+                if len(trajectory) != 0:
+                    writer.add_scalar("len_episodes", len(trajectory), nb_episodes)
+                    writer.add_scalar("valid_actions", ep_valid_actions*100/len(trajectory), nb_episodes)
 
             self.train(dataset)
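
A hedged usage sketch for the reworked constructor: obs_size and max_steps are now injected instead of the previous hard-coded Actor(env, 10*8, 4*10, lr=0.0001) / Critic(8*10, lr=0.0001) and `while step < 40`. The config keys mirror environment_config.json as used elsewhere in this commit; the obs_size formula and the training hyperparameters below are assumptions, not values from the repository.

import json

import gymnasium as gym

from mesh_model.reader import read_gmsh
from model_RL.PPO_model_pers import PPO

with open("../environment/environment_config.json", "r") as f:
    env_config = json.load(f)

env = gym.make(
    env_config["env_name"],
    mesh=read_gmsh("../mesh_files/medium_quad.msh"),
    max_episode_steps=env_config["max_episode_steps"],
    n_darts_selected=env_config["n_darts_selected"],
    deep=env_config["deep"],
    action_restriction=env_config["action_restriction"],
    with_degree_obs=env_config["with_degree_observation"]
)

# Assumed observation layout: n_darts_selected rows of `deep` features each,
# flattened (10*8 = 80 in the previous hard-coded version).
obs_size = env_config["n_darts_selected"] * env_config["deep"]

ppo = PPO(
    env,
    obs_size=obs_size,
    max_steps=env_config["max_episode_steps"],
    lr=1e-4,                        # hypothetical hyperparameters
    gamma=0.9,
    nb_iterations=10,
    nb_episodes_per_iteration=100,
    nb_epochs=5,
    batch_size=64,
)
# ppo.learn(writer) would then run training, with writer being e.g. a TensorBoard SummaryWriter.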

training/exploit_PPO_perso.py

Lines changed: 127 additions & 0 deletions (new file)

from numpy import ndarray

import gymnasium as gym
import json
import torch
from torch.distributions import Categorical
from model_RL.PPO_model_pers import Actor
from stable_baselines3 import PPO
from mesh_model.mesh_analysis.global_mesh_analysis import global_score
from mesh_model.mesh_struct.mesh import Mesh
from mesh_model.reader import read_gmsh
from view.mesh_plotter.create_plots import plot_test_results
from view.mesh_plotter.mesh_plots import plot_dataset
from environment.actions.smoothing import smoothing_mean
import mesh_model.random_quadmesh as QM
from environment.gymnasium_envs.quadmesh_env.envs.quadmesh import QuadMeshEnv
import numpy as np
import copy
from tqdm import tqdm


def testPolicy(
        actor,
        n_eval_episodes: int,
        env_config,
        dataset: list[Mesh]
) -> tuple[ndarray, ndarray, ndarray, ndarray, list[Mesh]]:
    """
    Tests the policy on each mesh of a dataset with n_eval_episodes.

    :param actor: the actor (policy network) to test
    :param n_eval_episodes: number of evaluation episodes on each mesh
    :param env_config: environment configuration used to build the evaluation env
    :param dataset: list of mesh objects
    :return: average episode length, number of wins, average mesh reward,
        average normalized return per mesh, and the dataset of best modified meshes
    """
    print('Testing policy')
    avg_length = np.zeros(len(dataset))
    avg_mesh_rewards = np.zeros(len(dataset))
    avg_normalized_return = np.zeros(len(dataset))
    nb_wins = np.zeros(len(dataset))
    final_meshes = []
    for i, mesh in tqdm(enumerate(dataset, 1)):
        best_mesh = mesh
        env = gym.make(
            env_config["env_name"],
            max_episode_steps=30,
            mesh=mesh,
            n_darts_selected=env_config["n_darts_selected"],
            deep=env_config["deep"],
            action_restriction=env_config["action_restriction"],
            with_degree_obs=env_config["with_degree_observation"]
        )
        for _ in range(n_eval_episodes):
            terminated = False
            truncated = False
            ep_mesh_rewards: int = 0
            ep_length: int = 0
            observation, info = env.reset(options={"mesh": copy.deepcopy(mesh)})
            while not terminated and not truncated:
                obs = torch.tensor(observation.flatten(), dtype=torch.float32)
                pmf = actor.forward(obs)
                dist = Categorical(pmf)
                action = dist.sample()
                action = action.tolist()
                action_dart = int(action / 4)
                action_type = action % 4
                gymnasium_action = [action_type, action_dart]
                if action is None:
                    env.terminal = True
                    break
                observation, reward, terminated, truncated, info = env.step(gymnasium_action)
                ep_mesh_rewards += info['mesh_reward']
                ep_length += 1
            if terminated:
                nb_wins[i-1] += 1
            if isBetterMesh(best_mesh, info['mesh']):
                best_mesh = copy.deepcopy(info['mesh'])
            avg_length[i-1] += ep_length
            avg_mesh_rewards[i-1] += ep_mesh_rewards
            avg_normalized_return[i-1] += 0 if info['mesh_ideal_rewards'] == 0 else ep_mesh_rewards/info['mesh_ideal_rewards']
        final_meshes.append(best_mesh)
        avg_length[i-1] = avg_length[i-1]/n_eval_episodes
        avg_mesh_rewards[i-1] = avg_mesh_rewards[i-1]/n_eval_episodes
        avg_normalized_return[i-1] = avg_normalized_return[i-1]/n_eval_episodes
    return avg_length, nb_wins, avg_mesh_rewards, avg_normalized_return, final_meshes


def isBetterPolicy(actual_best_policy, policy_to_test):
    if actual_best_policy is None:
        return True

def isBetterMesh(best_mesh, actual_mesh):
    if best_mesh is None or global_score(best_mesh)[1] > global_score(actual_mesh)[1]:
        return True
    else:
        return False

if __name__ == '__main__':

    # Create a dataset of 9 meshes
    mesh = read_gmsh("../mesh_files/medium_quad.msh")
    dataset = [mesh for _ in range(9)]
    with open("../environment/environment_config.json", "r") as f:
        env_config = json.load(f)
    plot_dataset(dataset)

    env = gym.make(
        env_config["env_name"],
        mesh=mesh,
        max_episode_steps=env_config["max_episode_steps"],
        n_darts_selected=env_config["n_darts_selected"],
        deep=env_config["deep"],
        action_restriction=env_config["action_restriction"],
        with_degree_obs=env_config["with_degree_observation"]
    )

    # Load the model
    actor = Actor(env, 10*8, 4*10, lr=0.0001)
    actor.load_state_dict(torch.load('policy_saved/quad-perso/medium_quad_perso.pth'))
    avg_steps, avg_wins, avg_rewards, normalized_return, final_meshes = testPolicy(actor, 15, env_config, dataset)

    plot_test_results(avg_rewards, avg_wins, avg_steps, normalized_return)
    plot_dataset(final_meshes)
    for m in final_meshes:
        smoothing_mean(m)
    plot_dataset(final_meshes)
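
One detail worth spelling out from testPolicy above: the actor samples a single flat index from a Categorical over 4*10 outputs and decodes it into an action type and a dart index. A small sketch of that decoding (the 4-actions-per-dart layout is inferred from the `/ 4` and `% 4` above; the helper name is hypothetical):

def decode_action(flat_action: int, n_action_types: int = 4) -> list[int]:
    # Split a flat Categorical sample into [action_type, action_dart],
    # matching the gymnasium_action ordering used in testPolicy.
    action_dart = flat_action // n_action_types
    action_type = flat_action % n_action_types
    return [action_type, action_dart]

assert decode_action(0) == [0, 0]    # action type 0 on dart 0
assert decode_action(7) == [3, 1]    # action type 3 on dart 1
assert decode_action(38) == [2, 9]   # action type 2 on dart 9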

training/exploit_trimesh.py

Lines changed: 63 additions & 0 deletions (new file)

import mesh_model.random_trimesh as TM
import torch
import json
import gymnasium as gym
from environment.trimesh_env import TriMesh
from model_RL.utilities.actor_critic_networks import Actor
from mesh_model.reader import read_gmsh
from view.mesh_plotter.create_plots import plot_test_results
from view.mesh_plotter.mesh_plots import plot_dataset
from environment.actions.smoothing import smoothing_mean
from model_RL.evaluate_model import testPolicy

LOCAL_MESH_FEAT = 0


def exploit():
    mesh_size = 12
    feature = LOCAL_MESH_FEAT

    dataset = [TM.random_mesh(30) for _ in range(9)]
    plot_dataset(dataset)

    env = TriMesh(None, mesh_size, max_steps=60, feat=feature)

    actor = Actor(env, 30, 15, lr=0.0001)
    actor.load_state_dict(torch.load('policy_saved/actor_network.pth'))

    avg_steps, avg_wins, avg_rewards, final_meshes = testPolicy(actor, 30, dataset, 100)

    if avg_steps is not None:
        plot_test_results(avg_rewards, avg_wins, avg_steps)
        plot_dataset(final_meshes)

if __name__ == '__main__':
    mesh = read_gmsh("../mesh_files/t1_quad.msh")

    # Create a dataset of 9 meshes
    dataset = [mesh for _ in range(9)]
    with open("../environment/environment_config.json", "r") as f:
        env_config = json.load(f)
    plot_dataset(dataset)

    env = gym.make(
        env_config["env_name"],
        mesh=read_gmsh("mesh_files/simple_quad.msh"),
        max_episode_steps=env_config["max_episode_steps"],
        n_darts_selected=env_config["n_darts_selected"],
        deep=env_config["deep"],
        action_restriction=env_config["action_restriction"],
        with_degree_obs=env_config["with_degree_observation"]
    )

    # Load the model
    actor = Actor(env, 10*8, 4*10, lr=0.0001)
    actor.load_state_dict(torch.load('policy_saved/actor_network.pth'))
    avg_steps, avg_wins, avg_rewards, final_meshes = testPolicy(actor, 15, dataset, 20)

    plot_test_results(avg_rewards, avg_wins, avg_steps)
    plot_dataset(final_meshes)
    for m in final_meshes:
        smoothing_mean(m)
    plot_dataset(final_meshes)
