Commit 126063a

fix an issue with the obs class having kwargs and show an example of how to use grid2op specialized with a custom backend
Signed-off-by: DONNOT Benjamin <[email protected]>
1 parent 9bba19d commit 126063a

File tree

11 files changed: +509 −181 lines changed
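The files below implement the example promised by the commit message. A minimal sketch of the resulting usage pattern, assuming the helper modules `_obs_with_n1` and `_reward_n1` added by this commit and the `l2rpn_case14_sandbox` test environment also used further down:

import numpy as np
import grid2op
from _obs_with_n1 import ObsWithN1
from _reward_n1 import N1Reward

monitored_lines = np.arange(5)  # line ids whose disconnection (n-1) is simulated
env = grid2op.make("l2rpn_case14_sandbox",
                   observation_class=ObsWithN1,
                   kwargs_observation={"n1_li": monitored_lines},
                   reward_class=N1Reward(n1_li=monitored_lines))
obs = env.reset()
obs, reward, done, info = env.step(env.action_space())
print(obs.n1_vals)  # one aggregated value per simulated n-1
print(reward)       # negated, aggregated n-1 severity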
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
from grid2op.Agent import BaseAgent
from grid2op.Action import ActionSpace
from grid2op.Environment import BaseEnv

# NB: this agent is, from grid2op's point of view,
# "cheating" because it accesses the env.backend._grid
# attribute when acting (so it does access private information).
# This is, for now, a "hack" and will be better handled in
# future grid2op versions.
# This means that, for now, you cannot use it properly with "simulate"
# for example.

# NB: as PSTs are not handled (yet, any contribution welcomed) in grid2op,
# this agent is for now limited to environments using the pandapower backend.


class AgentRandomPST(BaseAgent):
    def __init__(self,
                 action_space: ActionSpace,
                 env: BaseEnv):
        super().__init__(action_space)
        # keep a reference to the environment backend: `act` modifies
        # its pandapower grid (the `_grid` attribute) in place
        self._backend = env.backend

    def act(self, observation, reward, done=False):
        # perform a "random" PST action
        which_trafo = self.space_prng.randint(self._backend._grid.trafo["tap_phase_shifter"].sum())
        trafo_pst_ids = self._backend._grid.trafo["tap_phase_shifter"].values.nonzero()[0]
        trafo_id = trafo_pst_ids[which_trafo]

        which_tap = self.space_prng.choice([-2, -1, 0, 1, 2], size=1)[0]
        self._backend._grid.trafo.loc[trafo_id, "tap_pos"] = which_tap
        print(f"{trafo_id=}, {which_tap=}")
        # return the base grid2op action (do nothing in this case)
        return super().act(observation, reward, done)
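A minimal sketch of how this agent is meant to be driven (a condensed version of the full example script further below; `env` is assumed to be a grid2op environment built on the pandapower backend):

pst_agent = AgentRandomPST(action_space=env.action_space, env=env)
pst_agent.seed(0)

obs = env.reset(seed=0)
pst_agent._backend = env.backend  # re-sync after reset: the backend may have been rebuilt

action = pst_agent.act(obs, None, False)    # modifies env.backend._grid in place
obs, reward, done, info = env.step(action)  # grid2op itself only sees a "do nothing"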
Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
import copy
from typing import Literal
import numpy as np
from grid2op.Environment.baseEnv import BaseEnv
from grid2op.Observation import CompleteObservation
from grid2op.Backend import Backend
from grid2op.dtypes import dt_float


class ObsWithN1(CompleteObservation):
    # attributes that will be saved when the observation is
    # serialized as a numpy vector
    attr_list_vect = copy.deepcopy(CompleteObservation.attr_list_vect)
    attr_list_vect.append("n1_vals")

    # attributes that will also be used when the observation is
    # serialized as json
    attr_list_json = copy.deepcopy(CompleteObservation.attr_list_json)

    # attributes that will be copied
    # when the observation is copied
    attr_vect_cpy = copy.deepcopy(CompleteObservation.attr_vect_cpy)
    attr_vect_cpy.append("n1_vals")

    def __init__(self,
                 obs_env=None,
                 action_helper=None,
                 random_prng=None,
                 kwargs_env=None,
                 n1_li=None,
                 reduce_n1: Literal["max", "count", "sum"]="max",
                 compute_algo: Literal["ac", "dc"]="ac"):
        super().__init__(obs_env,
                         action_helper,
                         random_prng,
                         kwargs_env,
                         n1_li=n1_li,
                         reduce_n1=reduce_n1,
                         compute_algo=compute_algo)

        # list of lines for which to compute the n-1
        if n1_li is None:
            # all n-1 will be used
            self._n1_li = np.arange(type(self).n_line, dtype=int)
        else:
            self._n1_li = []
            for el in n1_li:
                if isinstance(el, str):
                    # user provided a line name
                    el = type(self).get_line_info(line_name=el)[0]
                else:
                    # user provided a line id
                    el = int(el)
                self._n1_li.append(el)
            # convert it to a np array
            self._n1_li = np.array(self._n1_li, dtype=int)

        # function to aggregate all information for one n-1
        # into a single scalar
        self._fun_reduce_n1 = reduce_n1

        # computation method (AC or DC)
        self._compute_algo = compute_algo

        # added attributes
        self.n1_vals = np.empty(self._n1_li.shape, dtype=dt_float)

    def update(self, env: BaseEnv, with_forecast=True):
        # update the standard attributes
        super().update(env, with_forecast=with_forecast)

        # update the n1 attribute (specific to this use case)
        for id_, line_id in enumerate(self._n1_li):
            this_backend: Backend = env.backend.copy_public()
            this_backend._disconnect_line(line_id)
            if self._compute_algo == "ac":
                conv, exc_ = this_backend.runpf(is_dc=False)
            elif self._compute_algo == "dc":
                conv, exc_ = this_backend.runpf(is_dc=True)
            else:
                raise RuntimeError(f"Unknown algorithm method '{self._compute_algo}', "
                                   "use one of 'ac' or 'dc'")
            if not conv:
                # powerflow has diverged
                self.n1_vals[id_] = np.nan
                continue

            rel_flow = this_backend.get_relative_flow()[self._n1_li]
            is_finite = np.isfinite(rel_flow)
            is_infinite = ~is_finite

            if self._fun_reduce_n1 == "max":
                if is_infinite.any():
                    self.n1_vals[id_] = 5.  # some kind of "gentle" max
                else:
                    self.n1_vals[id_] = rel_flow[is_finite].max()
            elif self._fun_reduce_n1 == "count":
                self.n1_vals[id_] = (rel_flow[is_finite] >= 1.).sum()
                self.n1_vals[id_] += is_infinite.sum()
            elif self._fun_reduce_n1 == "sum":
                self.n1_vals[id_] = rel_flow[is_finite].sum()
                self.n1_vals[id_] += is_infinite.sum() * 5.
            else:
                raise RuntimeError("Unknown way to summarize information for each n1, found "
                                   f"'{self._fun_reduce_n1}', please use one of "
                                   "'max', 'count' or 'sum'")
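Because "n1_vals" is appended to attr_list_vect and attr_vect_cpy, the extra values follow the observation through the usual vector serialization and copies. A short sketch (assuming an environment built with observation_class=ObsWithN1, as in the scripts below):

import grid2op
from _obs_with_n1 import ObsWithN1

env = grid2op.make("l2rpn_case14_sandbox", observation_class=ObsWithN1)
obs = env.reset()
print(obs.n1_vals)    # one aggregated value per simulated n-1
vect = obs.to_vect()  # "n1_vals" is serialized together with the standard attributes
copied = obs.copy()
print(copied.n1_vals)  # copied along with the rest of the observation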
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
from typing import Literal
import numpy as np
from grid2op.Reward import BaseReward
from grid2op.Environment import BaseEnv
from grid2op.Backend import Backend


class N1Reward(BaseReward):
    def __init__(self,
                 logger=None,
                 n1_li=None,
                 reduce_n1: Literal["max", "count", "sum"]="max",
                 reduce_reward: Literal["max", "count", "sum"]="max",
                 compute_algo: Literal["ac", "dc"]="ac"):
        super().__init__(logger)
        self.n1_li_init = n1_li
        self._n1_li = None

        # function to aggregate all information for one n-1
        # into a single scalar
        self._fun_reduce_n1 = reduce_n1

        # function to aggregate all rewards (one per n-1)
        self._fun_reduce_reward = reduce_reward

        # computation method (AC or DC)
        self._compute_algo = compute_algo

    def initialize(self, env: BaseEnv):
        super().initialize(env)
        if self.n1_li_init is None:
            self._n1_li = np.arange(type(env).n_line)
        else:
            self._n1_li = []
            for el in self.n1_li_init:
                if isinstance(el, str):
                    # user provided a line name
                    el = type(env).get_line_info(line_name=el)[0]
                else:
                    # user provided a line id
                    el = int(el)
                self._n1_li.append(el)
            # convert it to a np array
            self._n1_li = np.array(self._n1_li, dtype=int)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if is_done and (not has_error and not is_illegal and not is_ambiguous):
            # episode terminated without error
            return 1.  # max reward
        if has_error:
            # episode truncated
            return -10.  # min reward

        # TODO optimization here
        # if everything is the same in obs (provided that obs is ObsWithN1) and
        # in self (same lines simulated, same function to get the max)
        # then use:
        # obs = env.get_obs(_do_copy=False)
        # and obs.n1_vals instead of recomputing powerflows

        # perform the n-1 computation
        n1_vals = np.zeros(self._n1_li.shape, dtype=float)
        for id_, line_id in enumerate(self._n1_li):
            this_backend: Backend = env.backend.copy_public()
            this_backend._disconnect_line(line_id)
            if self._compute_algo == "ac":
                conv, exc_ = this_backend.runpf(is_dc=False)
            elif self._compute_algo == "dc":
                conv, exc_ = this_backend.runpf(is_dc=True)
            else:
                raise RuntimeError(f"Unknown algorithm method '{self._compute_algo}', "
                                   "use one of 'ac' or 'dc'")
            if not conv:
                # powerflow has diverged
                n1_vals[id_] = 5.
                continue

            rel_flow = this_backend.get_relative_flow()[self._n1_li]
            is_finite = np.isfinite(rel_flow)
            is_infinite = ~is_finite

            if self._fun_reduce_n1 == "max":
                if is_infinite.any():
                    n1_vals[id_] = 5.  # some kind of "infinite"
                else:
                    n1_vals[id_] = rel_flow[is_finite].max()
            elif self._fun_reduce_n1 == "count":
                n1_vals[id_] = (rel_flow[is_finite] >= 1.).sum()
                n1_vals[id_] += is_infinite.sum()
            elif self._fun_reduce_n1 == "sum":
                n1_vals[id_] = rel_flow[is_finite].sum()
                n1_vals[id_] += is_infinite.sum() * 5.
            else:
                raise RuntimeError("Unknown way to summarize information for each n1, found "
                                   f"'{self._fun_reduce_n1}', please use one of "
                                   "'max', 'count' or 'sum'")

        # summarize the n-1 information for all n-1 into
        # one single reward score
        if self._fun_reduce_reward == "max":
            return -float(n1_vals.max())
        if self._fun_reduce_reward == "count":
            res = (n1_vals >= 1.).sum()
            return -float(res)
        if self._fun_reduce_reward == "sum":
            res = n1_vals.sum()
            return -res
        raise RuntimeError("Unknown way to summarize n1 information, found "
                           f"'{self._fun_reduce_reward}', please use one of "
                           "'max', 'count' or 'sum'")
Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
import numpy as np
import grid2op
from _obs_with_n1 import ObsWithN1
from _reward_n1 import N1Reward
from _agent_with_pst_action import AgentRandomPST


li_lines = np.arange(5)
# env where the PSTs will be modified
env = grid2op.make("l2rpn_idf_2023",
                   test=True,
                   observation_class=ObsWithN1,
                   kwargs_observation={"n1_li": 1 * li_lines},
                   reward_class=N1Reward(n1_li=1 * li_lines))

# small hack to pretend there are PSTs in the grid
# this is totally useless if the grid already has PSTs
for grid in [env.backend._grid,
             env.backend._PandaPowerBackend__pp_backend_initial_grid]:
    add_PSTs = [0, 1]
    grid.trafo.loc[add_PSTs, "tap_step_degree"] = 1.
    grid.trafo.loc[add_PSTs, "tap_step_percent"] = 0.
    grid.trafo.loc[add_PSTs, "tap_phase_shifter"] = True
    grid.trafo.loc[add_PSTs, "tap_pos"] = 0

# regular env without PST modification (as a reference)
env_without_pst = env.copy()

pst_agent = AgentRandomPST(action_space=env.action_space,
                           env=env)
pst_agent.seed(0)

# initial state
obs = env.reset(seed=0, options={"time serie id": 0})
pst_agent._backend = env.backend  # do not forget to sync the agent with the env

obs_without_pst = env_without_pst.reset(seed=0, options={"time serie id": 0})
# check there is no difference
assert (obs.rho - obs_without_pst.rho).max() <= 1e-5

# perform a (random) PST action and check its effect
# by comparison with the baseline env.
# for grid2op, the pst_action is equivalent to the
# "do nothing" action: grid2op has (for now!, this will be improved
# in a next release) no way of knowing an "action" took place,
# because the agent action modified the env directly
pst_action = pst_agent.act(obs, None, None)
next_obs, reward, *_ = env.step(pst_action)
next_obs_without_pst, reward_without_pst, *_ = env_without_pst.step(pst_action)
print("Maximum difference (in relative flows) with / without the action: "
      f"{(next_obs.rho - next_obs_without_pst.rho).max() * 100.:.2f} % of thermal limit")
print("Difference (in reward) with / without the action: "
      f"{reward - reward_without_pst}")

# do another action
pst_action = pst_agent.act(obs, None, None)
next_obs, reward, *_ = env.step(pst_action)
next_obs_without_pst, reward_without_pst, *_ = env_without_pst.step(pst_action)
print("Maximum difference (in relative flows) with / without the action: "
      f"{(next_obs.rho - next_obs_without_pst.rho).max() * 100.:.2f} % of thermal limit")
print("Difference (in reward) with / without the action: "
      f"{reward - reward_without_pst}")

# do another action
pst_action = pst_agent.act(obs, None, None)
next_obs, reward, *_ = env.step(pst_action)
next_obs_without_pst, reward_without_pst, *_ = env_without_pst.step(pst_action)
print("Maximum difference (in relative flows) with / without the action: "
      f"{(next_obs.rho - next_obs_without_pst.rho).max() * 100.:.2f} % of thermal limit")
print("Difference (in reward) with / without the action: "
      f"{reward - reward_without_pst}")
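To double check that the "hidden" PST action really reached the pandapower grid, one can compare the tap positions of the two backends directly. A sketch reusing the `env` and `env_without_pst` objects from the script above:

# restrict the comparison to the trafos flagged as PSTs by the hack above
pst_ids = env.backend._grid.trafo["tap_phase_shifter"].values.nonzero()[0]
taps_with_agent = env.backend._grid.trafo["tap_pos"].values[pst_ids]
taps_reference = env_without_pst.backend._grid.trafo["tap_pos"].values[pst_ids]
print(f"PST trafos: {pst_ids}, taps with agent: {taps_with_agent}, reference taps: {taps_reference}")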
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
import numpy as np
import grid2op
from _obs_with_n1 import ObsWithN1
from _reward_n1 import N1Reward

# default config
print("Default configuration, every line will be disconnected (one at a time)")
env = grid2op.make("l2rpn_case14_sandbox",
                   observation_class=ObsWithN1,
                   reward_class=N1Reward())

obs = env.reset()
print(f"{obs.n1_vals}")
obs, reward, done, info = env.step(env.action_space())
print(f"{obs.n1_vals}")
print(f"{reward = }")

# with a specific list of n-1 to simulate
print("Custom configuration, only line ids 0..4 will be disconnected (one at a time)")
li_lines = np.arange(5)
env = grid2op.make("l2rpn_case14_sandbox",
                   observation_class=ObsWithN1,
                   kwargs_observation={"n1_li": 1 * li_lines},
                   reward_class=N1Reward(n1_li=1 * li_lines))

obs = env.reset()
print(f"{obs.n1_vals}")
obs, reward, done, info = env.step(env.action_space())
print(f"{obs.n1_vals}")
print(f"{reward = }")

# NB: reward (score) and observation are two different objects,
# so you can use a different list of simulated n-1 when you
# train the agent and when you score it.

# NB: reward (score) and observation are independent:
# you can give the agent the "sum" or "max" of all
# flows for each n-1
# but score it with the maximum flows, for example.

# NB: as of now, no optimization is done in the "N1Reward"
# to reuse the n-1 computation of the Observation (and vice versa),
# which might be time consuming. An easy optimization to perform
# in the reward class would be to reuse the information in
# env.get_obs(_do_copy=False).n1_vals
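One possible shape for that optimization (a sketch only, not part of this commit): inside N1Reward.__call__, reuse the values already computed by the observation when its configuration matches the reward's. The helper name below is hypothetical.

import numpy as np
from _obs_with_n1 import ObsWithN1

def n1_vals_from_obs_or_none(reward, env):
    """Hypothetical helper: return the n-1 values already computed by the
    observation if its configuration matches the reward's, else None."""
    obs = env.get_obs(_do_copy=False)
    if (isinstance(obs, ObsWithN1)
            and np.array_equal(obs._n1_li, reward._n1_li)
            and obs._fun_reduce_n1 == reward._fun_reduce_n1
            and obs._compute_algo == reward._compute_algo):
        # the observation stores np.nan on divergence where the reward uses 5.,
        # so those entries are remapped before reuse
        return np.nan_to_num(obs.n1_vals, nan=5.)
    return None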
