Commit 126063a

fix an issue with the obs class having kwargs and show an example of how to use grid2op specialized with a custom backend
Signed-off-by: DONNOT Benjamin <[email protected]>
1 parent 9bba19d commit 126063a

File tree

11 files changed: +509 −181 lines changed
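The files below implement the example promised by the commit message. A minimal sketch of the resulting usage pattern, assuming the helper modules `_obs_with_n1` and `_reward_n1` added by this commit and the `l2rpn_case14_sandbox` test environment also used further down:

import numpy as np
import grid2op
from _obs_with_n1 import ObsWithN1
from _reward_n1 import N1Reward

monitored_lines = np.arange(5)  # line ids whose disconnection (n-1) is simulated
env = grid2op.make("l2rpn_case14_sandbox",
                   observation_class=ObsWithN1,
                   kwargs_observation={"n1_li": monitored_lines},
                   reward_class=N1Reward(n1_li=monitored_lines))
obs = env.reset()
obs, reward, done, info = env.step(env.action_space())
print(obs.n1_vals)  # one aggregated value per simulated n-1
print(reward)       # negated, aggregated n-1 severity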
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
from grid2op.Agent import BaseAgent
from grid2op.Action import ActionSpace
from grid2op.Environment import BaseEnv

# NB: this agent is, from grid2op's point of view,
# "cheating" because it accesses the env.backend._grid
# attribute when acting (so it does access private information).
# This is, for now, a "hack" and will be better handled in
# future grid2op versions.
# This means that, for now, you cannot use it properly with "simulate"
# for example.

# NB: as PSTs are not handled (yet, any contribution welcomed) in grid2op,
# this agent is for now limited to environments using the pandapower backend.


class AgentRandomPST(BaseAgent):
    def __init__(self,
                 action_space: ActionSpace,
                 env: BaseEnv):
        super().__init__(action_space)
        # keep a reference to the environment backend: `act` modifies
        # its pandapower grid (the `_grid` attribute) in place
        self._backend = env.backend

    def act(self, observation, reward, done=False):
        # perform a "random" PST action
        which_trafo = self.space_prng.randint(self._backend._grid.trafo["tap_phase_shifter"].sum())
        trafo_pst_ids = self._backend._grid.trafo["tap_phase_shifter"].values.nonzero()[0]
        trafo_id = trafo_pst_ids[which_trafo]

        which_tap = self.space_prng.choice([-2, -1, 0, 1, 2], size=1)[0]
        self._backend._grid.trafo.loc[trafo_id, "tap_pos"] = which_tap
        print(f"{trafo_id=}, {which_tap=}")
        # return the base grid2op action (do nothing in this case)
        return super().act(observation, reward, done)
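A minimal sketch of how this agent is meant to be driven (a condensed version of the full example script further below; `env` is assumed to be a grid2op environment built on the pandapower backend):

pst_agent = AgentRandomPST(action_space=env.action_space, env=env)
pst_agent.seed(0)

obs = env.reset(seed=0)
pst_agent._backend = env.backend  # re-sync after reset: the backend may have been rebuilt

action = pst_agent.act(obs, None, False)    # modifies env.backend._grid in place
obs, reward, done, info = env.step(action)  # grid2op itself only sees a "do nothing"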
Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
import copy
from typing import Literal
import numpy as np
from grid2op.Environment.baseEnv import BaseEnv
from grid2op.Observation import CompleteObservation
from grid2op.Backend import Backend
from grid2op.dtypes import dt_float


class ObsWithN1(CompleteObservation):
    # attributes that will be saved when the observation is
    # serialized as a numpy vector
    attr_list_vect = copy.deepcopy(CompleteObservation.attr_list_vect)
    attr_list_vect.append("n1_vals")

    # attributes that will also be used when the observation is
    # serialized as json
    attr_list_json = copy.deepcopy(CompleteObservation.attr_list_json)

    # attributes that will be copied
    # when the observation is copied
    attr_vect_cpy = copy.deepcopy(CompleteObservation.attr_vect_cpy)
    attr_vect_cpy.append("n1_vals")

    def __init__(self,
                 obs_env=None,
                 action_helper=None,
                 random_prng=None,
                 kwargs_env=None,
                 n1_li=None,
                 reduce_n1: Literal["max", "count", "sum"]="max",
                 compute_algo: Literal["ac", "dc"]="ac"):
        super().__init__(obs_env,
                         action_helper,
                         random_prng,
                         kwargs_env,
                         n1_li=n1_li,
                         reduce_n1=reduce_n1,
                         compute_algo=compute_algo)

        # list of lines for which to compute the n-1
        if n1_li is None:
            # all n-1 will be used
            self._n1_li = np.arange(type(self).n_line, dtype=int)
        else:
            self._n1_li = []
            for el in n1_li:
                if isinstance(el, str):
                    # user provided a line name
                    el = type(self).get_line_info(line_name=el)[0]
                else:
                    # user provided a line id
                    el = int(el)
                self._n1_li.append(el)
            # convert it to a np array
            self._n1_li = np.array(self._n1_li, dtype=int)

        # function to aggregate all information for one n-1
        # into a single scalar
        self._fun_reduce_n1 = reduce_n1

        # computation method (AC or DC)
        self._compute_algo = compute_algo

        # added attributes
        self.n1_vals = np.empty(self._n1_li.shape, dtype=dt_float)

    def update(self, env: BaseEnv, with_forecast=True):
        # update the standard attributes
        super().update(env, with_forecast=with_forecast)

        # update the n1 attribute (specific to this use case)
        for id_, line_id in enumerate(self._n1_li):
            this_backend: Backend = env.backend.copy_public()
            this_backend._disconnect_line(line_id)
            if self._compute_algo == "ac":
                conv, exc_ = this_backend.runpf(is_dc=False)
            elif self._compute_algo == "dc":
                conv, exc_ = this_backend.runpf(is_dc=True)
            else:
                raise RuntimeError(f"Unknown algorithm method '{self._compute_algo}', "
                                   "use one of 'ac' or 'dc'")
            if not conv:
                # powerflow has diverged
                self.n1_vals[id_] = np.nan
                continue

            rel_flow = this_backend.get_relative_flow()[self._n1_li]
            is_finite = np.isfinite(rel_flow)
            is_infinite = ~is_finite

            if self._fun_reduce_n1 == "max":
                if is_infinite.any():
                    self.n1_vals[id_] = 5.  # some kind of "gentle" max
                else:
                    self.n1_vals[id_] = rel_flow[is_finite].max()
            elif self._fun_reduce_n1 == "count":
                self.n1_vals[id_] = (rel_flow[is_finite] >= 1.).sum()
                self.n1_vals[id_] += is_infinite.sum()
            elif self._fun_reduce_n1 == "sum":
                self.n1_vals[id_] = rel_flow[is_finite].sum()
                self.n1_vals[id_] += is_infinite.sum() * 5.
            else:
                raise RuntimeError("Unknown way to summarize information for each n1, found "
                                   f"'{self._fun_reduce_n1}', please use one of "
                                   "'max', 'count' or 'sum'")
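Because "n1_vals" is appended to attr_list_vect and attr_vect_cpy, the extra values follow the observation through the usual vector serialization and copies. A short sketch (assuming an environment built with observation_class=ObsWithN1, as in the scripts below):

import grid2op
from _obs_with_n1 import ObsWithN1

env = grid2op.make("l2rpn_case14_sandbox", observation_class=ObsWithN1)
obs = env.reset()
print(obs.n1_vals)    # one aggregated value per simulated n-1
vect = obs.to_vect()  # "n1_vals" is serialized together with the standard attributes
copied = obs.copy()
print(copied.n1_vals)  # copied along with the rest of the observation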
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
from typing import Literal
import numpy as np
from grid2op.Reward import BaseReward
from grid2op.Environment import BaseEnv
from grid2op.Backend import Backend


class N1Reward(BaseReward):
    def __init__(self,
                 logger=None,
                 n1_li=None,
                 reduce_n1: Literal["max", "count", "sum"]="max",
                 reduce_reward: Literal["max", "count", "sum"]="max",
                 compute_algo: Literal["ac", "dc"]="ac"):
        super().__init__(logger)
        self.n1_li_init = n1_li
        self._n1_li = None

        # function to aggregate all information for one n-1
        # into a single scalar
        self._fun_reduce_n1 = reduce_n1

        # function to aggregate all rewards (one per n-1)
        self._fun_reduce_reward = reduce_reward

        # computation method (AC or DC)
        self._compute_algo = compute_algo

    def initialize(self, env: BaseEnv):
        super().initialize(env)
        if self.n1_li_init is None:
            self._n1_li = np.arange(type(env).n_line)
        else:
            self._n1_li = []
            for el in self.n1_li_init:
                if isinstance(el, str):
                    # user provided a line name
                    el = type(env).get_line_info(line_name=el)[0]
                else:
                    # user provided a line id
                    el = int(el)
                self._n1_li.append(el)
            # convert it to a np array
            self._n1_li = np.array(self._n1_li, dtype=int)

    def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
        if is_done and (not has_error and not is_illegal and not is_ambiguous):
            # episode terminated without error
            return 1.  # max reward
        if has_error:
            # episode truncated
            return -10.  # min reward

        # TODO optimization here
        # if everything is the same in obs (provided that obs is ObsWithN1) and
        # in self (same lines simulated, same function to get the max)
        # then use:
        # obs = env.get_obs(_do_copy=False)
        # and obs.n1_vals instead of recomputing powerflows

        # perform the n-1 computation
        n1_vals = np.zeros(self._n1_li.shape, dtype=float)
        for id_, line_id in enumerate(self._n1_li):
            this_backend: Backend = env.backend.copy_public()
            this_backend._disconnect_line(line_id)
            if self._compute_algo == "ac":
                conv, exc_ = this_backend.runpf(is_dc=False)
            elif self._compute_algo == "dc":
                conv, exc_ = this_backend.runpf(is_dc=True)
            else:
                raise RuntimeError(f"Unknown algorithm method '{self._compute_algo}', "
                                   "use one of 'ac' or 'dc'")
            if not conv:
                # powerflow has diverged
                n1_vals[id_] = 5.
                continue

            rel_flow = this_backend.get_relative_flow()[self._n1_li]
            is_finite = np.isfinite(rel_flow)
            is_infinite = ~is_finite

            if self._fun_reduce_n1 == "max":
                if is_infinite.any():
                    n1_vals[id_] = 5.  # some kind of "infinite"
                else:
                    n1_vals[id_] = rel_flow[is_finite].max()
            elif self._fun_reduce_n1 == "count":
                n1_vals[id_] = (rel_flow[is_finite] >= 1.).sum()
                n1_vals[id_] += is_infinite.sum()
            elif self._fun_reduce_n1 == "sum":
                n1_vals[id_] = rel_flow[is_finite].sum()
                n1_vals[id_] += is_infinite.sum() * 5.
            else:
                raise RuntimeError("Unknown way to summarize information for each n1, found "
                                   f"'{self._fun_reduce_n1}', please use one of "
                                   "'max', 'count' or 'sum'")

        # summarize the n-1 information for all n-1 into
        # one single reward score
        if self._fun_reduce_reward == "max":
            return -float(n1_vals.max())
        if self._fun_reduce_reward == "count":
            res = (n1_vals >= 1.).sum()
            return -float(res)
        if self._fun_reduce_reward == "sum":
            res = n1_vals.sum()
            return -res
        raise RuntimeError("Unknown way to summarize n1 information, found "
                           f"'{self._fun_reduce_reward}', please use one of "
                           "'max', 'count' or 'sum'")
Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
import numpy as np
import grid2op
from _obs_with_n1 import ObsWithN1
from _reward_n1 import N1Reward
from _agent_with_pst_action import AgentRandomPST


li_lines = np.arange(5)
# env where the PSTs will be modified
env = grid2op.make("l2rpn_idf_2023",
                   test=True,
                   observation_class=ObsWithN1,
                   kwargs_observation={"n1_li": 1 * li_lines},
                   reward_class=N1Reward(n1_li=1 * li_lines))

# small hack to pretend there are PSTs in the grid
# this is totally useless if the grid already has PSTs
for grid in [env.backend._grid,
             env.backend._PandaPowerBackend__pp_backend_initial_grid]:
    add_PSTs = [0, 1]
    grid.trafo.loc[add_PSTs, "tap_step_degree"] = 1.
    grid.trafo.loc[add_PSTs, "tap_step_percent"] = 0.
    grid.trafo.loc[add_PSTs, "tap_phase_shifter"] = True
    grid.trafo.loc[add_PSTs, "tap_pos"] = 0

# regular env without PST modification (as a reference)
env_without_pst = env.copy()

pst_agent = AgentRandomPST(action_space=env.action_space,
                           env=env)
pst_agent.seed(0)

# initial state
obs = env.reset(seed=0, options={"time serie id": 0})
pst_agent._backend = env.backend  # do not forget to sync the agent with the env

obs_without_pst = env_without_pst.reset(seed=0, options={"time serie id": 0})
# check there is no difference
assert (obs.rho - obs_without_pst.rho).max() <= 1e-5

# perform a (random) PST action and check its effect
# by comparison with the baseline env.
# for grid2op, the pst_action is equivalent to the
# "do nothing" action: grid2op has (for now!, this will be improved
# in a next release) no way of knowing an "action" took place,
# because the agent action modified the env directly
pst_action = pst_agent.act(obs, None, None)
next_obs, reward, *_ = env.step(pst_action)
next_obs_without_pst, reward_without_pst, *_ = env_without_pst.step(pst_action)
print("Maximum difference (in relative flows) with / without the action: "
      f"{(next_obs.rho - next_obs_without_pst.rho).max() * 100.:.2f} % of thermal limit")
print("Difference (in reward) with / without the action: "
      f"{reward - reward_without_pst}")

# do another action
pst_action = pst_agent.act(obs, None, None)
next_obs, reward, *_ = env.step(pst_action)
next_obs_without_pst, reward_without_pst, *_ = env_without_pst.step(pst_action)
print("Maximum difference (in relative flows) with / without the action: "
      f"{(next_obs.rho - next_obs_without_pst.rho).max() * 100.:.2f} % of thermal limit")
print("Difference (in reward) with / without the action: "
      f"{reward - reward_without_pst}")

# do another action
pst_action = pst_agent.act(obs, None, None)
next_obs, reward, *_ = env.step(pst_action)
next_obs_without_pst, reward_without_pst, *_ = env_without_pst.step(pst_action)
print("Maximum difference (in relative flows) with / without the action: "
      f"{(next_obs.rho - next_obs_without_pst.rho).max() * 100.:.2f} % of thermal limit")
print("Difference (in reward) with / without the action: "
      f"{reward - reward_without_pst}")
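To double check that the "hidden" PST action really reached the pandapower grid, one can compare the tap positions of the two backends directly. A sketch reusing the `env` and `env_without_pst` objects from the script above:

# restrict the comparison to the trafos flagged as PSTs by the hack above
pst_ids = env.backend._grid.trafo["tap_phase_shifter"].values.nonzero()[0]
taps_with_agent = env.backend._grid.trafo["tap_pos"].values[pst_ids]
taps_reference = env_without_pst.backend._grid.trafo["tap_pos"].values[pst_ids]
print(f"PST trafos: {pst_ids}, taps with agent: {taps_with_agent}, reference taps: {taps_reference}")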
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
import numpy as np
import grid2op
from _obs_with_n1 import ObsWithN1
from _reward_n1 import N1Reward

# default config
print("Default configuration, every line will be disconnected (one at a time)")
env = grid2op.make("l2rpn_case14_sandbox",
                   observation_class=ObsWithN1,
                   reward_class=N1Reward())

obs = env.reset()
print(f"{obs.n1_vals}")
obs, reward, done, info = env.step(env.action_space())
print(f"{obs.n1_vals}")
print(f"{reward = }")

# with a specific list of n-1 to simulate
print("Custom configuration, only line ids 0..4 will be disconnected (one at a time)")
li_lines = np.arange(5)
env = grid2op.make("l2rpn_case14_sandbox",
                   observation_class=ObsWithN1,
                   kwargs_observation={"n1_li": 1 * li_lines},
                   reward_class=N1Reward(n1_li=1 * li_lines))

obs = env.reset()
print(f"{obs.n1_vals}")
obs, reward, done, info = env.step(env.action_space())
print(f"{obs.n1_vals}")
print(f"{reward = }")

# NB: reward (score) and observation are two different objects,
# so you can use a different list of simulated n-1 when you
# train the agent and when you score it.

# NB: reward (score) and observation are independent:
# you can give the agent the "sum" or "max" of all
# flows for each n-1
# but score it with the maximum flows, for example.

# NB: as of now, no optimization is done in the "N1Reward"
# to reuse the n-1 computation of the Observation (and vice versa),
# which might be time consuming. An easy optimization to perform
# in the reward class would be to reuse the information in
# env.get_obs(_do_copy=False).n1_vals
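One possible shape for that optimization (a sketch only, not part of this commit): inside N1Reward.__call__, reuse the values already computed by the observation when its configuration matches the reward's. The helper name below is hypothetical.

import numpy as np
from _obs_with_n1 import ObsWithN1

def n1_vals_from_obs_or_none(reward, env):
    """Hypothetical helper: return the n-1 values already computed by the
    observation if its configuration matches the reward's, else None."""
    obs = env.get_obs(_do_copy=False)
    if (isinstance(obs, ObsWithN1)
            and np.array_equal(obs._n1_li, reward._n1_li)
            and obs._fun_reduce_n1 == reward._fun_reduce_n1
            and obs._compute_algo == reward._compute_algo):
        # the observation stores np.nan on divergence where the reward uses 5.,
        # so those entries are remapped before reuse
        return np.nan_to_num(obs.n1_vals, nan=5.)
    return None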
