Commit be6acb2

Merge pull request #592 from DEUCE1957/dev_1.10.1
Add: Compact Episode Data (Debugged)
2 parents 5d93858 + 290ca42 commit be6acb2

File tree

6 files changed: +706 -101 lines changed
grid2op/Episode/CompactEpisodeData.py

Lines changed: 312 additions & 0 deletions

@@ -0,0 +1,312 @@
# Copyright (c) 2019-2020, RTE (https://www.rte-france.com)
# Addition by Xavier Weiss (@DEUCE1957)
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of Grid2Op, Grid2Op a testbed platform to model sequential decision making in power systems.
import json
from pathlib import Path as p

import numpy as np

from grid2op.Action import ActionSpace
from grid2op.Observation import ObservationSpace

class CompactEpisodeData:
    """
    This class provides a compact way to serialize/deserialize one episode of a Reinforcement Learning (RL) run.
    This enables episodes to be replayed, so we can understand the behaviour of the agent.
    It is compatible with :class:`EpisodeData` through the :func:`CompactEpisodeData.asdict` method.

    If enabled when using the :class:`Runner`, the :class:`CompactEpisodeData`
    will save the information in a structured and compact way.
    For each unique environment it will store a folder with:

    - "dict_action_space.json"
    - "dict_attack_space.json"
    - "dict_env_modification_space.json"
    - "dict_observation_space.json"

    Then for each episode it stores a single compressed NumPy archive (.npz) file, identified by the chronics ID (e.g. "003").
    Inside this archive we find:

    - "actions": actions taken by the :class:`grid2op.Agent.BaseAgent`; each row of this 2d-array is the vector
      representation of the action taken by the agent at a particular timestep.
    - "env_actions": the modifications of the powergrid made by the environment. These usually concern hazards and
      maintenance, as well as changes to the generators' production setpoints or the loads' consumption.
    - "attacks": actions taken by any opponent present in the RL environment, stored similarly to "actions".
    - "observations": observations of the class :class:`grid2op.Observation.BaseObservation` made by the
      :class:`grid2op.Agent.BaseAgent` after taking an action, stored as a 2d-array where each row is the vector
      representation of the observation at that timestep. Note this includes the initial timestep, hence this
      array has one row more than (e.g.) "actions".
    - "rewards": reward received by the :class:`grid2op.Agent.BaseAgent` from the :class:`grid2op.Environment` at
      timestep 't', represented as a 1d-array.
    - "other_rewards": any other rewards logged by the :class:`grid2op.Environment` (but not necessarily passed to
      the agent), represented as a 2d-array.
    - "disc_lines": which lines have been disconnected during the simulation at each timestep. The same convention
      as for "rewards" is adopted: the powerlines are disconnected when the :class:`grid2op.Agent.BaseAgent` takes
      the :class:`grid2op.Action.BaseAction` at timestep 't'.
    - "times": some information about the processor time spent (in seconds), mainly the time taken by the
      :class:`grid2op.Agent.BaseAgent` (and especially its method :func:`grid2op.Agent.BaseAgent.act`) and the
      amount of time spent in the :class:`grid2op.Environment.Environment`.

    All of the above can be read back from disk.

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment we are running; contains most of the metadata required to store the episode.
    obs: :class:`grid2op.Observation`
        The initial observation of the environment in the current episode. Used to store the first observation.
    exp_dir: ``pathlib.Path``
        Directory where the experiment data is stored.
    ep_id: ``str``, optional
        If provided, a previously stored episode is loaded from disk instead of new arrays being allocated.

    Examples
    --------
    Here is an example on how to use the :class:`CompactEpisodeData` class outside of the :class:`grid2op.Runner.Runner`.

    .. code-block:: python

        import time

        import grid2op
        from grid2op.Agent import DoNothingAgent
        from grid2op.Episode import CompactEpisodeData
        from pathlib import Path as p

        env = grid2op.make("rte_case14_realistic")
        obs = env.reset()
        ep_id = env.chronics_handler.get_name()
        data_dir = p.cwd()  # CHANGE THIS TO DESIRED LOCATION ON DISK
        agent = DoNothingAgent(env.action_space)
        reward = 0.0

        episode_store = CompactEpisodeData(env, obs, exp_dir=data_dir)
        for t in range(env.max_episode_duration()):
            start = time.perf_counter()
            act = agent.act(obs, reward)
            obs, reward, done, info = env.step(act)
            duration = time.perf_counter() - start
            # update() expects a 1-based timestep: the action goes to index t, the observation to index t + 1
            episode_store.update(t + 1, env, act, obs, reward, done, duration, info)
            if done:
                break
        # Store Episode Data to file (compactly)
        episode_store.to_disk()
        # Load Episode Data from disk by referring to the specific episode ID
        episode_store = CompactEpisodeData.from_disk(data_dir, ep_id)
    """

    def __init__(self, env, obs, exp_dir, ep_id: str = None):
        """
        Creates a dictionary of Numpy arrays for storing the details of a Grid2Op episode (actions, observations, etc.).
        Pre-allocating the arrays like this is more efficient than appending to a mutable datatype (like a list).
        For the initial timestep, an extra observation is stored (the initial state of the Environment).

        Args:
            env (grid2op.Environment): Current Grid2Op Environment, used to grab static attributes.
            obs (grid2op.Observation): Initial Observation (before the agent is active).
            exp_dir (pathlib.Path): Where experiment data is stored.
            ep_id (str | None): If provided, tries to load a previously stored episode from disk.
        """
        if exp_dir is not None:
            self.exp_dir = p(exp_dir)
        else:
            self.exp_dir = None
        self.array_names = ("actions", "env_actions", "attacks", "observations", "rewards", "other_rewards", "disc_lines", "times")
        self.space_names = ("observation_space", "action_space", "attack_space", "env_modification_space")
        if ep_id is None:
            self.ep_id = env.chronics_handler.get_name()
            max_no_of_timesteps = int(env.max_episode_duration())

            # Numpy arrays, pre-allocated to the episode's maximum duration
            self.actions = np.full((max_no_of_timesteps, env.action_space.n), fill_value=np.nan, dtype=np.float16)
            self.env_actions = np.full((max_no_of_timesteps, env._helper_action_env.n), fill_value=np.nan, dtype=np.float32)
            self.attacks = np.full((max_no_of_timesteps, env._opponent_action_space.n), fill_value=0.0, dtype=np.float32)
            self.observations = np.full((max_no_of_timesteps + 1, len(obs.to_vect())), fill_value=np.nan, dtype=np.float32)
            self.rewards = np.full(max_no_of_timesteps, fill_value=np.nan, dtype=np.float32)
            self.other_reward_names = list(sorted(env.other_rewards.keys()))
            self.other_rewards = np.full((max_no_of_timesteps, len(self.other_reward_names)), fill_value=np.nan, dtype=np.float32)
            # Boolean arrays cannot hold NaN, so unreached timesteps default to False (no disconnection)
            self.disc_lines = np.full((max_no_of_timesteps, env.backend.n_line), fill_value=False, dtype=np.bool_)
            self.times = np.full(max_no_of_timesteps, fill_value=np.nan, dtype=np.float32)

            self.disc_lines_templ = np.full((1, env.backend.n_line), fill_value=False, dtype=np.bool_)
            # AttackTempl: not used, kept for compatibility with EpisodeData
            self.attack_templ = np.full((1, env._oppSpace.action_space.size()), fill_value=0.0, dtype=np.float32)

            self.legal = np.full(max_no_of_timesteps, fill_value=True, dtype=np.bool_)
            self.ambiguous = np.full(max_no_of_timesteps, fill_value=False, dtype=np.bool_)
            self.n_cols = env.action_space.n + env._helper_action_env.n + len(obs.to_vect()) + env.backend.n_line + env._oppSpace.action_space.size() + 6

            # Store first observation
            self.observations[0] = obs.to_vect()
            self.game_over_timestep = max_no_of_timesteps

            # JSON-serializable objects
            self.observation_space = env.observation_space
            self.action_space = env.action_space
            self.attack_space = env._opponent_action_space
            self.env_modification_space = env._helper_action_env

            # Special JSON-serializable object: episode metadata
            self.meta = dict(
                chronics_path=self.ep_id,
                chronics_max_timestep=max_no_of_timesteps,
                game_over_timestep=self.game_over_timestep,
                other_reward_names=self.other_reward_names,
                grid_path=str(env._init_grid_path),  # cast to str so the metadata stays JSON-serializable
                backend_type=type(env.backend).__name__,
                env_type=type(env).__name__,
                env_seed=(env.seed_used.item() if env.seed_used.ndim == 0 else list(env.seed_used)) if isinstance(env.seed_used, np.ndarray) else env.seed_used,
                agent_seed=self.action_space.seed_used,
                nb_timestep_played=0,
                cumulative_reward=0.0,
            )
        elif exp_dir is not None:
            self.load_metadata(ep_id)
            self.load_spaces()
            self.load_arrays(ep_id)

    def update(self, t: int, env, action, obs, reward: float, done: bool, duration: float, info):
        """
        Update the arrays in the Episode Store for each step of the environment.
        ``t`` is 1-based: the action taken at step ``t`` is stored at index ``t - 1``,
        while ``observations[t]`` holds the observation that results from it
        (index 0 is the initial observation).

        Args:
            t (int): Current time step (starting at 1)
            env (grid2op.Environment): State of Environment
            action (grid2op.Action): Action agent took on the Environment
            obs (grid2op.Observation): Observed result of action on Environment
            reward (float): Numeric reward returned by Environment for the given action
            done (bool): Whether the episode ended at this step
            duration (float): Time in seconds needed to choose and execute the action
            info (dict<str:np.array>): Dictionary containing information on legality and ambiguity of action
        """
        self.actions[t - 1] = action.to_vect()
        self.env_actions[t - 1] = env._env_modification.to_vect()
        self.observations[t] = obs.to_vect()
        opp_attack = env._oppSpace.last_attack
        if opp_attack is not None:
            self.attacks[t - 1] = opp_attack.to_vect()
        self.rewards[t - 1] = reward
        if "disc_lines" in info:
            arr = info["disc_lines"]
            if arr is not None:
                self.disc_lines[t - 1] = arr
            else:
                self.disc_lines[t - 1] = self.disc_lines_templ
        if "rewards" in info:
            for i, other_reward_name in enumerate(self.other_reward_names):
                self.other_rewards[t - 1, i] = info["rewards"][other_reward_name]
        self.times[t - 1] = duration
        self.legal[t - 1] = not info["is_illegal"]
        self.ambiguous[t - 1] = info["is_ambiguous"]
        if done:
            self.game_over_timestep = t
        # Update metadata
        self.meta.update(
            nb_timestep_played=t,
            cumulative_reward=self.meta["cumulative_reward"] + float(reward),
        )
        return self.meta["cumulative_reward"]

    def asdict(self):
        """
        Return the Episode Store as a dictionary.
        Compatible with Grid2Op's internal EpisodeData format as keyword arguments.
        """
        # "other_rewards" is a list of dictionaries in Grid2Op's internal EpisodeData, so convert to that format
        other_rewards = [
            {name: float(self.other_rewards[t, i]) for i, name in enumerate(self.other_reward_names)}
            for t in range(len(self.times))
        ]
        return dict(actions=self.actions, env_actions=self.env_actions,
                    observations=self.observations,
                    rewards=self.rewards,
                    other_rewards=other_rewards,
                    disc_lines=self.disc_lines, times=self.times,
                    disc_lines_templ=self.disc_lines_templ, attack_templ=self.attack_templ,
                    attack=self.attacks, legal=self.legal, ambiguous=self.ambiguous,
                    observation_space=self.observation_space, action_space=self.action_space,
                    attack_space=self.attack_space, helper_action_env=self.env_modification_space)

    def store_metadata(self):
        """
        Store this Episode's metadata to disk.
        """
        with open(self.exp_dir / f"{self.ep_id}_metadata.json", "w", encoding="utf-8") as f:
            json.dump(self.meta, f, indent=4, sort_keys=True)

    def load_metadata(self, ep_id: str):
        """
        Load metadata from a specific Episode.
        """
        with open(self.exp_dir / f"{ep_id}_metadata.json", "r", encoding="utf-8") as f:
            self.meta = json.load(f)
        self.other_reward_names = self.meta["other_reward_names"]
        self.game_over_timestep = self.meta["game_over_timestep"]

    def store_spaces(self):
        """
        Store the Observation, Action, Environment and Opponent spaces to disk.
        """
        for space_name in self.space_names:
            with open(self.exp_dir / f"dict_{space_name}.json", "w", encoding="utf-8") as f:
                json.dump(getattr(self, space_name).cls_to_dict(), f, indent=4, sort_keys=True)

    def load_spaces(self):
        """
        Load the Observation, Action, Environment and Opponent spaces from disk.
        """
        for space_name in self.space_names:
            with open(self.exp_dir / f"dict_{space_name}.json", "r", encoding="utf-8") as f:
                if space_name == "observation_space":
                    setattr(self, space_name, ObservationSpace.from_dict(json.load(f)))
                else:
                    setattr(self, space_name, ActionSpace.from_dict(json.load(f)))

    def store_arrays(self):
        """
        Store compressed versions of the Actions, Observations, Rewards, Attacks and other metadata
        to disk as a compressed Numpy archive (single file per episode).
        """
        np.savez_compressed(self.exp_dir / f"{self.ep_id}.npz",
                            **{array_name: getattr(self, array_name) for array_name in self.array_names})

    def load_arrays(self, ep_id: str):
        """
        Load Actions, Observations, Rewards, Attacks and other metadata from disk
        for a specific Episode ID (identified by Chronics name).
        """
        arrays = np.load(self.exp_dir / f"{ep_id}.npz")
        for array_name in self.array_names:
            setattr(self, array_name, arrays[array_name])
        self.ep_id = ep_id

    def to_disk(self):
        """
        Store this EpisodeStore object instance to disk (as .json and .npz files).
        """
        if self.exp_dir is not None:
            # Store Episode metadata
            self.store_metadata()
            # Store Spaces (values are static, so only save once per experiment).
            # Count only the "dict_*.json" space files: the per-episode "*_metadata.json"
            # files live in the same folder and would otherwise inflate the count.
            if len(list(self.exp_dir.glob("dict_*.json"))) != 4:
                self.store_spaces()
            # Store Arrays as compressed Numpy archive
            self.store_arrays()

    @classmethod
    def from_disk(cls, path, ep_id: str):
        """
        Load EpisodeStore data from disk for a specific episode.
        """
        return cls(env=None, obs=None, exp_dir=p(path), ep_id=ep_id)

    @staticmethod
    def list_episode(path):
        """
        From a given path, extracts the episodes that can be loaded.

        Parameters
        ----------
        path: ``str`` or ``pathlib.Path``
            The path where to look for data coming from "episode"

        Returns
        -------
        res: ``list``
            A list of possible episodes. Each element of this list is a tuple: (full_path, episode_name)
        """
        return [(str(full_path), full_path.stem) for full_path in p(path).glob("*.npz")]

    def __len__(self):
        return self.game_over_timestep

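Since each episode is written as a single compressed NumPy archive, the stored arrays can be inspected without going through Grid2Op at all. A minimal sketch, assuming a hypothetical experiment folder "experiments/run_0" and episode ID "003":

    # Sketch: inspect a stored episode directly with NumPy.
    # "experiments/run_0" and "003" are hypothetical placeholders.
    from pathlib import Path
    import numpy as np

    exp_dir = Path("experiments/run_0")
    arrays = np.load(exp_dir / "003.npz")

    print(arrays.files)             # actions, env_actions, attacks, observations, rewards, ...
    print(arrays["rewards"].shape)  # one entry per timestep
    # "observations" has one extra row: the initial state recorded before any action.
    assert arrays["observations"].shape[0] == arrays["actions"].shape[0] + 1
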
grid2op/Episode/EpisodeReplay.py

Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@
 from grid2op.Exceptions import Grid2OpException
 from grid2op.PlotGrid.PlotMatplot import PlotMatplot
 from grid2op.Episode.EpisodeData import EpisodeData
-
+from grid2op.Episode.CompactEpisodeData import CompactEpisodeData
 
 class EpisodeReplay(object):
     """

grid2op/Episode/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -1,6 +1,7 @@
 __all__ = ["EpisodeData"]
 
 from grid2op.Episode.EpisodeData import EpisodeData
+from grid2op.Episode.CompactEpisodeData import CompactEpisodeData
 
 # Try to import optional module
 try:
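
With this export in place, the class is importable straight from grid2op.Episode. A short sketch of reloading a finished run, using only methods defined in this commit (the experiment folder is a hypothetical placeholder):

    # Sketch: list and reload stored episodes via the new package export.
    from pathlib import Path
    from grid2op.Episode import CompactEpisodeData

    exp_dir = Path("experiments/run_0")  # hypothetical experiment folder

    # Every .npz file in the experiment folder is one stored episode.
    for full_path, ep_name in CompactEpisodeData.list_episode(exp_dir):
        episode = CompactEpisodeData.from_disk(exp_dir, ep_name)
        print(ep_name, len(episode), "timesteps played")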
