adding some docs [skip ci]

BDonnot · BDonnot · commit 7d3a926ff389 · 2024-05-24T11:18:02.000+02:00
diff --git a/docs/environment.rst b/docs/environment.rst
@@ -104,6 +104,58 @@ increase the training time, especially at the beginning. This is due to the fact
 `env.reset` is called, the whole chronics is read from the hard drive. If you want to lower this
 impact then you might consult the :ref:`environment-module-data-pipeline` page of the doc.
 
+Go to the next scenario
+++++++++++++++++++++++++
+
+Starting with grid2op 1.9.8, we try to make it easier to select
+the time series, the seed, the initial state of the grid, etc.
+
+All of the above can be done when calling the `env.reset()` function.
+
+For customizing the seed, you can for example do:
+
+.. code-block:: python
+
+    import grid2op
+    env_name = "l2rpn_case14_sandbox"
+    env = grid2op.make(env_name)
+
+    obs = env.reset(seed=0)
+
+For customizing the time series id you want to use:
+
+.. code-block:: python
+
+    import grid2op
+    env_name = "l2rpn_case14_sandbox"
+    env = grid2op.make(env_name)
+
+    obs = env.reset(options={"time serie id": 1})  # time serie by id (sorted alphabetically)
+    # or
+    obs = env.reset(options={"time serie id": "0001"})  # time serie by name (folder name)
+
+For customizing the initial state of the grid, for example forcing the
+powerline 0 to be disconnected in the initial observation:
+
+.. code-block:: python
+
+    import grid2op
+    env_name = "l2rpn_case14_sandbox"
+    env = grid2op.make(env_name)
+
+    init_state_dict = {"set_line_status": [(0, -1)]}
+    obs = env.reset(options={"init state": init_state_dict})
+
+
+Feel free to consult the documentation of the :func:`Environment.reset` function
+for more information (this doc might be outdated, the one of the function should 
+be more up to date with the code).
+
+.. note::
+    In the near future (next few releases) we will also attempt to make the 
+    customization of the `parameters` or the `skip number of steps`, `maximum duration 
+    of the scenarios` also available in `env.reset()` options.
+
 .. _environment-module-chronics-info:
 
 Time series Customization
@@ -141,10 +193,15 @@ the call to "env.reset". This gives the following code:
     # and now the loop starts
     for i in range(episode_count):
         ###################################
-        env.set_id(THE_CHRONIC_ID)
+        # with recent grid2op 
+        obs = env.reset(options={"time serie id": THE_CHRONIC_ID})
         ###################################
 
-        obs = env.reset()
+        ###################################
+        # old method (older grid2op versions)
+        # env.set_id(THE_CHRONIC_ID)
+        # obs = env.reset()
+        ###################################
 
         # now play the episode as usual
         while True:
diff --git a/grid2op/Runner/runner.py b/grid2op/Runner/runner.py
@@ -1220,15 +1220,15 @@ def run(
         res: ``list``
             List of tuple. Each tuple having 3[4] elements:
 
-              - "i" unique identifier of the episode (compared to :func:`Runner.run_sequential`, the elements of the
-                returned list are not necessarily sorted by this value)
+              - "id_chron" unique identifier of the episode
+              - "name_chron" name of the time series (usually it is the path where it is stored)
               - "cum_reward" the cumulative reward obtained by the :attr:`Runner.Agent` on this episode i
               - "nb_time_step": the number of time steps played in this episode.
               - "total_step": the total number of time steps possible in this episode.
               - "episode_data" : [Optional] The :class:`EpisodeData` corresponding to this episode run only
                 if `add_detailed_output=True`
               - "add_nb_highres_sim": [Optional] The estimated number of calls to high resolution simulator made
-                by the agent
+                by the agent. Only present if `add_nb_highres_sim=True` in the kwargs
 
         Examples
         --------
@@ -1274,6 +1274,40 @@ def run(
             runner = Runner(**env.get_params_for_runner(), agentClass=None, agentInstance=my_agent)
             res = runner.run(nb_episode=1, agent_seeds=[42], env_seeds=[0])
 
+        Since grid2op 1.10.2 you can also set the initial state of the grid when
+        calling the runner. You can do that with the kwargs `init_states`, for example like this:
+        
+        .. code-block:: python
+
+            import grid2op
+            from grid2op.Runner import Runner
+            from grid2op.Agent import RandomAgent
+
+            env = grid2op.make("l2rpn_case14_sandbox")
+            my_agent = RandomAgent(env.action_space)
+            runner = Runner(**env.get_params_for_runner(), agentClass=None, agentInstance=my_agent)
+            res = runner.run(nb_episode=1,
+                             agent_seeds=[42],
+                             env_seeds=[0],
+                             init_states=[{"set_line_status": [(0, -1)]}]
+                             )
+        
+        .. note::
+            We recommend that you provide `init_states` as a list having a length of
+            `nb_episode`. Each episode will be initialized with the provided
+            element of the list. However, if you provide only one element, then
+            all episodes you want to compute will be initialized with this same
+            action.
+            
+        .. note::
+            At the beginning of each episode, if an `init_state` is set, 
+            the environment is reset with a call like: `env.reset(options={"init state": init_state})`
+            
+            This is why we recommend using a dictionary to set the initial state, so
+            that you can control exactly what is done (by setting the `"method"`). More
+            information about this is available in the documentation of the
+            :func:`grid2op.Environment.Environment.reset` function.
+            
         """
         if nb_episode < 0:
             raise RuntimeError("Impossible to run a negative number of scenarios.")
diff --git a/grid2op/gym_compat/gymenv.py b/grid2op/gym_compat/gymenv.py
@@ -142,7 +142,7 @@ def _aux_step_new(self, gym_action: ActType) -> Tuple[ObsType, float, bool, bool
     def _aux_reset(self,
                    seed: Optional[int]=None,
                    return_info: Optional[bool]=None,
-                   options: Optional[Dict[Any, Any]]=None) -> Union[ObsType, Tuple[ObsType, RESET_INFO_GYM_TYPING]]:
+                   options: RESET_OPTIONS_TYPING=None) -> Union[ObsType, Tuple[ObsType, RESET_INFO_GYM_TYPING]]:
         # used for gym < 0.26
         if self._shuffle_chronics and isinstance(
             self.init_env.chronics_handler.real_data, Multifolder
@@ -152,7 +152,7 @@ def _aux_reset(self,
         if seed is not None:
             seed_, next_seed, underlying_env_seeds = self._aux_seed(seed)
             
-        g2op_obs = self.init_env.reset()
+        g2op_obs = self.init_env.reset(options=options)
         gym_obs = self.observation_space.to_gym(g2op_obs)
             
         if return_info:
@@ -301,6 +301,23 @@ def reset(self,
                      ObsType,
                      RESET_INFO_GYM_TYPING
                   ]:
+            """This function will reset the underlying grid2op environment
+            and return the next state of the grid (as the gymnasium observation)
+            and some other information.
+
+            Parameters
+            ----------
+            seed : Optional[int], optional
+                The seed for this new environment, by default None
+            options : RESET_OPTIONS_TYPING, optional
+                See the documentation of :func:`grid2op.Environment.Environment.reset`
+                for more information about it, by default None
+
+            Returns
+            -------
+            Tuple[ ObsType, RESET_INFO_GYM_TYPING ]
+                The gymnasium observation of the grid after the reset, and some other information about it
+            """
             return self._aux_reset_new(seed, options)
 
         def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, STEP_INFO_TYPING]: