Merge pull request #218 from ds4dm/release_0.7.2

gasse · web-flow · commit aab7e5ed26d4 · 2021-07-07T12:05:25.000-04:00
Release 0.7.2
diff --git a/libecole/CMakeLists.txt b/libecole/CMakeLists.txt
@@ -125,6 +125,7 @@ find_or_download_package(
 		-D FMT_DOC=OFF
 		-D FMT_INSTALL=ON
 		-D CMAKE_BUILD_TYPE=Release
+		-D BUILD_SHARED_LIBS=OFF
 		-D CMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE}
 )
 find_or_download_package(
diff --git a/libecole/include/ecole/environment/environment.hpp b/libecole/include/ecole/environment/environment.hpp
@@ -15,7 +15,7 @@
 #include "ecole/scip/type.hpp"
 #include "ecole/traits.hpp"
 
-#include <iostream>
+#include <optional>
 
 template <typename T> struct is_optional : std::false_type {};
 template <typename T> struct is_optional<std::optional<T>> : std::true_type {};
@@ -67,8 +67,8 @@ class Environment {
 		std::map<std::string, scip::Param> scip_params = {},
 		Args&&... args) :
 		the_dynamics(std::forward<Args>(args)...),
-		the_observation_function(data::parse(std::move(observation_function))),
 		the_reward_function(data::parse(std::move(reward_function))),
+		the_observation_function(data::parse(std::move(observation_function))),
 		the_information_function(data::parse(std::move(information_function))),
 		the_scip_params(std::move(scip_params)),
 		the_random_engine(spawn_random_engine()) {}
@@ -112,22 +112,23 @@ class Environment {
 			dynamics().set_dynamics_random_state(model(), random_engine());
 
 			// Reset data extraction function and bring model to initial state.
-			observation_function().before_reset(model());
 			reward_function().before_reset(model());
+			observation_function().before_reset(model());
 			information_function().before_reset(model());
-			auto const [done, action_set] = dynamics().reset_dynamics(model(), std::forward<Args>(args)...);
+
+			// Place the environment in its initial state
+			auto [done, action_set] = dynamics().reset_dynamics(model(), std::forward<Args>(args)...);
 			can_transition = !done;
 
-			auto observation = OptionalObservation{};
-			if (!done) {
-				observation = observation_function().extract(model(), done);
-			}
+			// Extract additional information to be returned by reset
+			auto [reward, observation, information] = extract_reward_observation_information(done);
+
 			return {
 				std::move(observation),
 				std::move(action_set),
-				reward_function().extract(model(), done),
+				std::move(reward),
 				done,
-				information_function().extract(model(), done),
+				std::move(information),
 			};
 		} catch (std::exception const&) {
 			can_transition = false;
@@ -170,19 +171,19 @@ class Environment {
 			throw Exception("Environment need to be reset.");
 		}
 		try {
-			auto const [done, action_set] = dynamics().step_dynamics(model(), action, std::forward<Args>(args)...);
+			// Transition the environment to the next state
+			auto [done, action_set] = dynamics().step_dynamics(model(), action, std::forward<Args>(args)...);
 			can_transition = !done;
 
-			auto observation = OptionalObservation{};
-			if (!done) {
-				observation = observation_function().extract(model(), done);
-			}
+			// Extract additional information to be returned by step
+			auto [reward, observation, information] = extract_reward_observation_information(done);
+
 			return {
 				std::move(observation),
 				std::move(action_set),
-				reward_function().extract(model(), done),
+				std::move(reward),
 				done,
-				information_function().extract(model(), done),
+				std::move(information),
 			};
 		} catch (std::exception const&) {
 			can_transition = false;
@@ -201,12 +202,22 @@ class Environment {
 private:
 	Dynamics the_dynamics;
 	scip::Model the_model;
-	ObservationFunction the_observation_function;
 	RewardFunction the_reward_function;
+	ObservationFunction the_observation_function;
 	InformationFunction the_information_function;
 	std::map<std::string, scip::Param> the_scip_params;
 	RandomEngine the_random_engine;
 	bool can_transition = false;
+
+	// Extract the reward, then the observation, then the information from the current model, and return them as a tuple in that order.
+	auto extract_reward_observation_information(bool done) -> std::tuple<Reward, OptionalObservation, InformationMap> {
+		auto reward = reward_function().extract(model(), done);
+		// Don't extract observations in final states: when `done` is true an empty OptionalObservation is returned instead.
+		auto observation = done ? OptionalObservation{} : observation_function().extract(model(), done);
+		auto information = information_function().extract(model(), done);
+
+		return {std::move(reward), std::move(observation), std::move(information)};
+	}
 };
 
 }  // namespace ecole::environment
diff --git a/python/src/ecole/environment.py b/python/src/ecole/environment.py
@@ -109,19 +109,23 @@ def reset(self, instance, *dynamics_args, **dynamics_kwargs):
 
             self.dynamics.set_dynamics_random_state(self.model, self.random_engine)
 
+            # Reset data extraction functions
             self.reward_function.before_reset(self.model)
             self.observation_function.before_reset(self.model)
             self.information_function.before_reset(self.model)
+
+            # Place the environment in its initial state
             done, action_set = self.dynamics.reset_dynamics(
                 self.model, *dynamics_args, **dynamics_kwargs
             )
+            self.can_transition = not done
 
+            # Extract additional information to be returned by reset
+            reward_offset = self.reward_function.extract(self.model, done)
             if not done:
                 observation = self.observation_function.extract(self.model, done)
             else:
                 observation = None
-            reward_offset = self.reward_function.extract(self.model, done)
-            observation = self.observation_function.extract(self.model, done)
             information = self.information_function.extract(self.model, done)
 
             return observation, action_set, reward_offset, done, information
@@ -172,16 +176,18 @@ def step(self, action, *dynamics_args, **dynamics_kwargs):
             raise ecole.core.environment.Exception("Environment need to be reset.")
 
         try:
+            # Transition the environment to the next state
             done, action_set = self.dynamics.step_dynamics(
                 self.model, action, *dynamics_args, **dynamics_kwargs
             )
+            self.can_transition = not done
 
+            # Extract additional information to be returned by step
+            reward = self.reward_function.extract(self.model, done)
             if not done:
                 observation = self.observation_function.extract(self.model, done)
             else:
                 observation = None
-            reward = self.reward_function.extract(self.model, done)
-            observation = self.observation_function.extract(self.model, done)
             information = self.information_function.extract(self.model, done)
 
             return observation, action_set, reward, done, information

Original file line number	Diff line number	Diff line change
`@@ -125,6 +125,7 @@ find_or_download_package(`
`125`	`125`	`-D FMT_DOC=OFF`
`126`	`126`	`-D FMT_INSTALL=ON`
`127`	`127`	`-D CMAKE_BUILD_TYPE=Release`
	`128`	`+ -D BUILD_SHARED_LIBS=OFF`
`128`	`129`	`-D CMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE}`
`129`	`130`	`)`
`130`	`131`	`find_or_download_package(`