Skip to content

Commit 8dfe408

Browse files
committed
Add documentation for example 1
1 parent c9be8db commit 8dfe408

File tree

2 files changed

+147
-8
lines changed

2 files changed

+147
-8
lines changed

examples/example_1/example_1.md

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
\page bitrl_example_1 Example 1 Using Gymnasium environments
2+
3+
In this example we will see how to interact with <a href="https://gymnasium.farama.org/index.html">Gymnasium</a> environments and
4+
specifically how to create and interact with the <a href="https://gymnasium.farama.org/environments/toy_text/frozen_lake/">FrozenLake</a> environment.
5+
6+
In bitrl, Gymnasium-based environments are accessed over a REST-like API maintained here: <a href="https://github.com/pockerman/bitrl-rest-api">bitrl-envs-api</a>.
7+
bitrl itself implements classes that hide this interaction from the client code.
8+
In general, environment classes in bitrl have to implement the \ref bitrl::envs::EnvBase "`bitrl::envs::EnvBase`" API.
9+
10+
In this example we will use the \ref bitrl::envs::gymnasium::FrozenLake "`bitrl::envs::gymnasium::FrozenLake`"
11+
class. This is a template class, see the example below, that itself inherits from \ref bitrl::envs::gymnasium::GymnasiumEnvBase "`bitrl::envs::gymnasium::GymnasiumEnvBase`"
12+
class.
13+
14+
Below is the driver code.
15+
16+
@code{.cpp}
17+
#include "bitrl/bitrl_types.h"
18+
#include "bitrl/envs/gymnasium/toy_text/frozen_lake_env.h"
19+
#include "bitrl/network/rest_rl_env_client.h"
20+
21+
#include <any>
22+
#include <iostream>
23+
#include <string>
24+
#include <unordered_map>
25+
26+
namespace example_1
27+
{
28+
using namespace bitrl;
29+
30+
const std::string SERVER_URL = "http://0.0.0.0:8001/api";
31+
using bitrl::envs::gymnasium::FrozenLake;
32+
using bitrl::network::RESTRLEnvClient;
33+
34+
void test_frozen_lake(RESTRLEnvClient &server)
35+
{
36+
37+
// the environment is not registered with the server
38+
std::cout << "Is environment registered: " << server.is_env_registered(FrozenLake<4>::name)
39+
<< std::endl;
40+
41+
// when the environment is created we register it with the REST client
42+
FrozenLake<4> env(server);
43+
44+
// environment name can also be accessed via env.env_name()
45+
std::cout << "Is environment registered: " << server.is_env_registered(env.env_name())
46+
<< std::endl;
47+
std::cout << "Environment URL: " << env.get_url() << std::endl;
48+
49+
// make the environment we pass both make options
50+
// and reset options
51+
std::unordered_map<std::string, std::any> make_ops;
52+
make_ops.insert({"is_slippery", false});
53+
54+
std::unordered_map<std::string, std::any> reset_ops;
55+
reset_ops.insert({"seed", static_cast<uint_t>(42)});
56+
env.make("v1", make_ops, reset_ops);
57+
58+
// query the environment version
59+
std::cout << "Environment version: " << env.version() << std::endl;
60+
61+
// once the env is created we can get its id
62+
std::cout << "Environment idx is: " << env.idx() << std::endl;
63+
64+
// the create flag should be true
65+
std::cout << "Is environment created? " << env.is_created() << std::endl;
66+
67+
// environment should be alive on the server
68+
std::cout << "Is environment alive? " << env.is_alive() << std::endl;
69+
70+
// FrozenLake is a discrete state-action env so we can
71+
// query number of actions and states
72+
std::cout << "Number of valid actions? " << env.n_actions() << std::endl;
73+
std::cout << "Number of states? " << env.n_states() << std::endl;
74+
75+
// how many copies of this environment
76+
auto n_copies = env.n_copies();
77+
std::cout << "n_copies: " << n_copies << std::endl;
78+
79+
// reset the environment
80+
auto time_step = env.reset();
81+
82+
std::cout << "Reward on reset: " << time_step.reward() << std::endl;
83+
std::cout << "Observation on reset: " << time_step.observation() << std::endl;
84+
std::cout << "Is terminal state: " << time_step.done() << std::endl;
85+
86+
//...print the time_step
87+
std::cout << time_step << std::endl;
88+
89+
// take an action in the environment
90+
// 2 = RIGHT
91+
auto new_time_step = env.step(2);
92+
std::cout << new_time_step << std::endl;
93+
94+
// get the dynamics of the environment for the given state and action
95+
auto state = 0;
96+
auto action = 1;
97+
auto dynamics = env.p(state, action);
98+
99+
std::cout << "Dynamics for state=" << state << " and action=" << action << std::endl;
100+
for (auto item : dynamics)
101+
{
102+
std::cout << std::get<0>(item) << std::endl;
103+
std::cout << std::get<1>(item) << std::endl;
104+
std::cout << std::get<2>(item) << std::endl;
105+
std::cout << std::get<3>(item) << std::endl;
106+
}
107+
108+
// discrete action environments can sample
109+
// actions
110+
action = env.sample_action();
111+
std::cout << "Action sampled: " << action << std::endl;
112+
113+
new_time_step = env.step(action);
114+
std::cout << new_time_step << std::endl;
115+
116+
// close the environment
117+
env.close();
118+
}
119+
120+
} // namespace example_1
121+
122+
int main()
123+
{
124+
using namespace example_1;
125+
RESTRLEnvClient server(SERVER_URL, false);
126+
127+
std::cout << "Testing FrozenLake..." << std::endl;
128+
example_1::test_frozen_lake(server);
129+
std::cout << "====================" << std::endl;
130+
return 0;
131+
}
132+
@endcode
133+
134+
In order to run the example you will need an instance of the <a href="https://github.com/pockerman/bitrl-rest-api">bitrl-envs-api</a> server running
135+
on your machine listening on port 8001. Note that the actual example also shows how to use \ref bitrl::envs::gymnasium::Taxi "`bitrl::envs::gymnasium::Taxi`",
136+
\ref bitrl::envs::gymnasium::CliffWorld "`bitrl::envs::gymnasium::CliffWorld`" and \ref bitrl::envs::gymnasium::BlackJack "`bitrl::envs::gymnasium::BlackJack`"
137+
environments.

examples/example_14/example_14.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
\page bitrl_example_14 Example 14 Create an environment using Chrono
22

3-
in this example we will create an environment for reinforcement learning based
3+
In this example we will create an environment for reinforcement learning based
44
on the <a href="https://github.com/projectchrono/chrono">Chrono</a> library.
55
Specifically, we will create an environment that includes a <a href="https://en.wikipedia.org/wiki/Differential_wheeled_robot">differential drive system</a>.
66
Note that the model we will create will not be of high fidelity as the purpose of the example is to show how
@@ -43,7 +43,7 @@ private:
4343
The chassis of the robot is a simple rectangular plate. It also has three wheels. The model robot we will develop will not consider motors.
4444
However, Chrono allows for high fidelity models if this is needed. Below is the function that builds the robot
4545

46-
@code
46+
@code{.cpp}
4747
void DiffDriveRobot::build()
4848
{
4949
// build the chassis of the robot
@@ -72,7 +72,7 @@ chassis_->SetPos(chrono::ChVector3d(0.0, 0.0, 0.22));
7272

7373
The reset function simply resets the robot to its original position
7474

75-
@code
75+
@code{.cpp}
7676
void
7777
DiffDriveRobot::reset()
7878
{
@@ -85,7 +85,7 @@ caster_wheel_ -> SetPos(chrono::ChVector3d(0.2, 0.0, 0.16));
8585

8686
Below are some helper functions for the robot.
8787

88-
@code
88+
@code{.cpp}
8989
void DiffDriveRobot::add_to_sys(chrono::ChSystemSMC& sys)
9090
{
9191
sys.Add(chassis_);
@@ -112,7 +112,7 @@ caster_wheel_ -> SetLinVel(chrono::ChVector3d(speed, 0.0, 0.0));
112112
The environment class inherits from the \ref bitrl::envs::EnvBase "`bitrl::envs::EnvBase`" class. We will need to specify the
113113
time step type and the space type:
114114

115-
@code
115+
@code{.cpp}
116116
constexpr uint_t STATE_SPACE_SIZE = 2;
117117
constexpr uint_t ACTION_SPACE_SIZE = 1;
118118

@@ -124,7 +124,9 @@ typedef TimeStep<chrono::ChVector3d> time_step_type;
124124
typedef bitrl::envs::ContinuousVectorStateContinuousVectorActionEnv<STATE_SPACE_SIZE, STATE_SPACE_SIZE> space_type;
125125
@endcode
126126

127-
@code
127+
Here is the definition of the actual class.
128+
129+
@code{.cpp}
128130
class DiffDriveRobotEnv final: public bitrl::envs::EnvBase<time_step_type, space_type>
129131
{
130132
public:
@@ -153,7 +155,7 @@ private:
153155

154156
Below are the implementations for reset, step and make
155157

156-
@code
158+
@code{.cpp}
157159
void
158160
DiffDriveRobotEnv::make(const std::string &version,
159161
const std::unordered_map<std::string, std::any> &make_options,
@@ -207,7 +209,7 @@ BOOST_LOG_TRIVIAL(info)<<"Reset simulation: ";
207209

208210
The simulate function wraps everything together
209211

210-
@code
212+
@code{.cpp}
211213
void DiffDriveRobotEnv::simulate()
212214
{
213215

0 commit comments

Comments
 (0)