|
1 | | -# bitrl |
2 | | - |
3 | | -```bitrl``` is an effort to provide implementations and wrappers of environments suitable for training reinforcement learning agents |
| 1 | +# bitrl-doc |
| 2 | +_bitrl_ is an effort to provide implementations and wrappers of environments suitable for training reinforcement learning agents |
4 | 3 | using C++. |
5 | 4 |
|
6 | | -Furthermore, there is some minimal support for working with Arduino UNO boards over USB or WiFi. |
7 | | -See also <a href="https://rlenvscpp.readthedocs.io/en/latest/working_with_webots.html">Working with Webots</a> |
8 | | -for how to integrate ```bitrl``` with <a href="https://cyberbotics.com/doc/guide/installing-webots">Webots</a>. |
9 | | - |
10 | | -Various RL algorithms using the environments can be found at <a href="https://github.com/pockerman/cuberl/tree/master">cuberl</a>. |
11 | | - |
12 | | -The documentation for the library can be found <a href="https://rlenvscpp.readthedocs.io/en/latest/">here</a>. |
13 | 5 | The following is an example of how to use the |
14 | | -``FrozenLake`` environment from <a href="https://github.com/Farama-Foundation/Gymnasium/tree/main">Gymnasium</a>. |
15 | | - |
16 | | -``` |
| 6 | +_FrozenLake_ environment from <a href="https://github.com/Farama-Foundation/Gymnasium/tree/main">Gymnasium</a>. |
17 | 7 |
|
| 8 | +@code |
18 | 9 | #include "bitrl/bitrl_types.h" |
19 | 10 | #include "bitrl/envs/gymnasium/toy_text/frozen_lake_env.h" |
20 | | -#include "bitrl/envs/api_server/apiserver.h" |
| 11 | +#include "bitrl/network/rest_rl_env_client.h" |
21 | 12 |
|
22 | 13 | #include <iostream> |
23 | 14 | #include <string> |
24 | 15 | #include <unordered_map> |
25 | 16 | #include <any> |
26 | 17 |
|
27 | | -namespace example_1{ |
28 | | -
|
| 18 | +namespace example_1 { |
29 | 19 | const std::string SERVER_URL = "http://0.0.0.0:8001/api"; |
30 | | -
|
31 | 20 | using bitrl::envs::gymnasium::FrozenLake; |
32 | | -using bitrl::envs::gymnasium::Taxi; |
33 | | -using bitrl::envs::gymnasium::BlackJack; |
34 | | -using bitrl::envs::gymnasium::CliffWorld; |
35 | 21 | using bitrl::envs::RESTApiServerWrapper; |
36 | 22 |
|
| 23 | +void test_frozen_lake(const RESTApiServerWrapper& server) { |
37 | 24 |
|
38 | | -void test_frozen_lake(const RESTApiServerWrapper& server){ |
39 | | -
|
40 | | - FrozenLake<4> env(server); |
41 | | -
|
42 | | - std::cout<<"Environame URL: "<<env.get_url()<<std::endl; |
43 | | -
|
44 | | - // make the environment |
45 | | - std::unordered_map<std::string, std::any> options; |
46 | | - options.insert({"is_slippery", false}); |
47 | | - env.make("v1", options); |
48 | | -
|
49 | | - std::cout<<"Is environment created? "<<env.is_created()<<std::endl; |
50 | | - std::cout<<"Is environment alive? "<<env.is_alive()<<std::endl; |
51 | | - std::cout<<"Number of valid actions? "<<env.n_actions()<<std::endl; |
52 | | - std::cout<<"Number of states? "<<env.n_states()<<std::endl; |
| 25 | + FrozenLake<4> env(server); |
| 26 | + std::cout << "Environment URL: " << env.get_url() << std::endl; |
| 27 | + |
| 28 | + std::unordered_map<std::string, std::any> make_ops; |
| 29 | + make_ops.insert({"is_slippery", false}); |
| 30 | + |
| 31 | + std::unordered_map<std::string, std::any> reset_ops; |
| 32 | + reset_ops.insert({"seed", static_cast<uint_t>(42)}); |
53 | 33 |
|
54 | | - // reset the environment |
55 | | - auto time_step = env.reset(42, std::unordered_map<std::string, std::any>()); |
56 | | -
|
57 | | - std::cout<<"Reward on reset: "<<time_step.reward()<<std::endl; |
58 | | - std::cout<<"Observation on reset: "<<time_step.observation()<<std::endl; |
59 | | - std::cout<<"Is terminal state: "<<time_step.done()<<std::endl; |
60 | | -
|
61 | | - //...print the time_step |
62 | | - std::cout<<time_step<<std::endl; |
63 | | -
|
64 | | - // take an action in the environment |
65 | | - // 2 = RIGHT |
66 | | - auto new_time_step = env.step(2); |
67 | | -
|
68 | | - std::cout<<new_time_step<<std::endl; |
| 34 | + env.make("v1", make_ops, reset_ops); |
| 35 | +} |
| 36 | +} // namespace example_1 |
69 | 37 |
|
70 | | - // get the dynamics of the environment for the given state and action |
71 | | - auto state = 0; |
72 | | - auto action = 1; |
73 | | - auto dynamics = env.p(state, action); |
| 38 | +int main() { |
| 39 | +RESTApiServerWrapper server(SERVER_URL, true); |
| 40 | +example_1::test_frozen_lake(server); |
| 41 | +return 0; |
| 42 | +} |
| 43 | +@endcode |
74 | 44 |
|
75 | | - std::cout<<"Dynamics for state="<<state<<" and action="<<action<<std::endl; |
76 | 45 |
|
77 | | - for(auto item:dynamics){ |
| 46 | +Gymnasium environments exposed over a REST-like API can be found at <a href="https://github.com/pockerman/bitrl-rest-api">bitrl-rest-api</a>. |
| 47 | +Various RL algorithms using the environments can be found at <a href="https://github.com/pockerman/cuberl/tree/master">cuberl</a>. |
78 | 48 |
|
79 | | - std::cout<<std::get<0>(item)<<std::endl; |
80 | | - std::cout<<std::get<1>(item)<<std::endl; |
81 | | - std::cout<<std::get<2>(item)<<std::endl; |
82 | | - std::cout<<std::get<3>(item)<<std::endl; |
83 | | - } |
84 | | - |
85 | | - action = env.sample_action(); |
86 | | - std::cout<<"Action sampled: "<<action<<std::endl; |
87 | | - |
88 | | - new_time_step = env.step(action); |
89 | | - std::cout<<new_time_step<<std::endl; |
90 | | - |
91 | | - std::cout<<"env cidx: "<<env.cidx()<<std::endl; |
| 49 | +## Dependencies |
92 | 50 |
|
93 | | - // close the environment |
94 | | - env.close(); |
95 | | -} |
| 51 | +_bitrl_ has a number of dependencies that are assumed to be installed in the usual locations on the system: |
96 | 52 |
|
97 | | -} |
| 53 | +- Boost |
| 54 | +- Eigen3 |
| 55 | +- Blas |
| 56 | +- OpenCV |
| 57 | +- PahoMqttCpp |
98 | 58 |
|
| 59 | +## Installation |
99 | 60 |
|
100 | | -int main(){ |
| 61 | +The usual _cmake_ installation/build can be used: |
101 | 62 |
|
102 | | - using namespace example_1; |
103 | | - |
104 | | - RESTApiServerWrapper server(SERVER_URL, true); |
| 63 | +@code |
| 64 | +mkdir build && cd build |
| 65 | +cmake -DCMAKE_INSTALL_PREFIX=/path/where/bitrl/should/be/installed/to .. |
| 66 | +make install -j4 |
| 67 | +@endcode |
105 | 68 |
|
106 | | - std::cout<<"Testing FrozenLake..."<<std::endl; |
107 | | - example_1::test_frozen_lake(server); |
108 | | - std::cout<<"===================="<<std::endl; |
109 | | - |
110 | | - return 0; |
111 | | -} |
| 69 | +## Examples |
112 | 70 |
|
113 | | -``` |
0 commit comments