|
| 1 | +# bitrl |
| 2 | + |
| 3 | +```bitrl``` is an effort to provide implementations and wrappers of environments suitable for training reinforcement learning agents |
| 4 | +using C++. |
| 5 | + |
| 6 | +Furthermore, there is some minimal support for working with Arduino UNO boards over USB or WiFi. |
| 7 | +See also <a href="https://rlenvscpp.readthedocs.io/en/latest/working_with_webots.html">Working with Webots</a> |
| 8 | +for how to integrate ```bitrl``` with <a href="https://cyberbotics.com/doc/guide/installing-webots">Webots</a>. |
| 9 | + |
| 10 | +Various RL algorithms using the environments can be found at <a href="https://github.com/pockerman/cuberl/tree/master">cuberl</a>. |
| 11 | + |
| 12 | +The documentation for the library can be found <a href="https://rlenvscpp.readthedocs.io/en/latest/">here</a>. |
| 13 | +The following is an example of how to use the |
| 14 | +``FrozenLake`` environment from <a href="https://github.com/Farama-Foundation/Gymnasium/tree/main">Gymnasium</a>. |
| 15 | + |
| 16 | +```cpp |
| 17 | +
|
| 18 | +#include "bitrl/bitrl_types.h" |
| 19 | +#include "bitrl/envs/gymnasium/toy_text/frozen_lake_env.h" |
| 20 | +#include "bitrl/envs/api_server/apiserver.h" |
| 21 | +
|
| 22 | +#include <iostream> |
| 23 | +#include <string> |
| 24 | +#include <unordered_map> |
| 25 | +#include <any> |
| 26 | +
|
| 27 | +namespace example_1{ |
| 28 | +
|
| 29 | +const std::string SERVER_URL = "http://0.0.0.0:8001/api"; |
| 30 | +
|
| 31 | +using bitrl::envs::gymnasium::FrozenLake; |
| 32 | +using bitrl::envs::gymnasium::Taxi; |
| 33 | +using bitrl::envs::gymnasium::BlackJack; |
| 34 | +using bitrl::envs::gymnasium::CliffWorld; |
| 35 | +using bitrl::envs::RESTApiServerWrapper; |
| 36 | +
|
| 37 | +
|
| 38 | +void test_frozen_lake(const RESTApiServerWrapper& server){ |
| 39 | +
|
| 40 | + FrozenLake<4> env(server); |
| 41 | +
|
| 42 | + std::cout<<"Environame URL: "<<env.get_url()<<std::endl; |
| 43 | +
|
| 44 | + // make the environment |
| 45 | + std::unordered_map<std::string, std::any> options; |
| 46 | + options.insert({"is_slippery", false}); |
| 47 | + env.make("v1", options); |
| 48 | +
|
| 49 | + std::cout<<"Is environment created? "<<env.is_created()<<std::endl; |
| 50 | + std::cout<<"Is environment alive? "<<env.is_alive()<<std::endl; |
| 51 | + std::cout<<"Number of valid actions? "<<env.n_actions()<<std::endl; |
| 52 | + std::cout<<"Number of states? "<<env.n_states()<<std::endl; |
| 53 | +
|
| 54 | + // reset the environment |
| 55 | + auto time_step = env.reset(42, std::unordered_map<std::string, std::any>()); |
| 56 | +
|
| 57 | + std::cout<<"Reward on reset: "<<time_step.reward()<<std::endl; |
| 58 | + std::cout<<"Observation on reset: "<<time_step.observation()<<std::endl; |
| 59 | + std::cout<<"Is terminal state: "<<time_step.done()<<std::endl; |
| 60 | +
|
| 61 | + //...print the time_step |
| 62 | + std::cout<<time_step<<std::endl; |
| 63 | +
|
| 64 | + // take an action in the environment |
| 65 | + // 2 = RIGHT |
| 66 | + auto new_time_step = env.step(2); |
| 67 | +
|
| 68 | + std::cout<<new_time_step<<std::endl; |
| 69 | +
|
| 70 | + // get the dynamics of the environment for the given state and action |
| 71 | + auto state = 0; |
| 72 | + auto action = 1; |
| 73 | + auto dynamics = env.p(state, action); |
| 74 | +
|
| 75 | + std::cout<<"Dynamics for state="<<state<<" and action="<<action<<std::endl; |
| 76 | +
|
| 77 | + for(auto item:dynamics){ |
| 78 | +
|
| 79 | + std::cout<<std::get<0>(item)<<std::endl; |
| 80 | + std::cout<<std::get<1>(item)<<std::endl; |
| 81 | + std::cout<<std::get<2>(item)<<std::endl; |
| 82 | + std::cout<<std::get<3>(item)<<std::endl; |
| 83 | + } |
| 84 | + |
| 85 | + action = env.sample_action(); |
| 86 | + std::cout<<"Action sampled: "<<action<<std::endl; |
| 87 | + |
| 88 | + new_time_step = env.step(action); |
| 89 | + std::cout<<new_time_step<<std::endl; |
| 90 | + |
| 91 | + std::cout<<"env cidx: "<<env.cidx()<<std::endl; |
| 92 | +
|
| 93 | + // close the environment |
| 94 | + env.close(); |
| 95 | +} |
| 96 | +
|
| 97 | +} |
| 98 | +
|
| 99 | +
|
| 100 | +int main(){ |
| 101 | +
|
| 102 | + using namespace example_1; |
| 103 | + |
| 104 | + RESTApiServerWrapper server(SERVER_URL, true); |
| 105 | +
|
| 106 | + std::cout<<"Testing FrozenLake..."<<std::endl; |
| 107 | + example_1::test_frozen_lake(server); |
| 108 | + std::cout<<"===================="<<std::endl; |
| 109 | + |
| 110 | + return 0; |
| 111 | +} |
| 112 | +
|
| 113 | +``` |
0 commit comments