|
| 1 | +How to use |
| 2 | +============= |
| 3 | + |
| 4 | +The following is an example of how to use the |
| 5 | +``FrozenLake`` environment from `Gymnasium <https://github.com/Farama-Foundation/Gymnasium/tree/main>`_. |
| 6 | + |
| 7 | +.. code-block:: cpp |
| 8 | +
|
| 9 | + #include "rlenvs/rlenvs_types_v2.h" |
| 10 | + #include "rlenvs/envs/gymnasium/toy_text/frozen_lake_env.h" |
| 11 | + #include "rlenvs/envs/api_server/apiserver.h" |
| 12 | +
|
| 13 | + #include <iostream> |
| 14 | + #include <string> |
| 15 | + #include <unordered_map> |
| 16 | + #include <any> |
| 17 | +
|
| 18 | + namespace example_1{ |
| 19 | +
|
| 20 | + const std::string SERVER_URL = "http://0.0.0.0:8001/api"; |
| 21 | +
|
| 22 | + using rlenvscpp::envs::gymnasium::FrozenLake; |
| 23 | + using rlenvscpp::envs::RESTApiServerWrapper; |
| 24 | +
|
| 25 | +
|
| 26 | + void test_frozen_lake(const RESTApiServerWrapper& server){ |
| 27 | +
|
| 28 | + FrozenLake<4> env(server); |
| 29 | +
|
| 30 | + std::cout<<"Environame URL: "<<env.get_url()<<std::endl; |
| 31 | +
|
| 32 | + // make the environment |
| 33 | + std::unordered_map<std::string, std::any> options; |
| 34 | + options.insert({"is_slippery", false}); |
| 35 | + env.make("v1", options); |
| 36 | +
|
| 37 | + std::cout<<"Is environment created? "<<env.is_created()<<std::endl; |
| 38 | + std::cout<<"Is environment alive? "<<env.is_alive()<<std::endl; |
| 39 | + std::cout<<"Number of valid actions? "<<env.n_actions()<<std::endl; |
| 40 | + std::cout<<"Number of states? "<<env.n_states()<<std::endl; |
| 41 | +
|
| 42 | + // reset the environment |
| 43 | + auto time_step = env.reset(42, std::unordered_map<std::string, std::any>()); |
| 44 | +
|
| 45 | + std::cout<<"Reward on reset: "<<time_step.reward()<<std::endl; |
| 46 | + std::cout<<"Observation on reset: "<<time_step.observation()<<std::endl; |
| 47 | + std::cout<<"Is terminal state: "<<time_step.done()<<std::endl; |
| 48 | +
|
| 49 | + //...print the time_step |
| 50 | + std::cout<<time_step<<std::endl; |
| 51 | +
|
| 52 | + // take an action in the environment |
| 53 | + // 2 = RIGHT |
| 54 | + auto new_time_step = env.step(2); |
| 55 | +
|
| 56 | + std::cout<<new_time_step<<std::endl; |
| 57 | +
|
| 58 | + // get the dynamics of the environment for the given state and action |
| 59 | + auto state = 0; |
| 60 | + auto action = 1; |
| 61 | + auto dynamics = env.p(state, action); |
| 62 | +
|
| 63 | + std::cout<<"Dynamics for state="<<state<<" and action="<<action<<std::endl; |
| 64 | +
|
| 65 | + for(auto item:dynamics){ |
| 66 | +
|
| 67 | + std::cout<<std::get<0>(item)<<std::endl; |
| 68 | + std::cout<<std::get<1>(item)<<std::endl; |
| 69 | + std::cout<<std::get<2>(item)<<std::endl; |
| 70 | + std::cout<<std::get<3>(item)<<std::endl; |
| 71 | + } |
| 72 | + |
| 73 | + action = env.sample_action(); |
| 74 | + new_time_step = env.step(action); |
| 75 | +
|
| 76 | + std::cout<<new_time_step<<std::endl; |
| 77 | + |
| 78 | + // synchronize the environment |
| 79 | + env.sync(std::unordered_map<std::string, std::any>()); |
| 80 | + |
| 81 | + auto copy_env = env.make_copy(1); |
| 82 | + copy_env.reset(); |
| 83 | + |
| 84 | + std::cout<<"Org env cidx: "<<env.cidx()<<std::endl; |
| 85 | + std::cout<<"Copy env cidx: "<<copy_env.cidx()<<std::endl; |
| 86 | + |
| 87 | + copy_env.close(); |
| 88 | +
|
| 89 | + // close the environment |
| 90 | + env.close(); |
| 91 | +
|
| 92 | + } |
| 93 | +
|
| 94 | + } |
| 95 | +
|
| 96 | +
|
| 97 | + int main(){ |
| 98 | +
|
| 99 | + using namespace example_1; |
| 100 | + |
| 101 | + RESTApiServerWrapper server(SERVER_URL, true); |
| 102 | +
|
| 103 | + std::cout<<"Testing FrozenLake..."<<std::endl; |
| 104 | + example_1::test_frozen_lake(server); |
| 105 | + std::cout<<"===================="<<std::endl; |
| 106 | + return 0; |
| 107 | + } |
| 108 | +
|
| 109 | +
|
| 110 | +
|
| 111 | +
|
| 112 | +In general, the environments exposed by the library follow the semantics in the `Environment API and Semantics <https://github.com/deepmind/dm_env/blob/master/docs/index.md>`_ specification. |
| 113 | +For more details see the `rlenvscpp environment specification <doc/env_spec.md>`_ document. |
| 114 | + |
| 115 | +The general use case is to build the library and link it with your driver code to access its functionality. |
| 116 | +The environments specified as using REST in the tables above, that is all ``Gymnasium``, ``gym_pybullet_drones`` and ``GymWalk`` |
| 117 | +environments, are accessed via a client/server pattern. Namely, they are exposed via an API developed using |
| 118 | +`FastAPI <https://fastapi.tiangolo.com/>`_. |
| 119 | +You need to fire up the FastAPI server (see dependencies) before using the environments in your code. |
| 120 | +To do so, run: |
| 121 | + |
| 122 | +.. code-block:: bash |
| 123 | + |
| 124 | +   ./start_uvicorn.sh |
| 125 | + |
| 126 | +By default the ``uvicorn`` server listens on port 8001. Change this if needed. You can access the OpenAPI specification at: |
| 127 | + |
| 128 | +.. code-block:: text |
| 129 | + |
| 130 | +   http://0.0.0.0:8001/docs |
| 131 | + |
| 132 | +Note that currently the implementation is not thread/process safe, i.e. if multiple threads/processes access the environment, |
| 133 | +a global instance of the environment is manipulated. Thus no session-based environment exists. |
| 134 | +However, you can create copies of the same environment and access each one via its dedicated index. |
| 135 | +If just one thread/process touches a specific environment copy you should be ok. |
| 136 | +Notice that the FastAPI server only uses a single process to manage all the environments. |
| 137 | +In addition, if you need multiple instances of the same environment you can also use one |
| 138 | +of the existing vectorised environments (see table above). |
| 139 | + |
| 140 | +Finally, you can choose to launch several instances of ``uvicorn`` (listening on different ports). |
| 141 | +However, in this case you need to implement all the interaction logic yourself, as currently no implementation exists to handle such a scenario. |
0 commit comments