Skip to content

Commit afd6688

Browse files
committed
Fix bug with BlackJack environment
1 parent 6bab3a5 commit afd6688

File tree

3 files changed

+69
-20
lines changed

3 files changed

+69
-20
lines changed

examples/example_1/example_1.cpp

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ void test_frozen_lake(RESTRLEnvClient& server){
4747
std::cout<<"Environment version: "<<env.version()<<std::endl;
4848

4949
// once the env is created we can get its id
50-
std::cout<<"Environment is: "<<env.idx()<<std::endl;
50+
std::cout<<"Environment idx is: "<<env.idx()<<std::endl;
5151

5252
// the create flag should be true
5353
std::cout<<"Is environment created? "<<env.is_created()<<std::endl;
@@ -107,8 +107,6 @@ void test_taxi(RESTRLEnvClient& server){
107107

108108
Taxi env(server);
109109

110-
// environment name can also be accessed via env.env_name()
111-
std::cout<<"Is environment registered: "<<server.is_env_registered(env.env_name())<<std::endl;
112110
std::cout<<"Environment URL: "<<env.get_url()<<std::endl;
113111

114112
// make the environment
@@ -121,7 +119,7 @@ void test_taxi(RESTRLEnvClient& server){
121119
std::cout<<"Environment version: "<<env.version()<<std::endl;
122120

123121
// once the env is created we can get its id
124-
std::cout<<"Environment is: "<<env.idx()<<std::endl;
122+
std::cout<<"Environment idx is: "<<env.idx()<<std::endl;
125123

126124
// the create flag should be true
127125
std::cout<<"Is environment created? "<<env.is_created()<<std::endl;
@@ -165,7 +163,6 @@ void test_taxi(RESTRLEnvClient& server){
165163

166164
// close the environment
167165
env.close();
168-
169166
}
170167

171168

@@ -207,12 +204,24 @@ void test_cliff_world(RESTRLEnvClient& server){
207204
options["max_episode_steps"] = std::any(static_cast<bitrl::uint_t>(10));
208205
std::unordered_map<std::string, std::any> reset_ops;
209206
reset_ops.insert({"seed", static_cast<uint_t>(42)});
210-
env.make("v0", options, reset_ops);
207+
env.make("v1", options, reset_ops);
208+
209+
// query the environment version
210+
std::cout<<"Environment version: "<<env.version()<<std::endl;
211211

212-
std::cout<<"Is environment created? "<<env.is_created()<<std::endl;
213-
std::cout<<"Is environment alive? "<<env.is_alive()<<std::endl;
214-
std::cout<<"Number of valid actions? "<<env.n_actions()<<std::endl;
215-
std::cout<<"Number of states? "<<env.n_states()<<std::endl;
212+
// once the env is created we can get its id
213+
std::cout<<"Environment idx is: "<<env.idx()<<std::endl;
214+
215+
// the create flag should be true
216+
std::cout<<"Is environment created? "<<env.is_created()<<std::endl;
217+
218+
// environment should be alive on the server
219+
std::cout<<"Is environment alive? "<<env.is_alive()<<std::endl;
220+
221+
// CliffWorld is a discrete state-action env so we can
222+
// query number of actions and states
223+
std::cout<<"Number of valid actions? "<<env.n_actions()<<std::endl;
224+
std::cout<<"Number of states? "<<env.n_states()<<std::endl;
216225

217226
// reset the environment
218227
auto time_step = env.reset();
@@ -265,10 +274,10 @@ int main(){
265274
example_1::test_taxi(server);
266275
std::cout<<"===================="<<std::endl;
267276
std::cout<<"Testing BlackJack..."<<std::endl;
268-
//example_1::test_black_jack(server);
277+
example_1::test_black_jack(server);
269278
std::cout<<"===================="<<std::endl;
270279
std::cout<<"Testing CliffWorld..."<<std::endl;
271-
//example_1::test_cliff_world(server);
280+
example_1::test_cliff_world(server);
272281
std::cout<<"===================="<<std::endl;
273282
return 0;
274283
}

src/bitrl/envs/gymnasium/toy_text/black_jack_env.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@ namespace envs::gymnasium
3939

4040
BlackJack::BlackJack(network::RESTRLEnvClient& api_server)
4141
:
42-
ToyTextEnvBase<TimeStep<uint_t>, 48, 2>(api_server, BlackJack::name)
42+
ToyTextEnvBase<TimeStep<std::vector<uint_t>>, 48, 2>(api_server, BlackJack::name)
4343
{
4444
this -> get_api_server().register_if_not(BlackJack::name, BlackJack::URI);
4545
}
4646

4747
BlackJack::BlackJack(const BlackJack& other)
4848
:
49-
ToyTextEnvBase<TimeStep<uint_t>, 48, 2>(other),
49+
ToyTextEnvBase<TimeStep<std::vector<uint_t>>, 48, 2>(other),
5050
is_natural_(other.is_natural_),
5151
is_sab_(other.is_sab_)
5252
{}

src/bitrl/envs/gymnasium/toy_text/black_jack_env.h

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,51 @@ namespace bitrl{
2323
namespace envs::gymnasium
2424
{
2525

26-
///
27-
/// \brief The BlackJack class. Wrapper to the Blackjack
28-
/// OpenAI-Gym environment.
29-
///
30-
class BlackJack final: public ToyTextEnvBase<TimeStep<uint_t>, 48, 2>
26+
27+
/**
28+
* BlackJack class. Wrapper to the Blackjack OpenAI-Gym environment.
29+
*
30+
* This environment is part of the Toy Text environments, whose documentation contains general information about the environment.
31+
* Action Space: Discrete(2)
32+
* Observation Space: Tuple(Discrete(32), Discrete(11), Discrete(2))
33+
* Blackjack is a card game where the goal is to beat the dealer by obtaining cards that sum to closer
34+
* to 21 (without going over 21) than the dealer's cards.
35+
* The game starts with the dealer having one face up and one face down card, while the player has two face up cards.
36+
* All cards are drawn from an infinite deck (i.e. with replacement).
37+
* The card values are:
38+
* - Face cards (Jack, Queen, King) have a point value of 10.
39+
* - Aces can either count as 11 (called a ‘usable ace’) or 1.
40+
* - Numerical cards (2-10) have a value equal to their number.
41+
* The player has the sum of cards held.
42+
* The player can request additional cards (hit) until they decide to stop (stick) or exceed 21 (bust, immediate loss).
43+
* After the player sticks, the dealer reveals their facedown card, and draws cards until their sum is 17 or greater.
44+
* If the dealer goes bust, the player wins.
45+
* If neither the player nor the dealer busts, the outcome (win, lose, draw) is decided by whose sum is closer to 21.
46+
* This environment corresponds to the version of the blackjack problem described
47+
* in Example 5.1 in Reinforcement Learning: An Introduction by Sutton and Barto [1].
48+
*
49+
* Action Space
50+
* The action shape is (1,) in the range {0, 1} indicating whether to stick or hit.
51+
* 0: Stick
52+
* 1: Hit
53+
* Observation Space
54+
* The observation consists of a 3-tuple containing: the player’s current sum,
55+
* the value of the dealer’s one showing card (1-10 where 1 is ace), and whether the player holds a usable ace (0 or 1).
56+
* The observation is returned as (int(), int(), int()).
57+
* Rewards
58+
* win game: +1
59+
* lose game: -1
60+
* draw game: 0
61+
* win game with natural blackjack: +1.5 (if natural is True) or +1 (if natural is False)
62+
* Episode End
63+
* The episode ends if the following happens:
64+
* Termination:
65+
* The player hits and the sum of hand exceeds 21.
66+
* The player sticks.
67+
* An ace will always be counted as usable (11) unless it busts the player.
68+
*
69+
*/
70+
class BlackJack final: public ToyTextEnvBase<TimeStep<std::vector<uint_t>>, 48, 2>
3171
{
3272

3373
public:
@@ -45,7 +85,7 @@ class BlackJack final: public ToyTextEnvBase<TimeStep<uint_t>, 48, 2>
4585
///
4686
/// \brief The base type
4787
///
48-
typedef typename ToyTextEnvBase<TimeStep<uint_t>, 48, 2>::base_type base_type;
88+
typedef typename ToyTextEnvBase<TimeStep<std::vector<uint_t>>, 48, 2>::base_type base_type;
4989

5090
///
5191
/// \brief The time step type we return every time a step in the

0 commit comments

Comments
 (0)