Merge branch 'master' of https://github.com/algorithmsbooks/DecisionMakingProblems.jl

SidhartK · SidhartK · commit c2ffdce46f42 · 2021-05-23T22:50:15.000-07:00
diff --git a/README.md b/README.md
@@ -1,18 +1,15 @@
 # DecisionMakingProblems
 
-[![Build Status](https://travis-ci.com/SidhartK/DecisionMakingProblems.jl.svg?branch=master)](https://travis-ci.com/SidhartK/DecisionMakingProblems.jl)
-[![Coverage](https://codecov.io/gh/SidhartK/DecisionMakingProblems.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/SidhartK/DecisionMakingProblems.jl)
-[![Coverage](https://coveralls.io/repos/github/SidhartK/DecisionMakingProblems.jl/badge.svg?branch=master)](https://coveralls.io/github/SidhartK/DecisionMakingProblems.jl?branch=master)
 [![Docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://algorithmsbooks.github.io/DecisionMakingProblems.jl/)
 
-Specifically it allows the user to run various environments that are present in Algorithms for Decision Making by Mykel Kochenderfer, Tim Wheeler and Kyle Wray.
+This package contains various decision problem environments from [Algorithms for Decision Making](https://algorithmsbook.com/) by Mykel Kochenderfer, Tim Wheeler and Kyle Wray.
 
 ## Installation
 
 Start Julia and run the following command:
 
 ```julia
-Pkg.add("DecisionMakingProblems")
+] add https://github.com/algorithmsbooks/DecisionMakingProblems.jl
 ```
 
 ## Usage
@@ -26,3 +23,5 @@ using DecisionMakingProblems
 ## Credits
 
 Contributors to this package include Sidhart Krishnan, Tim Wheeler, and Mykel Kochenderfer.
+
+The 2048 implementation was translated from [a C++ implementation](https://github.com/nneonneo/2048-ai) by Robert Xiao.
diff --git a/src/mdp/2048.jl b/src/mdp/2048.jl
@@ -50,44 +50,46 @@ const TWENTY_FORTY_EIGHT_MOVE_STRINGS = ["LEFT", "DOWN", "RIGHT", "UP"]
     γ::Float64 = 1.0
 end
 
-function transition(s::Board, a::TwentyFortyEightAction)
+function transition(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)
     s′ = move(s, a)
-    if s′ == s
+    if s′ == s # terminal state or illegal action
         return s′
     end
     s′ = insert_tile_rand(s′, draw_tile())
     return s′
 end
 
-function reward(s::Board, a::TwentyFortyEightAction)
+function reward(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)
     s′ = move(s, a)
-    if s′ == s
+    if s′ == s # terminal state or illegal action
         return -1.0
     end
     s′ = insert_tile_rand(s′, draw_tile())
     return score_board(s′) - score_board(s)
 end
 
+function transition_and_reward(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)
+    s′ = move(s, a)
+    if s′ == s # terminal state or illegal action
+        return (s′, -1.0)
+    end
+    s′ = insert_tile_rand(s′, draw_tile())
+    r = score_board(s′) - score_board(s)
+    return (s′, r)
+end
+
 
 function MDP(mdp::TwentyFortyEight; γ::Float64=mdp.γ)
     return MDP(
             γ,
             nothing, # no ordered states
             DIRECTIONS,
             nothing, # no probabilistic transition function
-            (s,a) -> reward(s, a),
-            (s, a)->begin
-                s′ = transition(s, a)
-                r = reward(s, a)
-                return (s′, r)
-            end
+            (s,a) -> reward(mdp, s, a),
+            (s, a)-> transition_and_reward(mdp, s,a)
         )
 end
 
-
-
-
-
 """
 Print out a 2048 state.
 """