@@ -50,44 +50,46 @@ const TWENTY_FORTY_EIGHT_MOVE_STRINGS = ["LEFT", "DOWN", "RIGHT", "UP"]
50
50
γ:: Float64 = 1.0
51
51
end
52
52
53
"""
    transition(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)

Sample a successor board for taking action `a` on board `s`.

The board is first shifted with `move(s, a)`. If the shift leaves the board
unchanged (terminal state or illegal action), that board is returned as is.
Otherwise a tile obtained from `draw_tile()` is placed with `insert_tile_rand`
and the resulting board is returned.

`mdp` is not read by the body; it only takes part in dispatch.
"""
function transition(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)
    shifted = move(s, a)
    # Terminal state or illegal action: nothing moved, so no tile is spawned.
    shifted == s && return shifted
    return insert_tile_rand(shifted, draw_tile())
end
61
61
62
"""
    reward(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)

Return the reward for taking action `a` on board `s`.

If `move(s, a)` leaves the board unchanged (terminal state or illegal action),
the reward is `-1.0`. Otherwise a tile from `draw_tile()` is placed with
`insert_tile_rand` and the reward is the change in `score_board` between the
resulting board and `s`.

`mdp` is not read by the body; it only takes part in dispatch.
"""
function reward(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)
    shifted = move(s, a)
    # Terminal state or illegal action: fixed penalty.
    if shifted == s
        return -1.0
    end
    # NOTE(review): this draws its own tile, separately from any call to
    # `transition`; whether the spawned tile affects `score_board` is not
    # visible here -- confirm against `score_board`.
    spawned = insert_tile_rand(shifted, draw_tile())
    return score_board(spawned) - score_board(s)
end
70
70
71
"""
    transition_and_reward(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)

Sample a successor board and its reward in a single step, returned as the
tuple `(s′, r)`.

If `move(s, a)` leaves the board unchanged (terminal state or illegal action),
the result is that unchanged board paired with `-1.0`. Otherwise a tile from
`draw_tile()` is placed with `insert_tile_rand`, and the reward is the change
in `score_board` between the resulting board and `s`.

`mdp` is not read by the body; it only takes part in dispatch.
"""
function transition_and_reward(mdp::TwentyFortyEight, s::Board, a::TwentyFortyEightAction)
    shifted = move(s, a)
    # Terminal state or illegal action: board stays put, fixed penalty.
    shifted == s && return (shifted, -1.0)
    next_board = insert_tile_rand(shifted, draw_tile())
    gain = score_board(next_board) - score_board(s)
    return (next_board, gain)
end
80
+
71
81
72
82
"""
    MDP(mdp::TwentyFortyEight; γ::Float64 = mdp.γ)

Wrap a `TwentyFortyEight` problem in an `MDP` by calling the positional `MDP`
constructor.

The discount factor defaults to `mdp.γ`. No ordered states and no
probabilistic transition function are supplied (`nothing` in both slots); the
actions are `DIRECTIONS`. The last two arguments are closures over `mdp`: one
forwarding to `reward`, one forwarding to `transition_and_reward`.
"""
function MDP(mdp::TwentyFortyEight; γ::Float64 = mdp.γ)
    reward_fn = (s, a) -> reward(mdp, s, a)
    step_fn = (s, a) -> transition_and_reward(mdp, s, a)
    return MDP(
        γ,
        nothing,    # no ordered states
        DIRECTIONS,
        nothing,    # no probabilistic transition function
        reward_fn,
        step_fn,
    )
end
86
92
87
-
88
-
89
-
90
-
91
93
"""
92
94
Print out a 2048 state.
93
95
"""
0 commit comments