@@ -23,7 +23,7 @@ const p = DecisionMakingProblems
     @test !p.is_terminal(m, state)
     @test min_state <= p.vec(p.cart_pole_transition(m, state, rand(1:2))) <= max_state
     @test p.reward(m, state, rand(1:2)) in [0.0, 1.0]
-    p.MDP(m)
+    mdp = p.MDP(m)
 end

 @testset "collision_avoidance.jl" begin
@@ -35,7 +35,8 @@
     @test length(p.vec(rand(p.transition(m, s, optimal_pol(s))))) == 4
     @test p.is_terminal(m, s) == (p.vec(s)[4] < 0.0)
     @test p.reward(m, rand(p.transition(m, s, optimal_pol(s))), rand(m.𝒜)) <= 0
-    p.CollisionAvoidanceValueFunction(m, simple_pol)
+    policy = p.CollisionAvoidanceValueFunction(m, simple_pol)
+    mdp = p.MDP(m)
 end

 @testset "hexworld.jl" begin
@@ -55,15 +56,15 @@
     @test p.generate_sr(m, state, action)[1] in p.ordered_states(m) && p.generate_sr(m, state, action)[2] <= 10
     @test p.generate_start_state(m) in p.ordered_states(m)
     @test p.hex_distance(rand(hexes), rand(hexes)) >= 0
-    mdp = p.DiscreteMDP(m)
+    mdp = p.MDP(m)
 end
 @testset "simple_lqr.jl" begin
     m = p.LqrMDP()
     @test p.discount(m) == 1.0
     state = p.generate_start_state(m)
     @test -10 <= rand(p.transition(m, state, rand())) <= 10
     @test p.reward(m, state, rand()) <= 0
-    p.MDP(m)
+    mdp = p.MDP(m)
 end

 @testset "mountain_car.jl" begin
@@ -76,7 +77,7 @@
     @test all(state_min <= start_state <= state_max)
     @test all(state_min <= p.mountain_car_transition(start_state, 1) <= state_max)
     @test p.reward(m, start_state, 1) <= 0
-    p.MDP(m)
+    mdp = p.MDP(m)
 end


@@ -91,7 +92,7 @@
     @test 0 <= p.observation(m, rand(1:3), rand(1:2)).p <= 1
     @test p.reward(m, rand(1:2), rand(1:3)) <= 0
     @test p.reward(m, [0.1, 0.9], rand(1:3)) <= 0
-    pomdp = p.DiscretePOMDP(m)
+    pomdp = p.POMDP(m)
 end

 @testset "machine_replacement.jl" begin
@@ -105,7 +106,7 @@
     @test rand(p.transition(m, rand(1:3), rand(1:4))) in 1:3
     @test rand(p.observation(m, rand(1:4), rand(1:3))) in 1:2
     @test p.reward(m, rand(1:3), rand(1:4)) <= 1.0
-    p.POMDP(m)
+    pomdp = p.POMDP(m)
 end

 @testset "catch.jl" begin
@@ -119,7 +120,7 @@
     @test rand(p.transition(m, rand(1:4), rand(1:10))) in 1:4
     @test rand(p.observation(m, rand(1:10), rand(1:4))) in 1:2
     @test p.reward(m, rand(1:4), rand(1:10)) >= 0
-    p.POMDP(m)
+    pomdp = p.POMDP(m)
 end


@@ -130,7 +131,7 @@
     @test p.n_actions(m, rand(1:2)) == 2 && p.n_joint_actions(m) == 4
     @test p.reward(m, rand(1:2), [rand(p.ordered_actions(m, 0)), rand(p.ordered_actions(m, 0))]) <= 0.0
     @test p.joint_reward(m, [rand(p.ordered_actions(m, 0)), rand(p.ordered_actions(m, 0))]) <= [0.0, 0.0]
-    p.SimpleGame(m)
+    simplegame = p.SimpleGame(m)
 end

 @testset "rock_paper_scissors.jl" begin
@@ -140,7+141,7 @@
     @test p.n_actions(m, rand(1:2)) == 3 && p.n_joint_actions(m) == 9
     @test -1.0 <= p.reward(m, rand(1:2), [rand(p.ordered_actions(m, 0)), rand(p.ordered_actions(m, 0))]) <= 1.0
     @test [-1.0, -1.0] <= p.joint_reward(m, [rand(p.ordered_actions(m, 0)), rand(p.ordered_actions(m, 0))]) <= [1.0, 1.0]
-    p.SimpleGame(m)
+    simplegame = p.SimpleGame(m)
 end

 @testset "travelers.jl" begin
@@ -150,7 +151,7 @@
     @test p.n_actions(m, rand(1:2)) == 99 && p.n_joint_actions(m) == 99^2
     @test 0.0 <= p.reward(m, rand(1:2), [rand(p.ordered_actions(m, 0)), rand(p.ordered_actions(m, 0))]) <= 102
     @test [0.0, 0.0] <= p.joint_reward(m, [rand(p.ordered_actions(m, 0)), rand(p.ordered_actions(m, 0))]) <= [102, 102]
-    p.SimpleGame(m)
+    simplegame = p.SimpleGame(m)
 end

 @testset "predator_prey.jl" begin
@@ -164,7 +165,7 @@
     @test 0.0 <= p.transition(m, rand(p.ordered_states(m)), rand(p.ordered_joint_actions(m)), rand(p.ordered_states(m))) <= 1.0
     @test -1.0 <= p.reward(m, rand(1:2), rand(p.ordered_states(m)), rand(p.ordered_joint_actions(m))) <= 10.0
     @test [-1.0, -1.0] <= p.joint_reward(m, rand(p.ordered_states(m)), rand(p.ordered_joint_actions(m))) <= [10.0, 10.0]
-    p.MG(m)
+    mg = p.MG(m)
 end

 @testset "multicaregiver.jl" begin
@@ -180,7 +181,7 @@
     @test 0.0 <= p.joint_observation(m, rand(p.ordered_joint_actions(m)), rand(p.ordered_states(m)), rand(p.ordered_joint_observations(m))) <= 1.0
     @test p.joint_reward(m, rand(p.ordered_states(m)), rand(p.ordered_joint_actions(m))) <= [0.0, 0.0]
     @test p.joint_reward(m, rand(Float64, 2), rand(p.ordered_joint_actions(m))) <= [0.0, 0.0]
-    p.POMG(m)
+    pomg = p.POMG(m)
 end

 @testset "collab_predator_prey.jl" begin
@@ -196,5 +197,5 @@
     @test 0.0 <= p.transition(m, rand(p.ordered_states(m)), rand(p.ordered_joint_actions(m)), rand(p.ordered_states(m))) <= 1.0
     @test 0.0 <= p.joint_observation(m, rand(p.ordered_joint_actions(m)), rand(p.ordered_states(m)), rand(p.ordered_joint_observations(m))) <= 1.0
     @test -1.0 <= p.reward(m, rand(p.ordered_states(m)), rand(p.ordered_joint_actions(m))) <= 10.0
-    p.DecPOMDP(m)
+    decpomdp = p.DecPOMDP(m)
 end
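
A minimal sketch of how one of the newly bound handles could be exercised, illustrative only and assuming MDP is the concrete wrapper type returned by p.MDP (the diff itself does not show the type definitions):

    using Test
    using DecisionMakingProblems
    const p = DecisionMakingProblems   # same alias the test file uses

    m = p.LqrMDP()                     # constructor from the simple_lqr.jl testset above
    mdp = p.MDP(m)                     # the call this diff now binds to a name
    @test mdp isa p.MDP                # assumption: MDP is the wrapper's type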