Hello, and thank you for reading my question. I am trying to make a 3x3 matrix pomdp game, and I am receiving the following error:
using POMDPs,POMDPModelTools
Base.@kwdef struct pomdp3x3 <: POMDP{Int64, Int64, Int64} # POMDP{State, Action, Observation
discount_factor::Float64 = 0.95 # discount
end
POMDPs.states(pomdp::pomdp3x3) = [1,2,3]
POMDPs.stateindex(pomdp::pomdp3x3, s::Int64) = s
POMDPs.actions(::pomdp3x3) = [1,2,3]
POMDPs.actionindex(pomdp::pomdp3x3, a::Int64) = a
function POMDPs.transition(pomdp::pomdp3x3, s::Int64, a::Int64)
return SparseCat([1, 2, 3], [.10, .30, .60])
end
POMDPs.observations(::pomdp3x3) = [1,2,3,4,5,6,7,8,9]
POMDPs.obsindex(::pomdp3x3, o::Int) = o
function POMDPs.observation(pomdp::pomdp3x3, a::Int64, sp::Int64)
if a == 1 && sp == 1
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.05,.05,.05,.05,.05,.05,.05,.60])
elseif a== 1 && sp == 2
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.05,.05,.05,.05,.05,.05,.60,.05])
elseif a==1 && sp==3
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.05,.05,.05,.05,.05,.60,.05,.05])
elseif a == 2 && sp == 1
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.05,.05,.05,.05,.60,.05,.05,.05])
elseif a== 2 && sp == 2
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.05,.05,.05,.60,.05,.05,.05,.05])
elseif a==2 && sp==3
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.05,.05,.60,.05,.05,.05,.05,.05])
elseif a== 3 && sp == 1
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.05,.60,.05,.05,.05,.05,.05,.05])
elseif a==3 && sp==2
return SparseCat([1,2,3,4,5,6,7,8,9], [.05,.60,.05,.05,.05,.05,.05,.05,.05])
else
return SparseCat([1,2,3,4,5,6,7,8,9], [.60,.05,.05,.05,.05,.05,.05,.05,.05])
end
end
function POMDPs.reward(pomdp::pomdp3x3, s::Int64, a::Int64)
if a == 1 && s == 1
return 0.0
elseif a== 1 && s == 2
return 0.0
elseif a== 1 && s == 3
return 0.0
elseif a== 2 && s == 1
return 0.0
elseif a== 2 && s == 2
return 0.0
elseif a== 2 && s == 3
return 0.0
elseif a== 3 && s == 1
return 0.0
elseif a== 3 && s == 2
return 0.0
elseif a== 3 && s == 3
return 0.0
end
end
POMDPs.initialstate(::pomdp3x3) = SparseCat([1,2,3,4,5,6,7,8,9], [.11,.11,.11,.11,.11,.11,.11,.11,.12])
POMDPs.discount(pomdp::pomdp3x3) = pomdp.discount_factor
m = pomdp3x3()
using QMDP
solver = QMDPSolver()
policy = POMDPs.solve(solver, m)
using POMDPSimulators
rsum = 0.0
for (s,b,a,o,r) in stepthrough(m, policy, "s,b,a,o,r", max_steps=10)
println("s: $s, b: $([pdf(b,s) for s in states(m)]), a: $a, o: $o")
global rsum += r
end
println("Undiscounted reward was $rsum.")
I think the indexing issue is related to either stateindex or actionindex, but I am not sure what to do. I appreciate any suggestions, thanks!
Hello, and thank you for reading my question. I am trying to make a 3x3 matrix pomdp game, and I am receiving the following error:
Here is my code:
I think the indexing issue is related to either stateindex or actionindex, but I am not sure what to do. I appreciate any suggestions, thanks!