diff --git a/ext/cuda/bellman/dense.jl b/ext/cuda/bellman/dense.jl index c51c435d..43fb6502 100644 --- a/ext/cuda/bellman/dense.jl +++ b/ext/cuda/bellman/dense.jl @@ -1,4 +1,4 @@ -function IntervalMDP._bellman_helper!( +function IntervalMDP._expectation_helper!( workspace::CuDenseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, Vres::AbstractVector{Tv}, diff --git a/ext/cuda/bellman/factored.jl b/ext/cuda/bellman/factored.jl index 666b60e8..8d9e0c0e 100644 --- a/ext/cuda/bellman/factored.jl +++ b/ext/cuda/bellman/factored.jl @@ -1,4 +1,4 @@ -function IntervalMDP._bellman_helper!( +function IntervalMDP._expectation_helper!( workspace::CuFactoredOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, Vres::AbstractArray{Tv}, diff --git a/ext/cuda/bellman/sparse.jl b/ext/cuda/bellman/sparse.jl index 74662424..975d4440 100644 --- a/ext/cuda/bellman/sparse.jl +++ b/ext/cuda/bellman/sparse.jl @@ -1,4 +1,4 @@ -function IntervalMDP._bellman_helper!( +function IntervalMDP._expectation_helper!( workspace::CuSparseOMaxWorkspace, strategy_cache::IntervalMDP.AbstractStrategyCache, Vres::AbstractVector{Tv}, diff --git a/src/IntervalMDP.jl b/src/IntervalMDP.jl index 71dbab39..01e1f3de 100644 --- a/src/IntervalMDP.jl +++ b/src/IntervalMDP.jl @@ -62,6 +62,8 @@ include("threading.jl") include("workspace.jl") include("strategy_cache.jl") include("bellman.jl") +include("value.jl") +include("state_sampling.jl") include("robust_value_iteration.jl") diff --git a/src/bellman.jl b/src/bellman.jl index 5d5afcc5..6175403a 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -1,5 +1,5 @@ """ - bellman(V, model; upper_bound = false, maximize = true) + expectation(V, model; upper_bound = false, maximize = true) Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref), that upper or lower bounds the expectation of the value function `V`. 
@@ -56,7 +56,7 @@ istates = [Int32(1)] model = IntervalMarkovDecisionProcess(transition_probs, istates) Vprev = [1.0, 2.0, 3.0] -Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false) +Vcur = IntervalMDP.expectation(Vprev, model; upper_bound = false) # output @@ -71,20 +71,22 @@ Vcur = IntervalMDP.bellman(Vprev, model; upper_bound = false) For a hot-loop, it is more efficient to use `bellman!` and pass in pre-allocated objects. """ -function bellman( +function expectation( V, model, + update_sequence = sample(default_sampling_strategy(), model), alg::BellmanAlgorithm = default_bellman_algorithm(model); upper_bound = false, maximize = true, prop = nothing, ) - Vres = similar(V, source_shape(model)) + Vres = Array{eltype(V)}(undef, (action_values(model)..., size(V)...)) - return bellman!( + return expectation!( Vres, V, model, + update_sequence, alg; upper_bound = upper_bound, maximize = maximize, @@ -93,7 +95,7 @@ function bellman( end """ - bellman!(workspace, strategy_cache, Vres, V, model; upper_bound = false, maximize = true) + expectation!(workspace, strategy_cache, Vres, V, model; upper_bound = false, maximize = true) Compute in-place robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref), that upper or lower bounds the expectation of the value function `V`. @@ -159,7 +161,7 @@ workspace = IntervalMDP.construct_workspace(model) strategy_cache = IntervalMDP.construct_strategy_cache(model) Vcur = similar(Vprev) -IntervalMDP.bellman!(workspace, strategy_cache, Vcur, Vprev, model; upper_bound = false, maximize = true) +IntervalMDP.expectation!(workspace, strategy_cache, Vcur, Vprev, model; upper_bound = false, maximize = true) # output @@ -169,12 +171,13 @@ IntervalMDP.bellman!(workspace, strategy_cache, Vcur, Vprev, model; upper_bound 3.0 ``` """ -function bellman! end +function expectation! 
end -function bellman!( +function expectation!( Vres::AbstractArray, V::AbstractArray, model, + update_sequence = sample(default_sampling_strategy(), model), alg::BellmanAlgorithm = default_bellman_algorithm(model); upper_bound = false, maximize = true, @@ -183,45 +186,49 @@ function bellman!( workspace = construct_workspace(model, alg) strategy_cache = construct_strategy_cache(model) - return bellman!( + return expectation!( workspace, strategy_cache, Vres, V, - model; + model, + update_sequence; upper_bound = upper_bound, maximize = maximize, prop = prop, ) end -function bellman!( +function expectation!( workspace, strategy_cache, Vres::AbstractArray, V::AbstractArray, - model::IntervalMarkovProcess; + model::IntervalMarkovProcess, + update_sequence = sample(default_sampling_strategy(), model, strategy_cache); upper_bound = false, maximize = true, prop = nothing, ) - return _bellman_helper!( + return _expectation_helper!( workspace, strategy_cache, Vres, V, - model; + model, + update_sequence; upper_bound = upper_bound, maximize = maximize, ) end -function bellman!( +function expectation!( workspace::ProductWorkspace, strategy_cache, Vres::AbstractArray, V::AbstractArray, - model::ProductProcess; + model::ProductProcess, + update_sequence; upper_bound = false, maximize = true, prop = nothing, @@ -230,28 +237,30 @@ function bellman!( lf = labelling_function(model) dfa = automaton(model) - return _bellman_helper!( + return _expectation_helper!( workspace, strategy_cache, Vres, V, dfa, lf, - mp; + mp, + update_sequence; upper_bound = upper_bound, maximize = maximize, prop = prop, ) end -function _bellman_helper!( +function _expectation_helper!( workspace::ProductWorkspace, strategy_cache::AbstractStrategyCache, Vres, V, dfa::DFA, lf::DeterministicLabelling, - mp::IntervalMarkovProcess; + mp::IntervalMarkovProcess, + update_sequence; upper_bound = false, maximize = true, prop = nothing, @@ -274,12 +283,13 @@ function _bellman_helper!( # For each state in the product 
process, compute the Bellman operator # for the corresponding Markov process - bellman!( + expectation!( workspace.underlying_workspace, local_strategy_cache, selectdim(Vres, ndims(Vres), state), W, - mp; + mp, + update_sequence; #TODO: need to separate automata states upper_bound = upper_bound, maximize = maximize, ) @@ -288,14 +298,15 @@ function _bellman_helper!( return Vres end -function _bellman_helper!( +function _expectation_helper!( workspace::ProductWorkspace, strategy_cache::AbstractStrategyCache, Vres, V::AbstractArray{R}, dfa::DFA, lf::ProbabilisticLabelling, - mp::IntervalMarkovProcess; + mp::IntervalMarkovProcess, + update_sequence; upper_bound = false, maximize = true, prop = nothing, @@ -325,12 +336,13 @@ function _bellman_helper!( # For each state in the product process, compute the Bellman operator # for the corresponding Markov process - bellman!( + expectation!( workspace.underlying_workspace, local_strategy_cache, selectdim(Vres, ndims(Vres), state), W, - mp; + mp, + update_sequence; #TODO: need to separate automata states upper_bound = upper_bound, maximize = maximize, ) @@ -370,118 +382,136 @@ end ########################################################################### # Non-threaded -function _bellman_helper!( +function _expectation_helper!( workspace::Union{DenseIntervalOMaxWorkspace, SparseIntervalOMaxWorkspace}, - strategy_cache::AbstractStrategyCache, + strategy_cache::OptimizingStrategyCache, Vres, V, - model; + model, + update_sequence = sample(default_sampling_strategy(), model, strategy_cache); upper_bound = false, maximize = true, ) - bellman_precomputation!(workspace, V, upper_bound) + expectation_precomputation!(workspace, V, upper_bound) + + marginal = marginals(model)[1] + + @inbounds for (jₐ, jₛ) in update_sequence + ambiguity_set = marginal[jₐ, jₛ] + budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)] + + Vres[jₐ, jₛ] = + state_action_expectation(workspace, V, ambiguity_set, budget, upper_bound) + end + + return Vres +end 
+ +function _expectation_helper!( + workspace::Union{DenseIntervalOMaxWorkspace, SparseIntervalOMaxWorkspace}, + strategy_cache::NonOptimizingStrategyCache, + Vres, + V, + model, + update_sequence = sample(default_sampling_strategy(), model, strategy_cache); + upper_bound = false, + maximize = true, +) + expectation_precomputation!(workspace, V, upper_bound) + + marginal = marginals(model)[1] - for jₛ in CartesianIndices(source_shape(model)) - state_bellman!(workspace, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) + @inbounds for (jₐ, jₛ) in update_sequence + ambiguity_set = marginal[jₐ, jₛ] + budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)] + + Vres[jₛ] = + state_action_expectation(workspace, V, ambiguity_set, budget, upper_bound) end return Vres end # Threaded -function _bellman_helper!( +function _expectation_helper!( workspace::Union{ ThreadedDenseIntervalOMaxWorkspace, ThreadedSparseIntervalOMaxWorkspace, }, - strategy_cache::AbstractStrategyCache, + strategy_cache::OptimizingStrategyCache, Vres, V, - model; + model, + update_sequence = sample(default_sampling_strategy(), model, strategy_cache); upper_bound = false, maximize = true, ) - @inbounds bellman_precomputation!(workspace, V, upper_bound) - @threadstid tid for jₛ in CartesianIndices(source_shape(model)) + @inbounds expectation_precomputation!(workspace, V, upper_bound) + + marginal = marginals(model)[1] + + @threadstid tid for (jₐ, jₛ) in update_sequence @inbounds ws = workspace[tid] - @inbounds state_bellman!( - ws, - strategy_cache, - Vres, - V, - model, - jₛ, - upper_bound, - maximize, - ) + + @inbounds ambiguity_set = marginal[jₐ, jₛ] + @inbounds budget = ws.budget[sub2ind(marginal, jₐ, jₛ)] + @inbounds Vres[jₐ, jₛ] = + state_action_expectation(ws, V, ambiguity_set, budget, upper_bound) + end return Vres end -Base.@propagate_inbounds function bellman_precomputation!( - workspace::Union{DenseIntervalOMaxWorkspace, ThreadedDenseIntervalOMaxWorkspace}, - V, - upper_bound, -) - # 
rev=true for upper bound - sortperm!(permutation(workspace), V; rev = upper_bound, scratch = scratch(workspace)) -end - -Base.@propagate_inbounds bellman_precomputation!( - workspace::Union{SparseIntervalOMaxWorkspace, ThreadedSparseIntervalOMaxWorkspace}, - V, - upper_bound, -) = nothing - -Base.@propagate_inbounds function state_bellman!( - workspace::Union{DenseIntervalOMaxWorkspace, SparseIntervalOMaxWorkspace}, - strategy_cache::OptimizingStrategyCache, +function _expectation_helper!( + workspace::Union{ + ThreadedDenseIntervalOMaxWorkspace, + ThreadedSparseIntervalOMaxWorkspace, + }, + strategy_cache::NonOptimizingStrategyCache, Vres, V, model, - jₛ, - upper_bound, - maximize, + update_sequence = sample(default_sampling_strategy(), model, strategy_cache); + upper_bound = false, + maximize = true, ) + + @inbounds expectation_precomputation!(workspace, V, upper_bound) + marginal = marginals(model)[1] - for jₐ in available(model, jₛ) - ambiguity_set = marginal[jₐ, jₛ] - budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)] - workspace.actions[jₐ] = - state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) + @threadstid tid for (jₐ, jₛ) in update_sequence + @inbounds ws = workspace[tid] + + @inbounds ambiguity_set = marginal[jₐ, jₛ] + @inbounds budget = ws.budget[sub2ind(marginal, jₐ, jₛ)] + @inbounds Vres[jₛ] = + state_action_expectation(ws, V, ambiguity_set, budget, upper_bound) + end - Vres[jₛ] = extract_strategy!( - strategy_cache, - workspace.actions, - available(model, jₛ), - jₛ, - maximize, - ) + return Vres end -Base.@propagate_inbounds function state_bellman!( - workspace::Union{DenseIntervalOMaxWorkspace, SparseIntervalOMaxWorkspace}, - strategy_cache::NonOptimizingStrategyCache, - Vres, +Base.@propagate_inbounds function expectation_precomputation!( + workspace::Union{DenseIntervalOMaxWorkspace, ThreadedDenseIntervalOMaxWorkspace}, V, - model, - jₛ, upper_bound, - maximize, ) - marginal = marginals(model)[1] - - jₐ = 
CartesianIndex(strategy_cache[jₛ]) - ambiguity_set = marginal[jₐ, jₛ] - budget = workspace.budget[sub2ind(marginal, jₐ, jₛ)] - Vres[jₛ] = state_action_bellman(workspace, V, ambiguity_set, budget, upper_bound) + # rev=true for upper bound + sortperm!(permutation(workspace), V; rev = upper_bound, scratch = scratch(workspace)) end -Base.@propagate_inbounds function state_action_bellman( +Base.@propagate_inbounds expectation_precomputation!( + workspace::Union{SparseIntervalOMaxWorkspace, ThreadedSparseIntervalOMaxWorkspace}, + V, + upper_bound, +) = nothing + + +Base.@propagate_inbounds function state_action_expectation( workspace::DenseIntervalOMaxWorkspace, V, ambiguity_set, @@ -513,7 +543,8 @@ Base.@propagate_inbounds function gap_value( return res end -Base.@propagate_inbounds function state_action_bellman( + +Base.@propagate_inbounds function state_action_expectation( workspace::SparseIntervalOMaxWorkspace, V, ambiguity_set, @@ -556,7 +587,7 @@ end ########################################################## # Non-threaded -function _bellman_helper!( +function _expectation_helper!( workspace::FactoredIntervalMcCormickWorkspace, strategy_cache::AbstractStrategyCache, Vres, @@ -566,14 +597,14 @@ function _bellman_helper!( maximize = true, ) @inbounds for jₛ in CartesianIndices(source_shape(model)) - state_bellman!(workspace, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) + state_expectation!(workspace, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) end return Vres end # Threaded -function _bellman_helper!( +function _expectation_helper!( workspace::ThreadedFactoredIntervalMcCormickWorkspace, strategy_cache::AbstractStrategyCache, Vres, @@ -584,7 +615,7 @@ function _bellman_helper!( ) @threadstid tid for jₛ in CartesianIndices(source_shape(model)) @inbounds ws = workspace[tid] - @inbounds state_bellman!( + @inbounds state_expectation!( ws, strategy_cache, Vres, @@ -599,7 +630,7 @@ function _bellman_helper!( return Vres end 
-Base.@propagate_inbounds function state_bellman!( +Base.@propagate_inbounds function state_expectation!( workspace::FactoredIntervalMcCormickWorkspace, strategy_cache::OptimizingStrategyCache, Vres, @@ -612,7 +643,7 @@ Base.@propagate_inbounds function state_bellman!( for jₐ in available(model, jₛ) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) workspace.actions[jₐ] = - state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + state_action_expectation(workspace, V, ambiguity_sets, upper_bound) end Vres[jₛ] = extract_strategy!( @@ -624,7 +655,7 @@ Base.@propagate_inbounds function state_bellman!( ) end -Base.@propagate_inbounds function state_bellman!( +Base.@propagate_inbounds function state_expectation!( workspace::FactoredIntervalMcCormickWorkspace, strategy_cache::NonOptimizingStrategyCache, Vres, @@ -636,10 +667,10 @@ Base.@propagate_inbounds function state_bellman!( ) jₐ = CartesianIndex(strategy_cache[jₛ]) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) - Vres[jₛ] = state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + Vres[jₛ] = state_action_expectation(workspace, V, ambiguity_sets, upper_bound) end -Base.@propagate_inbounds function state_action_bellman( +Base.@propagate_inbounds function state_action_expectation( workspace::FactoredIntervalMcCormickWorkspace, V::AbstractArray{R}, ambiguity_sets, @@ -738,7 +769,7 @@ end #################################################### # O-Maximization-based Bellman operator for fIMDPs # #################################################### -function _bellman_helper!( +function _expectation_helper!( workspace::FactoredIntervalOMaxWorkspace, strategy_cache::AbstractStrategyCache, Vres, @@ -749,7 +780,7 @@ function _bellman_helper!( ) # For each source state @inbounds for jₛ in CartesianIndices(source_shape(model)) - state_bellman!( + state_expectation!( workspace, strategy_cache, Vres, @@ -764,7 +795,7 @@ function _bellman_helper!( return Vres end -function _bellman_helper!( +function 
_expectation_helper!( workspace::ThreadedFactoredIntervalOMaxWorkspace, strategy_cache::AbstractStrategyCache, Vres, @@ -777,7 +808,7 @@ function _bellman_helper!( @threadstid tid for jₛ in CartesianIndices(source_shape(model)) @inbounds ws = workspace[tid] - @inbounds state_bellman!( + @inbounds state_expectation!( ws, strategy_cache, Vres, @@ -792,7 +823,7 @@ function _bellman_helper!( return Vres end -Base.@propagate_inbounds function state_bellman!( +Base.@propagate_inbounds function state_expectation!( workspace::FactoredIntervalOMaxWorkspace, strategy_cache::OptimizingStrategyCache, Vres, @@ -808,7 +839,7 @@ Base.@propagate_inbounds function state_bellman!( budgets = getindex.(workspace.budgets, inds) workspace.actions[jₐ] = - state_action_bellman(workspace, V, model, ambiguity_sets, budgets, upper_bound) + state_action_expectation(workspace, V, model, ambiguity_sets, budgets, upper_bound) end Vres[jₛ] = extract_strategy!( @@ -820,7 +851,7 @@ Base.@propagate_inbounds function state_bellman!( ) end -Base.@propagate_inbounds function state_bellman!( +Base.@propagate_inbounds function state_expectation!( workspace::FactoredIntervalOMaxWorkspace, strategy_cache::NonOptimizingStrategyCache, Vres, @@ -835,10 +866,10 @@ Base.@propagate_inbounds function state_bellman!( inds = map(marginal -> sub2ind(marginal, jₐ, jₛ), marginals(model)) budgets = getindex.(workspace.budgets, inds) Vres[jₛ] = - state_action_bellman(workspace, V, model, ambiguity_sets, budgets, upper_bound) + state_action_expectation(workspace, V, model, ambiguity_sets, budgets, upper_bound) end -Base.@propagate_inbounds function state_action_bellman( +Base.@propagate_inbounds function state_action_expectation( workspace::FactoredIntervalOMaxWorkspace, V, model, @@ -919,7 +950,7 @@ end ########################################################## # Non-threaded -function _bellman_helper!( +function _expectation_helper!( workspace::FactoredVertexIteratorWorkspace, strategy_cache::AbstractStrategyCache, 
Vres, @@ -929,14 +960,14 @@ function _bellman_helper!( maximize = true, ) @inbounds for jₛ in CartesianIndices(source_shape(model)) - state_bellman!(workspace, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) + state_expectation!(workspace, strategy_cache, Vres, V, model, jₛ, upper_bound, maximize) end return Vres end # Threaded -function _bellman_helper!( +function _expectation_helper!( workspace::ThreadedFactoredVertexIteratorWorkspace, strategy_cache::AbstractStrategyCache, Vres, @@ -947,7 +978,7 @@ function _bellman_helper!( ) @threadstid tid for jₛ in CartesianIndices(source_shape(model)) @inbounds ws = workspace[tid] - @inbounds state_bellman!( + @inbounds state_expectation!( ws, strategy_cache, Vres, @@ -962,7 +993,7 @@ function _bellman_helper!( return Vres end -Base.@propagate_inbounds function state_bellman!( +Base.@propagate_inbounds function state_expectation!( workspace::FactoredVertexIteratorWorkspace, strategy_cache::OptimizingStrategyCache, Vres, @@ -975,7 +1006,7 @@ Base.@propagate_inbounds function state_bellman!( for jₐ in available(model, jₛ) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) workspace.actions[jₐ] = - state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + state_action_expectation(workspace, V, ambiguity_sets, upper_bound) end Vres[jₛ] = extract_strategy!( @@ -987,7 +1018,7 @@ Base.@propagate_inbounds function state_bellman!( ) end -Base.@propagate_inbounds function state_bellman!( +Base.@propagate_inbounds function state_expectation!( workspace::FactoredVertexIteratorWorkspace, strategy_cache::NonOptimizingStrategyCache, Vres, @@ -999,10 +1030,10 @@ Base.@propagate_inbounds function state_bellman!( ) jₐ = CartesianIndex(strategy_cache[jₛ]) ambiguity_sets = getindex.(marginals(model), jₐ, jₛ) - Vres[jₛ] = state_action_bellman(workspace, V, ambiguity_sets, upper_bound) + Vres[jₛ] = state_action_expectation(workspace, V, ambiguity_sets, upper_bound) end -Base.@propagate_inbounds function state_action_bellman( 
+Base.@propagate_inbounds function state_action_expectation( workspace::FactoredVertexIteratorWorkspace, V::AbstractArray{R}, ambiguity_sets, diff --git a/src/robust_value_iteration.jl b/src/robust_value_iteration.jl index b5f5c308..060d65a9 100644 --- a/src/robust_value_iteration.jl +++ b/src/robust_value_iteration.jl @@ -175,12 +175,13 @@ function _value_iteration!(problem::AbstractIntervalMDPProblem, alg; callback = # It is more efficient to use allocate first and reuse across iterations workspace = construct_workspace(mp, bellman_algorithm(alg)) strategy_cache = construct_strategy_cache(problem) + sampling_strat = sampling_strategy(alg) - value_function = ValueFunction(problem) + value_function = StateValueFunction(problem) initialize!(value_function, spec) nextiteration!(value_function) - step!(workspace, strategy_cache, value_function, 0, mp, spec) + bellman_update!(workspace, strategy_cache, sampling_strat, value_function, 0, mp, spec) k = 1 if !isnothing(callback) @@ -190,7 +191,7 @@ function _value_iteration!(problem::AbstractIntervalMDPProblem, alg; callback = while !term_criteria(value_function.current, k, lastdiff!(value_function)) nextiteration!(value_function) - step!(workspace, strategy_cache, value_function, k, mp, spec) + bellman_update!(workspace, strategy_cache, sampling_strat, value_function, k, mp, spec) k += 1 if !isnothing(callback) @@ -205,41 +206,48 @@ function _value_iteration!(problem::AbstractIntervalMDPProblem, alg; callback = return value_function.current, k, value_function.previous, strategy_cache end -struct ValueFunction{R, A <: AbstractArray{R}} - previous::A - current::A -end +function bellman_update!(workspace, strategy_cache, sampling_strat::SamplingStrategy, value_function::StateValueFunction, k, mp, spec) -function ValueFunction(problem::AbstractIntervalMDPProblem) - mp = system(problem) - previous = arrayfactory(mp, valuetype(mp), state_values(mp)) - previous .= zero(valuetype(mp)) - current = copy(previous) + update_sequence 
= sample(sampling_strat, mp, select_strategy_cache(strategy_cache, k)) - return ValueFunction(previous, current) -end + # 1. compute expectation for Q(s, a) + expectation!( + workspace, + select_strategy_cache(strategy_cache, k), + value_function.intermediate_state_action_value, + value_function.previous, + select_model(mp, k), # For time-varying available and labelling functions + update_sequence; + upper_bound = isoptimistic(spec), + maximize = ismaximize(spec), + prop = system_property(spec), + ) -function lastdiff!(V::ValueFunction{R}) where {R} - # Reuse prev to store the latest difference - V.previous .-= V.current - rmul!(V.previous, -one(R)) + # 2. extract strategy and compute V'(s) = max_a Q(s, a) + strategy!( + select_strategy_cache(strategy_cache, k), + value_function.current, + value_function.intermediate_state_action_value, + select_model(mp, k), + ismaximize(spec), + ) - return V.previous + # 3. post process to compute V(s) = g(s, V'(s)) where the definition of g depends on the objective + step_postprocess_value_function!(value_function, spec) + step_postprocess_strategy_cache!(strategy_cache) end -function nextiteration!(V) - copy!(V.previous, V.current) +function bellman_update!(workspace, strategy_cache::NonOptimizingStrategyCache, sampling_strat::SamplingStrategy, value_function::StateValueFunction, k, mp, spec) - return V -end + update_sequence = sample(sampling_strat, mp, select_strategy_cache(strategy_cache, k)) -function step!(workspace, strategy_cache, value_function, k, mp, spec) - bellman!( + expectation!( workspace, select_strategy_cache(strategy_cache, k), value_function.current, value_function.previous, - select_model(mp, k); # For time-varying available and labelling functions + select_model(mp, k), # For time-varying available and labelling functions + update_sequence; upper_bound = isoptimistic(spec), maximize = ismaximize(spec), prop = system_property(spec), diff --git a/src/state_sampling.jl b/src/state_sampling.jl new file mode 
100644 index 00000000..87cc0300 --- /dev/null +++ b/src/state_sampling.jl @@ -0,0 +1,283 @@ +################################### +# Custom Iterator Implementations # +################################### + +abstract type AbstractIterator end +function Base.length(iter::AbstractIterator) end +function Base.firstindex(iter::AbstractIterator) end +function Base.lastindex(iter::AbstractIterator) end +function Base.getindex(iter::AbstractIterator, i) end +function Base.iterate(iter::AbstractIterator) end +function Base.iterate(iter::AbstractIterator, state) end + + +struct ProductIterator{AI, SI} <: AbstractIterator + A::AI + S::SI + nA::Int + nS::Int + + function ProductIterator(A, S) + nA = length(A) + nS = length(S) + new{typeof(A), typeof(S)}(A, S, nA, nS) + end +end + +Base.length(iter::ProductIterator) = iter.nA * iter.nS +Base.firstindex(iter::ProductIterator) = (firstindex(iter.S)-1)*iter.nS + firstindex(iter.A) +Base.lastindex(iter::ProductIterator) = (lastindex(iter.S)-1)*iter.nS + lastindex(iter.A) +Base.getindex(iter::ProductIterator, i) = begin + A = iter.A + S = iter.S + + nA = iter.nA + nS = iter.nS + + ia = ((i - 1) % nA) + firstindex(A) + is = ((i - 1) ÷ nA) + firstindex(S) + return (A[ia], S[is]) +end +Base.iterate(iter::ProductIterator) = begin + (iter.nA == 0 || iter.nS == 0) && return nothing + + A = iter.A + S = iter.S + + ia = firstindex(A) + is = firstindex(S) + + return ((A[ia], S[is]), (ia, is)) +end + +Base.iterate(iter::ProductIterator, state) = begin + A = iter.A + S = iter.S + + ia, is = state + + # iterate s as outer loop due to column major order of value function Q(a, s) + + # 1. advance inner loop (actions) + ia += 1 + + if ia > lastindex(A) + # 2. reset inner loop and advance outer loop (states) + ia = firstindex(A) + is += 1 + end + + # 3. 
loop exit condition + if is > lastindex(S) + return nothing + end + + return ((A[ia], S[is]), (ia, is)) +end + +struct ZipIterator{AI, SI} <: AbstractIterator + A::AI + S::SI + n::Int + + function ZipIterator(A, S) + nA = length(A) + nS = length(S) + + @assert nA == nS "Action and state spaces must have the same length for ZipIterator" + + new{typeof(A), typeof(S)}(A, S, nA) + end +end + +Base.length(iter::ZipIterator) = iter.n +Base.firstindex(iter::ZipIterator) = 1 +Base.lastindex(iter::ZipIterator) = iter.n +Base.getindex(iter::ZipIterator, i) = begin + A = iter.A + S = iter.S + + return (A[i], S[i]) +end + +Base.iterate(iter::ZipIterator) = begin + iter.n == 0 && return nothing + + A = iter.A + S = iter.S + + i = 1 + return ((A[firstindex(A)-1+i], S[firstindex(S)-1+i]), i) +end + +Base.iterate(iter::ZipIterator, i) = begin + A = iter.A + S = iter.S + + i += 1 + if i > iter.n + return nothing + end + + return ((A[firstindex(A)-1+i], S[firstindex(S)-1+i]), i) +end + +struct OnPolicyActionIterator <: AbstractIterator + S::CartesianIndices + strategy_cache::AbstractStrategyCache + + function OnPolicyActionIterator(S::CartesianIndices, strategy_cache::AbstractStrategyCache) + new(S, strategy_cache) + end +end +Base.length(iter::OnPolicyActionIterator) = length(iter.S) +Base.firstindex(iter::OnPolicyActionIterator) = firstindex(iter.S) +Base.lastindex(iter::OnPolicyActionIterator) = lastindex(iter.S) +Base.getindex(iter::OnPolicyActionIterator, i) = CartesianIndex(iter.strategy_cache[i]) +Base.iterate(iter::OnPolicyActionIterator) = begin + length(iter) == 0 && return nothing + + S = iter.S + strategy_cache = iter.strategy_cache + + i = firstindex(S) + return (CartesianIndex(strategy_cache[i]), i) +end +Base.iterate(iter::OnPolicyActionIterator, i) = begin + S = iter.S + strategy_cache = iter.strategy_cache + + i += 1 + if i > lastindex(S) + return nothing + end + + return (CartesianIndex(strategy_cache[i]), i) +end + + +struct GivenSequenceIterator{NA, NS, T} <: 
AbstractIterator + sequence::Vector{Tuple{NTuple{NA, T}, NTuple{NS, T}}} +end + +Base.length(iter::GivenSequenceIterator) = length(iter.sequence) +Base.firstindex(iter::GivenSequenceIterator) = firstindex(iter.sequence) +Base.lastindex(iter::GivenSequenceIterator) = lastindex(iter.sequence) +Base.getindex(iter::GivenSequenceIterator, i) = begin + a, s = getindex(iter.sequence, i) + + return (CartesianIndex(a...), CartesianIndex(s...)) +end +Base.iterate(iter::GivenSequenceIterator) = begin + next = iterate(iter.sequence) + + if next === nothing + return nothing + end + + value, index = next + a, s = value + + return ((CartesianIndex(a...), CartesianIndex(s...)), index) +end + +Base.iterate(iter::GivenSequenceIterator, state) = begin + next = iterate(iter.sequence, state) + + if next === nothing + return nothing + end + + value, index = next + a, s = value + + return ((CartesianIndex(a...), CartesianIndex(s...)), index) +end + + + + +################################### +# Sampling Strategies # +################################### + +abstract type SamplingStrategy end + +function sample(::SamplingStrategy, model) end + +struct AllSampling <: SamplingStrategy end + +default_sampling_strategy() = AllSampling() + +sample(::AllSampling, model) = exhaustive_cartesian(model) + +sample(::AllSampling, model, strategy_cache::AbstractStrategyCache) = exhaustive_cartesian(model, strategy_cache) + +exhaustive_cartesian(model::FactoredRMDP) = exhaustive_cartesian(model, modeltype(model)) +exhaustive_cartesian(model::FactoredRMDP, ::IsIMDP) = ProductIterator(CartesianIndices(action_shape(model)), CartesianIndices(source_shape(model))) +exhaustive_cartesian(model::IntervalAmbiguitySets) = ProductIterator(CartesianIndices(action_shape(model)), CartesianIndices(source_shape(model))) + +exhaustive_cartesian(model, strategy_cache::OptimizingStrategyCache) = exhaustive_cartesian(model) +exhaustive_cartesian(model::FactoredRMDP, strategy_cache::NonOptimizingStrategyCache) = 
exhaustive_cartesian(model, modeltype(model), strategy_cache) + +function exhaustive_cartesian(model::FactoredRMDP, ::IsIMDP, strategy_cache::NonOptimizingStrategyCache) + + S = CartesianIndices(source_shape(model)) + A = OnPolicyActionIterator(S, strategy_cache) + + return ZipIterator(A, S) +end + +function exhaustive_cartesian(model::IntervalAmbiguitySets, strategy_cache::NonOptimizingStrategyCache) + S = CartesianIndices(source_shape(model)) + A = OnPolicyActionIterator(S, strategy_cache) + + return ZipIterator(A, S) +end + + + + +struct GivenSequence <: SamplingStrategy end + +function sample(::GivenSequence, + model, + sequence::Vector{Tuple{NTuple{N, T}, NTuple{M, T}}} # each element: (state_tuple, action_tuple + ) where {N, M, T<:Integer} + return custom_sequence(model, sequence) +end + +function custom_sequence( + model::FactoredRMDP, + sequence::Vector{Tuple{NTuple{N, T}, NTuple{M, T}}} +)::AbstractVector{Tuple{CartesianIndex{N}, CartesianIndex{M}}} where {N, M, T<:Integer} + + # Precompute model shapes + shape_s = source_shape(model) # state shape tuple + shape_a = action_shape(model) # action shape tuple + + # Validate each state-action pair + for (s, a) in sequence + # check all entries >= 1 + @assert all(x -> x >= 1, s) + @assert all(x -> x >= 1, a) + + # check each entry within bounds + @assert all((xi, yi) -> xi <= yi, zip(s, shape_s)) + @assert all((xi, yi) -> xi <= yi, zip(a, shape_a)) + end + + return GivenSequenceIterator(sequence) +end + +# TODO: 1. random sampling of states, with or without replacement, with or without weighting (e.g. based on current value function) +# TODO: - subset of states each iteration? +# TODO: - one state per iteration? +# TODO: +# TODO: 2. (epsilon) greedy on policy trajectory simulation +# TODO: 3. 
BRTDP gap based trajectory simulation
+# TODO:
+
+# Robust value iteration uses the `AllSampling` strategy.
+### Robust Value Iteration
+sampling_strategy(alg::RobustValueIteration) = AllSampling()
diff --git a/src/strategy_cache.jl b/src/strategy_cache.jl
index 0da47b12..a8512c73 100644
--- a/src/strategy_cache.jl
+++ b/src/strategy_cache.jl
@@ -159,3 +159,26 @@ function _extract_strategy!(cur_strategy, values, available_actions, neutral, j
     @inbounds cur_strategy[jₛ] = opt_index
     return opt_val
 end
+# Fill `Vres` state by state: for each Cartesian source-state index the slice
+# `Q[:, jₛ]` is handed to `extract_strategy!` and its return value is stored.
+function strategy!(
+    strategy_cache::OptimizingStrategyCache,
+    Vres::AbstractArray{R},
+    Q::AbstractArray{R},
+    model,
+    maximize,
+) where {R <: Real}
+    # No explicit return: the loop is the last expression, so the call yields `nothing`.
+    #TODO: can be threaded?
+    @inbounds for jₛ in CartesianIndices(source_shape(model))
+        # NOTE(review): the single `:` presumably assumes one action axis in `Q` — confirm.
+        Vres[jₛ] = extract_strategy!(
+            strategy_cache,
+            @view(Q[:, jₛ]),
+            available(model, jₛ),
+            jₛ,
+            maximize,
+        )
+
+    end
+end
\ No newline at end of file
diff --git a/src/value.jl b/src/value.jl
new file mode 100644
index 00000000..314236aa
--- /dev/null
+++ b/src/value.jl
@@ -0,0 +1,73 @@
+abstract type ValueFunction end
+# State-indexed value iterate: `previous`/`current` buffers plus a state-action scratch array.
+struct StateValueFunction{R, A1 <: AbstractArray{R}, A2 <: AbstractArray{R}} <: ValueFunction
+    previous::A1
+    current::A1
+    intermediate_state_action_value::A2
+end
+# Build zero-initialised buffers from the system wrapped by `problem`.
+function StateValueFunction(problem::AbstractIntervalMDPProblem)
+    mp = system(problem)
+    previous = arrayfactory(mp, valuetype(mp), state_values(mp))
+    previous .= zero(valuetype(mp))
+    current = copy(previous)
+
+    dim = (action_values(mp)..., state_values(mp)...)
+    # concat gives shape: (a1, a2) , (s1, s2) => (a1, a2, s1, s2)
+    # (a, s) to access a more frequently due to column major
+    # TODO: works for IMDP, need to check for fIMDP
+    intermediate_state_action_value = arrayfactory(mp, valuetype(mp), dim)
+    intermediate_state_action_value .= zero(valuetype(mp))
+
+    return StateValueFunction(previous, current, intermediate_state_action_value)
+end
+# Replace `previous` in place with `current - previous` and return that array.
+function lastdiff!(V::StateValueFunction{R}) where {R}
+    # Reuse prev to store the latest difference: (previous - current), then negated
+    V.previous .-= V.current
+    rmul!(V.previous, -one(R))
+
+    return V.previous
+end
+# Copy `current` into `previous`; returns `V`.
+function nextiteration!(V::StateValueFunction)
+    copy!(V.previous, V.current)
+
+    return V
+end
+
+# State-action-indexed counterpart: `previous`/`current` hold (a, s) values, scratch holds state values.
+struct StateActionValueFunction{R, A1 <: AbstractArray{R}, A2 <: AbstractArray{R}} <: ValueFunction
+    previous::A1
+    current::A1
+    intermediate_state_value::A2
+end
+# Build zero-initialised buffers from the system wrapped by `problem`.
+function StateActionValueFunction(problem::AbstractIntervalMDPProblem)
+    mp = system(problem)
+    dim = (action_values(mp)..., state_values(mp)...)
+    # TODO: works for IMDP, need to check for fIMDP
+    previous = arrayfactory(mp, valuetype(mp), dim)
+    previous .= zero(valuetype(mp))
+    current = copy(previous)
+
+    intermediate_state_value = arrayfactory(mp, valuetype(mp), state_values(mp))
+    intermediate_state_value .= zero(valuetype(mp))
+
+    return StateActionValueFunction(previous, current, intermediate_state_value)
+end
+
+# Replace `previous` in place with `current - previous` and return that array.
+function lastdiff!(V::StateActionValueFunction{R}) where {R}
+    # Reuse prev to store the latest difference: (previous - current), then negated
+    V.previous .-= V.current
+    rmul!(V.previous, -one(R))
+
+    return V.previous
+end
+# Copy `current` into `previous`; returns `V`.
+function nextiteration!(V::StateActionValueFunction)
+    copy!(V.previous, V.current)
+
+    return V
+end
\ No newline at end of file
diff --git a/src/workspace.jl b/src/workspace.jl
index c36ca4c4..56a7b4bc 100644
--- a/src/workspace.jl
+++ b/src/workspace.jl
@@ -53,7 +53,6 @@ struct DenseIntervalOMaxWorkspace{T <: Real}
     budget::Vector{T}
     scratch::Vector{Int32}
     permutation::Vector{Int32}
-    actions::Vector{T}
 end
 
 function
DenseIntervalOMaxWorkspace( @@ -63,8 +62,7 @@ function DenseIntervalOMaxWorkspace( budget = 1 .- vec(sum(ambiguity_set.lower; dims = 1)) scratch = Vector{Int32}(undef, num_target(ambiguity_set)) perm = Vector{Int32}(undef, num_target(ambiguity_set)) - actions = Vector{R}(undef, nactions) - return DenseIntervalOMaxWorkspace(budget, scratch, perm, actions) + return DenseIntervalOMaxWorkspace(budget, scratch, perm) end permutation(ws::DenseIntervalOMaxWorkspace) = ws.permutation @@ -83,7 +81,7 @@ function ThreadedDenseIntervalOMaxWorkspace( perm = Vector{Int32}(undef, num_target(ambiguity_set)) workspaces = [ - DenseIntervalOMaxWorkspace(budget, scratch, perm, Vector{R}(undef, nactions)) + DenseIntervalOMaxWorkspace(budget, scratch, perm) for _ in 1:Threads.nthreads() ] return ThreadedDenseIntervalOMaxWorkspace(workspaces) diff --git a/test/base/bellman.jl b/test/base/bellman.jl index b09107b0..e981d233 100644 --- a/test/base/bellman.jl +++ b/test/base/bellman.jl @@ -13,41 +13,41 @@ using IntervalMDP @testset "maximization" begin ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) - @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] + Vres = zeros(N, 1, 2) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) + @test Vres ≈ N[27 // 10, 17 // 10]' # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) - @test Vres ≈ N[27 // 10, 17 // 10] + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) + @test Vres ≈ N[27 // 10, 17 // 10]' ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) 
strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) - @test Vres ≈ N[27 // 10, 17 // 10] + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) + @test Vres ≈ N[27 // 10, 17 // 10]' end #### Minimization @testset "minimization" begin ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) - Vres = zeros(N, 2) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) - @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] + Vres = zeros(N, 1, 2) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) + @test Vres ≈ N[17 // 10, 15 // 10]' # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) - @test Vres ≈ N[17 // 10, 15 // 10] + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) + @test Vres ≈ N[17 // 10, 15 // 10]' ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) - @test Vres ≈ N[17 // 10, 15 // 10] + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) + @test Vres ≈ N[17 // 10, 15 // 10]' end end diff --git a/test/base/factored.jl b/test/base/factored.jl index 7f9cbbc5..526e0b38 100644 --- a/test/base/factored.jl +++ b/test/base/factored.jl @@ -424,19 +424,19 @@ end end @testset "maximization" begin - Vexpected = IntervalMDP.bellman(V, imc; upper_bound = true) # Using O-maximization, should be 
equivalent + Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -445,42 +445,42 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + 
IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected end @testset "minimization" begin - Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent + Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -489,25 +489,25 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) 
@test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected end end @@ -584,7 +584,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) @test V_vertex ≈ N[ 1076//75 4279//300 167//15 @@ -594,7 +594,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -612,7 +612,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -621,13 +621,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, 
strategy_cache, Vres_first_OMax, @@ -644,13 +644,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -659,7 +659,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) @test V_vertex ≈ N[ 4399//450 41//5 488//45 @@ -669,7 +669,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -687,7 +687,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -696,13 +696,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, 
strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -719,13 +719,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end @@ -786,7 +786,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -799,7 +799,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -818,7 +818,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -835,7 +835,7 @@ end ) strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -849,7 +849,7 @@ end ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -867,7 +867,7 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -881,7 +881,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -897,7 +897,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -910,7 +910,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -929,7 +929,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -946,7 +946,7 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -960,7 +960,7 @@ end ws = IntervalMDP.construct_workspace(mdp, 
OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -978,7 +978,7 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -992,7 +992,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1009,7 +1009,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -1022,7 +1022,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1041,7 +1041,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1058,7 +1058,7 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1072,7 +1072,7 @@ end ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1090,7 +1090,7 
@@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1104,7 +1104,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1120,7 +1120,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -1133,7 +1133,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1152,7 +1152,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1169,7 +1169,7 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1183,7 +1183,7 @@ end ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1201,7 +1201,7 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, 
strategy_cache, Vres, @@ -1215,7 +1215,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1333,12 +1333,12 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1356,7 +1356,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -1365,13 +1365,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1388,13 +1388,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -1403,12 +1403,12 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1426,7 +1426,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -1435,13 +1435,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = 
IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1458,13 +1458,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end diff --git a/test/base/imdp.jl b/test/base/imdp.jl index b3055b31..b0971713 100644 --- a/test/base/imdp.jl +++ b/test/base/imdp.jl @@ -49,34 +49,30 @@ using IntervalMDP @testset "bellman" begin V = N[1, 2, 3] - Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) + Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true) @test Vres ≈ N[ - (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, - (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, - 1 * 3, + 17//10 18//10 3//1 + 15//10 21//10 3//1 ] - Vres = similar(Vres) - IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) + Vres = Array{N}(undef, (2, 3)) + IntervalMDP.expectation!(Vres, V, mdp; upper_bound = false, maximize = true) @test Vres ≈ N[ - (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, - (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, - 1 * 3, + 17//10 18//10 3//1 + 15//10 21//10 3//1 ] - Vres = IntervalMDP.bellman(V, mdp; upper_bound = true, maximize = false) + Vres = IntervalMDP.expectation(V, mdp; upper_bound = true, maximize = 
false) @test Vres ≈ N[ - (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, - (1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3, - 1 * 3, + 27//10 23//10 3//1 + 17//10 22//10 3//1 ] - Vres = similar(Vres) - IntervalMDP.bellman!(Vres, V, mdp; upper_bound = true, maximize = false) + Vres = Array{N}(undef, (2, 3)) + IntervalMDP.expectation!(Vres, V, mdp; upper_bound = true, maximize = false) @test Vres ≈ N[ - (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, - (1 // 5) * 1 + (2 // 5) * 2 + (2 // 5) * 3, - 1 * 3, + 27//10 23//10 3//1 + 17//10 22//10 3//1 ] end diff --git a/test/base/mixture.jl b/test/base/mixture.jl index c697b4b1..dc6c7643 100644 --- a/test/base/mixture.jl +++ b/test/base/mixture.jl @@ -62,7 +62,7 @@ for N in [Float32, Float64] ws = IntervalMDP.construct_workspace(mixture_prob) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -76,7 +76,7 @@ for N in [Float32, Float64] ws = IntervalMDP.MixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -90,7 +90,7 @@ for N in [Float32, Float64] ws = IntervalMDP.ThreadedMixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -111,7 +111,7 @@ for N in [Float32, Float64] ws = IntervalMDP.construct_workspace(mixture_prob) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -125,7 +125,7 @@ for N in [Float32, Float64] ws = IntervalMDP.MixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = 
similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -139,7 +139,7 @@ for N in [Float32, Float64] ws = IntervalMDP.ThreadedMixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -214,7 +214,7 @@ for N in [Float32, Float64] ws = IntervalMDP.construct_workspace(mixture_prob) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -228,7 +228,7 @@ for N in [Float32, Float64] ws = IntervalMDP.MixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -242,7 +242,7 @@ for N in [Float32, Float64] ws = IntervalMDP.ThreadedMixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -263,7 +263,7 @@ for N in [Float32, Float64] ws = IntervalMDP.construct_workspace(mixture_prob) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -277,7 +277,7 @@ for N in [Float32, Float64] ws = IntervalMDP.MixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -291,7 +291,7 @@ for N in [Float32, Float64] ws = IntervalMDP.ThreadedMixtureWorkspace(mixture_prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(mixture_prob) Vres = 
similar(Vres) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, diff --git a/test/base/product.jl b/test/base/product.jl index ed49f562..cbcec314 100644 --- a/test/base/product.jl +++ b/test/base/product.jl @@ -137,13 +137,16 @@ end 0 5 ] - Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false) + Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false) - @test Vres ≈ N[ + Vtar = N[ 30//10 24//10 33//10 2 5 5 ] + Vtar = reshape(Vtar, 1, size(Vtar)...) + + @test Vres ≈ Vtar end end end @@ -220,7 +223,7 @@ end ] # No Strategy - Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false) + Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false) @test Vtar ≈ Vres atol=eps # Non Stationary Strategy (Init iteration) @@ -229,7 +232,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -251,7 +254,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -273,7 +276,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -290,7 +293,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -312,7 +315,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -334,7 +337,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -356,7 +359,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -421,7 +424,7 @@ end 45//10 5 ] - Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false) + Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false) @test Vres ≈ Vtar end end @@ -504,7 +507,7 @@ end ] # No Strategy - Vres = 
IntervalMDP.bellman(V, prod_proc; upper_bound = false) + Vres = IntervalMDP.expectation(V, prod_proc; upper_bound = false) @test Vtar ≈ Vres atol=eps # Non Stationary Strategy @@ -513,7 +516,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -535,7 +538,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -557,7 +560,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -574,7 +577,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -596,7 +599,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -618,7 +621,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, @@ -640,7 +643,7 @@ end Vres = copy(V) - Vres = IntervalMDP.bellman!( + Vres = IntervalMDP.expectation!( workspace, strategy_cache, Vres, diff --git a/test/cuda/dense/bellman.jl b/test/cuda/dense/bellman.jl index d5ff36c5..9475a5a1 100644 --- a/test/cuda/dense/bellman.jl +++ b/test/cuda/dense/bellman.jl @@ -23,7 +23,7 @@ using IntervalMDP, CUDA ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[27 // 10, 17 // 10] # [0.3 * 2 + 0.7 * 3, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] end @@ -33,7 +33,7 @@ using IntervalMDP, CUDA ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) - 
IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[17 // 10, 15 // 10] # [0.5 * 1 + 0.3 * 2 + 0.2 * 3, 0.6 * 1 + 0.3 * 2 + 0.1 * 3] end diff --git a/test/cuda/dense/imdp.jl b/test/cuda/dense/imdp.jl index d70f7483..67bb4e09 100644 --- a/test/cuda/dense/imdp.jl +++ b/test/cuda/dense/imdp.jl @@ -48,7 +48,7 @@ using IntervalMDP, CUDA @testset "bellman" begin V = IntervalMDP.cu(N[1, 2, 3]) - Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) + Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[ (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, diff --git a/test/cuda/sparse/bellman.jl b/test/cuda/sparse/bellman.jl index d930fddd..97837709 100644 --- a/test/cuda/sparse/bellman.jl +++ b/test/cuda/sparse/bellman.jl @@ -36,7 +36,7 @@ for N in [Float32, Float64] ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -54,7 +54,7 @@ for N in [Float32, Float64] ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = CUDA.zeros(N, 2) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, Vres, @@ -113,7 +113,7 @@ end ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_cpu, @@ -125,7 +125,7 @@ end ws = IntervalMDP.construct_workspace(cuda_prob) strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) V_gpu = CUDA.zeros(Float64, m) - 
IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_gpu, @@ -151,7 +151,7 @@ end ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_cpu, @@ -163,7 +163,7 @@ end ws = IntervalMDP.construct_workspace(cuda_prob) strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_gpu, @@ -189,7 +189,7 @@ end ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_cpu, @@ -201,7 +201,7 @@ end ws = IntervalMDP.construct_workspace(cuda_prob) strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_gpu, @@ -227,7 +227,7 @@ end ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_cpu, @@ -239,7 +239,7 @@ end ws = IntervalMDP.construct_workspace(cuda_prob) strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_gpu, @@ -265,7 +265,7 @@ end ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) V_cpu = zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_cpu, @@ -277,7 +277,7 @@ end ws = IntervalMDP.construct_workspace(cuda_prob) strategy_cache = 
IntervalMDP.construct_strategy_cache(cuda_prob) V_gpu = CUDA.zeros(Float64, m) - IntervalMDP._bellman_helper!( + IntervalMDP._expectation_helper!( ws, strategy_cache, V_gpu, @@ -303,7 +303,7 @@ end ws = IntervalMDP.construct_workspace(cuda_prob) strategy_cache = IntervalMDP.construct_strategy_cache(cuda_prob) V_gpu = CUDA.zeros(Float64, m) - @test_throws IntervalMDP.OutOfSharedMemory IntervalMDP._bellman_helper!( + @test_throws IntervalMDP.OutOfSharedMemory IntervalMDP._expectation_helper!( ws, strategy_cache, V_gpu, diff --git a/test/cuda/sparse/imdp.jl b/test/cuda/sparse/imdp.jl index a68301bb..39fd6161 100644 --- a/test/cuda/sparse/imdp.jl +++ b/test/cuda/sparse/imdp.jl @@ -51,7 +51,7 @@ using IntervalMDP, CUDA, SparseArrays @testset "bellman" begin V = IntervalMDP.cu(N[1, 2, 3]) - Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = true) + Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true) Vres = IntervalMDP.cpu(Vres) # Convert to CPU for testing @test Vres ≈ N[ (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, diff --git a/test/sparse/bellman.jl b/test/sparse/bellman.jl index 1aaa9276..bd9ff5c5 100644 --- a/test/sparse/bellman.jl +++ b/test/sparse/bellman.jl @@ -20,19 +20,19 @@ using IntervalMDP, SparseArrays ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[82 // 10, 57 // 10] 
# [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = true) @test Vres ≈ N[82 // 10, 57 // 10] # [0.3 * 4 + 0.7 * 10, 0.5 * 1 + 0.3 * 2 + 0.2 * 3] end @@ -41,19 +41,19 @@ using IntervalMDP, SparseArrays ws = IntervalMDP.construct_workspace(prob) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = zeros(N, 2) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] ws = IntervalMDP.DenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] ws = IntervalMDP.ThreadedDenseIntervalOMaxWorkspace(prob, 1) strategy_cache = IntervalMDP.construct_strategy_cache(prob) Vres = similar(Vres) - IntervalMDP._bellman_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) + IntervalMDP._expectation_helper!(ws, strategy_cache, Vres, V, prob; upper_bound = false) @test Vres ≈ N[37 // 10, 55 // 10] # [0.5 * 1 + 0.3 * 4 + 0.2 * 10, 0.6 * 5 + 0.3 * 6 + 0.1 * 7] end end diff --git a/test/sparse/factored.jl b/test/sparse/factored.jl index b768ea1b..4baa4016 100644 --- a/test/sparse/factored.jl +++ b/test/sparse/factored.jl @@ -90,19 +90,19 @@ end end @testset "maximization" begin - Vexpected = IntervalMDP.bellman(V, imc; 
upper_bound = true) # Using O-maximization, should be equivalent + Vexpected = IntervalMDP.expectation(V, imc; upper_bound = true) # Using O-maximization, should be equivalent ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -111,42 +111,42 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - 
IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = true) @test Vres ≈ Vexpected end @testset "minimization" begin - Vexpected = IntervalMDP.bellman(V, imc; upper_bound = false) # Using O-maximization, should be equivalent + Vexpected = IntervalMDP.expectation(V, imc; upper_bound = false) # Using O-maximization, should be equivalent ws = IntervalMDP.construct_workspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredIntervalMcCormickWorkspace(imc, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -155,25 +155,25 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.construct_workspace(imc, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = zeros(N, 3) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.FactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + 
IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected ws = IntervalMDP.ThreadedFactoredVertexIteratorWorkspace(imc) strategy_cache = IntervalMDP.construct_strategy_cache(imc) Vres = similar(Vres) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, imc; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, imc; upper_bound = false) @test Vres ≈ Vexpected end end @@ -244,7 +244,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) @test V_vertex ≈ N[ 1076//75 4279//300 1081//75 @@ -254,7 +254,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -272,7 +272,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -281,13 +281,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) 
Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -304,13 +304,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -319,7 +319,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) @test V_vertex ≈ N[ 412//45 41//5 488//45 @@ -329,7 +329,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -347,7 +347,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -356,13 +356,13 @@ end ) strategy_cache = 
IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -379,13 +379,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end @@ -446,7 +446,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -459,7 +459,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -478,7 +478,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + 
IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -495,7 +495,7 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -509,7 +509,7 @@ end ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -527,7 +527,7 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -541,7 +541,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -557,7 +557,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -570,7 +570,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -589,7 +589,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -606,7 +606,7 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( 
ws, strategy_cache, Vres, @@ -620,7 +620,7 @@ end ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -638,7 +638,7 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -652,7 +652,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -669,7 +669,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -682,7 +682,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -701,7 +701,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -718,7 +718,7 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -732,7 +732,7 @@ end ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + 
IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -750,7 +750,7 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -764,7 +764,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -780,7 +780,7 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, V_vertex, @@ -793,7 +793,7 @@ end ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -812,7 +812,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -829,7 +829,7 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -843,7 +843,7 @@ end ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 2, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -861,7 +861,7 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = 
similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -875,7 +875,7 @@ end ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres, @@ -1005,12 +1005,12 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1028,7 +1028,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -1037,13 +1037,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1060,13 +1060,13 @@ 
end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -1075,12 +1075,12 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1098,7 +1098,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -1107,13 +1107,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, 
mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1130,13 +1130,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end @@ -1254,12 +1254,12 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = true) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1277,7 +1277,7 @@ end IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ 
Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -1286,13 +1286,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1309,13 +1309,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = true) @test Vres ≈ Vres_first_OMax end @@ -1324,12 +1324,12 @@ end ws = IntervalMDP.construct_workspace(mdp, VertexEnumeration()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) V_vertex = zeros(N, 3, 3, 3) - IntervalMDP.bellman!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, V_vertex, V, mdp; upper_bound = false) ws = IntervalMDP.construct_workspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_McCormick = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_McCormick, @@ -1347,7 +1347,7 @@ end 
IntervalMDP.FactoredIntervalMcCormickWorkspace(mdp, LPMcCormickRelaxation()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.ThreadedFactoredIntervalMcCormickWorkspace( @@ -1356,13 +1356,13 @@ end ) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_McCormick) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_McCormick ws = IntervalMDP.construct_workspace(mdp, OMaximization()) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres_first_OMax = zeros(N, 3, 3, 3) - IntervalMDP.bellman!( + IntervalMDP.expectation!( ws, strategy_cache, Vres_first_OMax, @@ -1379,13 +1379,13 @@ end ws = IntervalMDP.FactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax ws = IntervalMDP.ThreadedFactoredIntervalOMaxWorkspace(mdp) strategy_cache = IntervalMDP.construct_strategy_cache(mdp) Vres = similar(Vres_first_OMax) - IntervalMDP.bellman!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) + IntervalMDP.expectation!(ws, strategy_cache, Vres, V, mdp; upper_bound = false) @test Vres ≈ Vres_first_OMax end end diff --git a/test/sparse/imdp.jl b/test/sparse/imdp.jl index bfae3ba9..63ff30a0 100644 --- a/test/sparse/imdp.jl +++ b/test/sparse/imdp.jl @@ -51,7 +51,7 @@ using IntervalMDP, SparseArrays @testset "bellman" begin V = N[1, 2, 3] - Vres = IntervalMDP.bellman(V, mdp; upper_bound = false, maximize = 
true) + Vres = IntervalMDP.expectation(V, mdp; upper_bound = false, maximize = true) @test Vres ≈ N[ (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3, @@ -59,7 +59,7 @@ using IntervalMDP, SparseArrays ] Vres = similar(Vres) - IntervalMDP.bellman!(Vres, V, mdp; upper_bound = false, maximize = true) + IntervalMDP.expectation!(Vres, V, mdp; upper_bound = false, maximize = true) @test Vres ≈ N[ (1 // 2) * 1 + (3 // 10) * 2 + (1 // 5) * 3, (3 // 10) * 1 + (3 // 10) * 2 + (2 // 5) * 3,