diff --git a/docs/src/reference/systems.md b/docs/src/reference/systems.md index 9846ffb9..7513ec8e 100644 --- a/docs/src/reference/systems.md +++ b/docs/src/reference/systems.md @@ -68,7 +68,12 @@ num_labels(tf::TransitionFunction) ### Labelling of IMDP states to Automaton alphabet ```@docs -LabellingFunction -mapping(labelling_func::LabellingFunction) -num_labels(labelling_func::LabellingFunction) +DeterministicLabelling +mapping(dl::DeterministicLabelling) +num_labels(dl::DeterministicLabelling) +state_values(dl::DeterministicLabelling) +ProbabilisticLabelling +mapping(pl::ProbabilisticLabelling) +num_labels(pl::ProbabilisticLabelling) +state_values(pl::ProbabilisticLabelling) ``` \ No newline at end of file diff --git a/docs/src/specifications.md b/docs/src/specifications.md index cf70b6d9..5f6ec15c 100644 --- a/docs/src/specifications.md +++ b/docs/src/specifications.md @@ -385,7 +385,7 @@ mdp = IntervalMarkovDecisionProcess(transition_probs, istates) ```@example product_process_example map = [1, 2, 3] # "", "a", "b" -lf = LabellingFunction(map) +lf = DeterministicLabelling(map) product_process = ProductProcess(mdp, dfa, lf) ``` diff --git a/src/bellman.jl b/src/bellman.jl index ed2d92a7..52e0c361 100644 --- a/src/bellman.jl +++ b/src/bellman.jl @@ -217,6 +217,30 @@ function bellman!( lf = labelling_function(model) dfa = automaton(model) + return _bellman_helper!( + workspace, + strategy_cache, + Vres, + V, + dfa, + mp, + lf, + upper_bound, + maximize, + ) +end + +function _bellman_helper!( + workspace::ProductWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V, + dfa::DFA, + mp::IntervalMarkovProcess, + lf::DeterministicLabelling, + upper_bound = false, + maximize = true, +) W = workspace.intermediate_values @inbounds for state in dfa @@ -244,6 +268,51 @@ function bellman!( return Vres end +function _bellman_helper!( + workspace::ProductWorkspace, + strategy_cache::AbstractStrategyCache, + Vres, + V::AbstractArray{R}, + dfa::DFA, + mp::IntervalMarkovProcess, + lf::ProbabilisticLabelling, + upper_bound = false, + maximize = true, +) where {R} + W = workspace.intermediate_values + + @inbounds for state in dfa + local_strategy_cache = localize_strategy_cache(strategy_cache, state) + + # Select the value function for the current DFA state + # according to the appropriate DFA transition function + map!(W, CartesianIndices(state_values(mp))) do idx + v = zero(R) + + for (label, prob) in enumerate(lf[idx]) + new_dfa_state = dfa[state, label] + v += prob * V[idx, new_dfa_state] + end + + return v + end + + # For each state in the product process, compute the Bellman operator + # for the corresponding Markov process + bellman!( + workspace.underlying_workspace, + local_strategy_cache, + selectdim(Vres, ndims(Vres), state), + W, + mp; + upper_bound = upper_bound, + maximize = maximize, + ) + end + + return Vres +end + function localize_strategy_cache(strategy_cache::NoStrategyCache, dfa_state) return strategy_cache end diff --git a/src/models/ProductProcess.jl b/src/models/ProductProcess.jl index 6a671dc4..3c3406a4 100644 --- a/src/models/ProductProcess.jl +++ b/src/models/ProductProcess.jl @@ -55,7 +55,7 @@ function checkproduct( ) # check labelling states (input) match MDP states - if size(labelling_func) != state_values(mdp) + if state_values(labelling_func) != state_values(mdp) throw( DimensionMismatch( "The mapped states $(size(labelling_func)) in the labelling function is not equal the fRMDP state variables $(state_values(mdp)).", diff --git a/src/probabilities/DeterministicLabelling.jl b/src/probabilities/DeterministicLabelling.jl new file mode 100644 index 00000000..4a1b1618 --- /dev/null +++ b/src/probabilities/DeterministicLabelling.jl @@ -0,0 +1,84 @@ +""" + struct DeterministicLabelling{ + T <: Integer, + AT <: AbstractArray{T} + } + +A type representing the labelling of IMDP states into DFA inputs. + +Formally, let ``L : S \\to 2^{AP}`` be a labelling function, where +- ``S`` is the set of IMDP states, and +- ``2^{AP}`` is the power set of atomic propositions + +Then the ```DeterministicLabelling``` type is defined as vector which stores the mapping. + +### Fields +- `map::AT`: mapping function where indices are (factored) IMDP states and stored values are DFA inputs. +- `num_outputs::Int32`: number of labels accounted for in mapping. + +""" +struct DeterministicLabelling{T <: Integer, AT <: AbstractArray{T}} <: AbstractLabelling + map::AT + num_outputs::Int32 + + function DeterministicLabelling(map::AT) where {T <: Integer, AT <: AbstractArray{T}} + num_outputs = checklabelling(map) + + return new{T, AT}(map, Int32(num_outputs)) + end +end + +function checklabelling(map::AbstractArray{<:Integer}) + labels = unique(map) + + if any(labels .< 1) + throw(ArgumentError("Labelled state index cannot be less than 1")) + end + + # Check that labels are consecutive integers + sort!(labels) + if any(diff(labels) .!= 1) + throw(ArgumentError("Labelled state indices must be consecutive integers")) + end + + return last(labels) +end + +""" + mapping(dl::DeterministicLabelling) + +Return the mapping array of the labelling function. +""" +mapping(dl::DeterministicLabelling) = dl.map + +""" + size(dl::DeterministicLabelling) + +Returns the shape of the input range of the labeling function ``L : S \\to 2^{AP}``, which can be multiple dimensions in case of factored IMDPs. +""" +Base.size(dl::DeterministicLabelling) = size(dl.map) + +""" + num_labels(dl::DeterministicLabelling) +Return the number of labels (DFA inputs) in the labelling function. +""" +num_labels(dl::DeterministicLabelling) = dl.num_outputs + +""" + state_values(dl::DeterministicLabelling) +Return a tuple with the number of states for each state variable of the labeling function ``L : S \\to 2^{AP}``, which can be multiple dimensions in case of factored IMDPs. +""" +state_values(dl::DeterministicLabelling) = size(dl.map) + +""" + num_states(dl::DeterministicLabelling) +Return the number of states of the labeling function ``L : S \\to 2^{AP}`` +""" +num_states(dl::DeterministicLabelling) = prod(state_values(dl)) + +""" + getindex(dl::DeterministicLabelling, s...) + +Return the label for state s. +""" +Base.getindex(dl::DeterministicLabelling, s...) = dl.map[s...] diff --git a/src/probabilities/Labelling.jl b/src/probabilities/Labelling.jl index deef2acc..ec2dc707 100644 --- a/src/probabilities/Labelling.jl +++ b/src/probabilities/Labelling.jl @@ -1,74 +1,6 @@ -abstract type AbstractLabelling end - -""" - struct LabellingFunction{ - T <: Integer, - AT <: AbstractArray{T} - } - -A type representing the labelling of IMDP states into DFA inputs. - -Formally, let ``L : S \\to 2^{AP}`` be a labelling function, where -- ``S`` is the set of IMDP states, and -- ``2^{AP}`` is the power set of atomic propositions - -Then the ```LabellingFunction``` type is defined as vector which stores the mapping. - -### Fields -- `map::AT`: mapping function where indices are (factored) IMDP states and stored values are DFA inputs. -- `num_inputs::Int32`: number of IMDP states accounted for in mapping. - -""" -struct LabellingFunction{T <: Integer, AT <: AbstractArray{T}} <: AbstractLabelling - map::AT - num_outputs::Int32 -end - -function LabellingFunction(map::AT) where {T <: Integer, AT <: AbstractArray{T}} - num_outputs = checklabelling(map) - - return LabellingFunction(map, Int32(num_outputs)) -end - -function checklabelling(map::AbstractArray{<:Integer}) - labels = unique(map) - - if any(labels .< 1) - throw(ArgumentError("Labelled state index cannot be less than 1")) - end - - # Check that labels are consecutive integers - sort!(labels) - if any(diff(labels) .!= 1) - throw(ArgumentError("Labelled state indices must be consecutive integers")) - end - - return last(labels) -end - -""" - mapping(labelling_func::LabellingFunction) - -Return the mapping array of the labelling function. -""" -mapping(labelling_func::LabellingFunction) = labelling_func.map - -""" - size(labelling_func::LabellingFunction) - -Returns the shape of the input range of the labeling function ``L : S \\to 2^{AP}``, which can be multiple dimensions in case of factored IMDPs. -""" -Base.size(labelling_func::LabellingFunction) = size(labelling_func.map) - -""" - num_labels(labelling_func::LabellingFunction) -Return the number of labels (DFA inputs) in the labelling function. """ -num_labels(labelling_func::LabellingFunction) = labelling_func.num_outputs + AbstractLabelling +An abstract type for labelling functions. """ - getindex(lf::LabellingFunction, s...) - -Return the label for state s. -""" -Base.getindex(lf::LabellingFunction, s...) = lf.map[s...] +abstract type AbstractLabelling end diff --git a/src/probabilities/ProbabilisticLabelling.jl b/src/probabilities/ProbabilisticLabelling.jl new file mode 100644 index 00000000..19d16024 --- /dev/null +++ b/src/probabilities/ProbabilisticLabelling.jl @@ -0,0 +1,84 @@ +""" + struct ProbabilisticLabelling{ + R <: Real, + MR <: AbstractMatrix{R} + } + +A type representing the Probabilistic labelling of IMDP states into DFA inputs. Each labelling is assigned a probability. + +Formally, let ``L : S \\times 2^{AP} \\to [0, 1]`` be a labelling function, where +- ``S`` is the set of IMDP states, and +- ``2^{AP}`` is the power set of atomic propositions + +Then the ```ProbabilisticLabelling``` type is defined as matrix which stores the mapping. + +### Fields +- `map::MT`: mapping function encoded as matrix with labels on the rows, IMDP states on the columns, and valid probability values for the destination. + +The choice to have labels on the rows is due to the column-major storage of matrices in Julia and the fact that we want the inner loop over DFA target states +in the Bellman operator `bellman!`. + +""" +struct ProbabilisticLabelling{R <: Real, MR <: AbstractMatrix{R}} <: AbstractLabelling + map::MR + + function ProbabilisticLabelling(map::MR) where {R <: Real, MR <: AbstractMatrix{R}} + checklabellingprobs(map) + + return new{R, MR}(map) + end +end + +function checklabellingprobs(map::AbstractMatrix{<:Real}) + + # check for each state, all the labels probabilities sum to 1 + if any(sum(map; dims = 1) .!= one(eltype(map))) + throw( + ArgumentError( + "For each IMDP state, probabilities over label states must sum to 1", + ), + ) + end +end + +""" + mapping(pl::ProbabilisticLabelling) + +Return the mapping matrix of the probabilistic labelling function. +""" +mapping(pl::ProbabilisticLabelling) = pl.map + +Base.size(pl::ProbabilisticLabelling) = size(pl.map) +Base.size(pl::ProbabilisticLabelling, i) = size(pl.map, i) + +""" + getindex(pl::ProbabilisticLabelling, s, l) + +Return the probabilities for labelling l from state s. +""" +Base.getindex(pl::ProbabilisticLabelling, l, s) = pl.map[l, s] + +""" + getindex(pl::ProbabilisticLabelling, s) + +Return the probabilities over labels from state s. +""" +Base.getindex(pl::ProbabilisticLabelling, s) = @view(pl.map[:, s]) + +""" + num_labels(pl::ProbabilisticLabelling) +Return the number of labels (DFA inputs) in the probabilistic labelling function. +""" +num_labels(pl::ProbabilisticLabelling) = size(pl.map, 1) + +""" + state_values(pl::ProbabilisticLabelling) +Return a tuple with the number of states for each state variable of the labeling function ``L : S \\to 2^{AP}``, which can be multiple dimensions in case of factored IMDPs. +""" +state_values(pl::ProbabilisticLabelling) = Base.tail(size(pl.map)) + +""" + num_states(pl::ProbabilisticLabelling) +Return the number of states of the labeling function ``L : S \\to 2^{AP}`` +""" +num_states(pl::ProbabilisticLabelling) = prod(state_values(pl)) diff --git a/src/probabilities/probabilities.jl b/src/probabilities/probabilities.jl index af732f65..5a9efc72 100644 --- a/src/probabilities/probabilities.jl +++ b/src/probabilities/probabilities.jl @@ -50,4 +50,9 @@ include("TransitionFunction.jl") export TransitionFunction, transition include("Labelling.jl") -export LabellingFunction, mapping, num_labels + +include("DeterministicLabelling.jl") +export DeterministicLabelling, mapping, num_labels, state_values, num_states + +include("ProbabilisticLabelling.jl") +export ProbabilisticLabelling, mapping, num_labels, state_values, num_states diff --git a/src/workspace.jl b/src/workspace.jl index 04684d40..1a35aa7e 100644 --- a/src/workspace.jl +++ b/src/workspace.jl @@ -229,9 +229,8 @@ function FactoredIntervalOMaxWorkspace(sys::FactoredRMDP) Tuple(maxsupportsize(ambiguity_sets(marginal)) for marginal in marginals(sys)) max_support = maximum(max_support_per_marginal) - expectation_cache = NTuple{N - 1, Vector{R}}( - zeros(R, n) for n in num_target.(marginals(sys)[2:end]) - ) + expectation_cache = + NTuple{N - 1, Vector{R}}(zeros(R, n) for n in num_target.(marginals(sys)[2:end])) values_gaps = Vector{Tuple{R, R}}(undef, max_support) scratch = Vector{Tuple{R, R}}(undef, max_support) diff --git a/test/base/labelling.jl b/test/base/labelling.jl index 757796ba..31b1fcf7 100644 --- a/test/base/labelling.jl +++ b/test/base/labelling.jl @@ -1,50 +1,106 @@ using Revise, Test using IntervalMDP -@testset "1d" begin - a = UInt16[1, 3, 2, 2] - lf = LabellingFunction(a) - - @test mapping(lf) == a - @test size(lf) == (4,) - @test num_labels(lf) == 3 - - @test lf[1] == 1 - @test lf[2] == 3 - @test lf[3] == 2 - @test lf[4] == 2 -end +@testset "DeterministicLabelling" begin + @testset "1d" begin + a = UInt16[1, 3, 2, 2] + lf = DeterministicLabelling(a) + + @test mapping(lf) == a + @test size(lf) == (4,) + @test num_labels(lf) == 3 + @test state_values(lf) == (4,) + + @test lf[1] == 1 + @test lf[2] == 3 + @test lf[3] == 2 + @test lf[4] == 2 + end + + @testset "2d" begin + a = UInt16[ + 1 3 2 2 + 1 3 2 5 + 1 3 4 2 + ] + lf = DeterministicLabelling(a) + + @test mapping(lf) == a + @test size(lf) == (3, 4) + @test num_labels(lf) == 5 + @test state_values(lf) == (3, 4) + + @test lf[1, 1] == 1 + @test lf[1, 2] == 3 + @test lf[1, 3] == 2 + @test lf[1, 4] == 2 + + @test lf[2, 1] == 1 + @test lf[2, 2] == 3 + @test lf[2, 3] == 2 + @test lf[2, 4] == 5 + + @test lf[3, 1] == 1 + @test lf[3, 2] == 3 + @test lf[3, 3] == 4 + @test lf[3, 4] == 2 + end -@testset "2d" begin - a = UInt16[ - 1 3 2 2 - 1 3 2 5 - 1 3 4 2 - ] - lf = LabellingFunction(a) - - @test mapping(lf) == a - @test size(lf) == (3, 4) - @test num_labels(lf) == 5 - - @test lf[1, 1] == 1 - @test lf[1, 2] == 3 - @test lf[1, 3] == 2 - @test lf[1, 4] == 2 - - @test lf[2, 1] == 1 - @test lf[2, 2] == 3 - @test lf[2, 3] == 2 - @test lf[2, 4] == 5 - - @test lf[3, 1] == 1 - @test lf[3, 2] == 3 - @test lf[3, 3] == 4 - @test lf[3, 4] == 2 + @testset "invalid labelling" begin + @test_throws ArgumentError DeterministicLabelling(UInt16[0, 1, 2]) + @test_throws ArgumentError DeterministicLabelling(UInt16[1, 2, 4]) + @test_throws ArgumentError DeterministicLabelling(UInt16[1, 2, 3, 5]) + end end -@testset "invalid labelling" begin - @test_throws ArgumentError LabellingFunction(UInt16[0, 1, 2]) - @test_throws ArgumentError LabellingFunction(UInt16[1, 2, 4]) - @test_throws ArgumentError LabellingFunction(UInt16[1, 2, 3, 5]) +@testset "ProbabilisticLabelling" begin + @testset "good case 1d" begin + m = Float32[ + 0.5 0.2 1 0.3 + 0 0.7 0 0.4 + 0.5 0.1 0 0.3 + ] + + dl = ProbabilisticLabelling(m) + + @test mapping(dl) == m + @test size(dl) == (3, 4) + @test size(dl, 1) == 3 + @test size(dl, 2) == 4 + @test num_labels(dl) == 3 + @test num_states(dl) == 4 + @test state_values(dl) == (4,) + + @test dl[1, 1] ≈ 0.5 + @test dl[1, 2] ≈ 0.2 + @test dl[1, 3] ≈ 1 + @test dl[1, 4] ≈ 0.3 + + @test dl[2, 1] ≈ 0.0 + @test dl[2, 2] ≈ 0.7 + @test dl[2, 3] ≈ 0.0 + @test dl[2, 4] ≈ 0.4 + + @test dl[3, 1] ≈ 0.5 + @test dl[3, 2] ≈ 0.1 + @test dl[3, 3] ≈ 0.0 + @test dl[3, 4] ≈ 0.3 + end + + @testset "invalid probabilities" begin + m1 = Float32[ + 0.5 1 + 0.5 0 + 0.1 0 + ] + + m2 = Float32[ + 0.4 1 + 0.5 0 + 0 0 + ] + + @test_throws ArgumentError ProbabilisticLabelling(m1) + @test_throws ArgumentError ProbabilisticLabelling(m2) + end end diff --git a/test/base/product.jl b/test/base/product.jl index 7dc9b863..ed49f562 100644 --- a/test/base/product.jl +++ b/test/base/product.jl @@ -52,7 +52,7 @@ using IntervalMDP # labelling map = UInt16[1, 2, 3] - lf = LabellingFunction(map) + lf = DeterministicLabelling(map) prodIMDP = ProductProcess(mdp, dfa, lf) @@ -66,14 +66,17 @@ using IntervalMDP @test occursin("ProductProcess", str) @test occursin("Underlying process", str) @test occursin("Automaton", str) - @test occursin("Labelling type: LabellingFunction{UInt16, Vector{UInt16}}", str) + @test occursin( + "Labelling type: DeterministicLabelling{UInt16, Vector{UInt16}}", + str, + ) end @testset "IMDP state labelling func input mismatch" begin # labelling map = UInt16[1, 2] - lf = LabellingFunction(map) + lf = DeterministicLabelling(map) @test_throws DimensionMismatch ProductProcess(mdp, dfa, lf) end @@ -92,13 +95,13 @@ using IntervalMDP # labelling map = UInt16[1, 2, 3] - lf = LabellingFunction(map) + lf = DeterministicLabelling(map) @test_throws DimensionMismatch ProductProcess(mdp, dfa, lf) end end -@testset "bellman" begin +@testset "bellman deterministic labelling" begin for N in [Float32, Float64, Rational{BigInt}] @testset "N = $N" begin prob = IntervalAmbiguitySets(; @@ -124,7 +127,7 @@ end atomic_props = ["reach"] dfa = DFA(delta, istate, atomic_props) - labelling = LabellingFunction(Int32[1, 1, 2]) + labelling = DeterministicLabelling(Int32[1, 1, 2]) prod_proc = ProductProcess(mc, dfa, labelling) @@ -145,7 +148,517 @@ end end end -@testset "value iteration" begin +@testset "bellman deterministic labelling wtih strategy" begin + for N in [Float32, Float64, Rational{BigInt}] + @testset "N = $N" begin + prob1 = IntervalAmbiguitySets(; + lower = N[ + 0//10 4//10 + 1//10 3//10 + 2//10 2//10 + ], + upper = N[ + 5//10 9//10 + 6//10 8//10 + 7//10 7//10 + ], + ) + + prob2 = IntervalAmbiguitySets(; + lower = N[ + 5//10 2//10 + 3//10 3//10 + 1//10 4//10 + ], + upper = N[ + 7//10 3//10 + 5//10 6//10 + 3//10 9//10 + ], + ) + + prob3 = IntervalAmbiguitySets(; lower = N[ + 0 0 + 0 0 + 1 1 + ], upper = N[ + 0 0 + 0 0 + 1 1 + ]) + + transition_probs = [prob1, prob2, prob3] + mdp = IntervalMarkovDecisionProcess(transition_probs) + + # Product model - just simple reachability + delta = TransitionFunction(Int32[ + 1 2 + 2 2 + ]) + istate = Int32(1) + atomic_props = ["reach"] + dfa = DFA(delta, istate, atomic_props) + + labelling = DeterministicLabelling(Int32[1, 1, 2]) + + prod_proc = ProductProcess(mdp, dfa, labelling) + + V = N[ + 4 1 + 2 3 + 0 5 + ] + + workspace = IntervalMDP.construct_workspace(prod_proc) + + eps = one(N) / N(1000) + + Vtar = N[ + 34//10 24//10 + 36//10 32//10 + 5 5 + ] + + # No Strategy + Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false) + @test Vtar ≈ Vres atol=eps + + # Non Stationary Strategy (Init iteration) + strategy_cache = + IntervalMDP.TimeVaryingStrategyCache(fill(ntuple(_ -> Int32(0), 1), 3, 2)) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Non Stationary Strategy (optimal policy in cache) + strategy_cache = IntervalMDP.TimeVaryingStrategyCache( + [ + (Int32(2),) (Int32(1),); + (Int32(2),) (Int32(2),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Non Stationary Strategy (non optimal policy in cache) + strategy_cache = IntervalMDP.TimeVaryingStrategyCache( + [ + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Stationary Strategy + strategy_cache = + IntervalMDP.StationaryStrategyCache(fill(ntuple(_ -> Int32(0), 1), 3, 2)) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Stationary Strategy (non optimal policy in cache) + strategy_cache = IntervalMDP.StationaryStrategyCache( + [ + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Stationary Strategy (optimal policy in cache) + strategy_cache = IntervalMDP.StationaryStrategyCache( + [ + (Int32(2),) (Int32(1),); + (Int32(2),) (Int32(2),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Given Strategy + strategy_cache = IntervalMDP.ActiveGivenStrategyCache( + [ + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vres ≈ N[ + 30//10 24//10 + 33//10 2 + 5 5 + ] atol=eps + end + end +end + +@testset "bellman probabilistic labelling" begin + for N in [Float32, Float64, Rational{BigInt}] + @testset "N = $N" begin + prob = IntervalAmbiguitySets(; + lower = N[ + 0 5//10 0 + 1//10 3//10 0 + 2//10 1//10 1 + ], + upper = N[ + 5//10 7//10 0 + 6//10 5//10 0 + 7//10 3//10 1 + ], + ) + mc = IntervalMarkovChain(prob) + + # Product model - just simple reachability + delta = TransitionFunction(Int32[ + 1 2 + 2 2 + ]) + istate = Int32(1) + atomic_props = ["reach"] + dfa = DFA(delta, istate, atomic_props) + + m = N[ + 9//10 7//10 1//10 + 1//10 3//10 9//10 + ] + + labelling = ProbabilisticLabelling(m) + + prod_proc = ProductProcess(mc, dfa, labelling) + + V = N[ + 4 1 + 2 3 + 0 5 + ] + + Vtar = N[ + 302//100 24//10 + 322//100 2 + 45//10 5 + ] + + Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false) + @test Vres ≈ Vtar + end + end +end + +@testset "bellman probabilistic labelling wtih strategy" begin + for N in [Float32, Float64, Rational{BigInt}] + @testset "N = $N" begin + prob1 = IntervalAmbiguitySets(; + lower = N[ + 0//10 4//10 + 1//10 3//10 + 2//10 2//10 + ], + upper = N[ + 5//10 9//10 + 6//10 8//10 + 7//10 7//10 + ], + ) + + prob2 = IntervalAmbiguitySets(; + lower = N[ + 5//10 2//10 + 3//10 3//10 + 1//10 4//10 + ], + upper = N[ + 7//10 3//10 + 5//10 6//10 + 3//10 9//10 + ], + ) + + prob3 = IntervalAmbiguitySets(; lower = N[ + 0 0 + 0 0 + 1 1 + ], upper = N[ + 0 0 + 0 0 + 1 1 + ]) + + transition_probs = [prob1, prob2, prob3] + mdp = IntervalMarkovDecisionProcess(transition_probs) + + # Product model - just simple reachability + delta = TransitionFunction(Int32[ + 1 2 + 2 2 + ]) + istate = Int32(1) + atomic_props = ["reach"] + dfa = DFA(delta, istate, atomic_props) + + m = N[ + 9//10 7//10 1//10 + 1//10 3//10 9//10 + ] + + labelling = ProbabilisticLabelling(m) + + prod_proc = ProductProcess(mdp, dfa, labelling) + + V = N[ + 4 1 + 2 3 + 0 5 + ] + + workspace = IntervalMDP.construct_workspace(prod_proc) + + eps = one(N) / N(1000) + + Vtar = N[ + 33//10 24//10 + 346//100 32//10 + 45//10 5 + ] + + # No Strategy + Vres = IntervalMDP.bellman(V, prod_proc; upper_bound = false) + @test Vtar ≈ Vres atol=eps + + # Non Stationary Strategy + strategy_cache = + IntervalMDP.TimeVaryingStrategyCache(fill(ntuple(_ -> Int32(0), 1), 3, 2)) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Non Stationary Strategy (non optimal policy in cache) + strategy_cache = IntervalMDP.TimeVaryingStrategyCache( + [ + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Non Stationary Strategy (optimal policy in cache) + strategy_cache = IntervalMDP.TimeVaryingStrategyCache( + [ + (Int32(2),) (Int32(1),); + (Int32(2),) (Int32(2),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Stationary Strategy + strategy_cache = + IntervalMDP.StationaryStrategyCache(fill(ntuple(_ -> Int32(0), 1), 3, 2)) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Stationary Strategy (non optimal policy in cache) + strategy_cache = IntervalMDP.StationaryStrategyCache( + [ + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Stationary Strategy (optimal policy in cache) + strategy_cache = IntervalMDP.StationaryStrategyCache( + [ + (Int32(2),) (Int32(1),); + (Int32(2),) (Int32(2),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vtar ≈ Vres atol=eps + + # Given Strategy + strategy_cache = IntervalMDP.ActiveGivenStrategyCache( + [ + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + (Int32(1),) (Int32(1),); + ], + ) + + Vres = copy(V) + + Vres = IntervalMDP.bellman!( + workspace, + strategy_cache, + Vres, + V, + prod_proc, + upper_bound = false, + ) + + @test Vres ≈ N[ + 302//100 24//10 + 322//100 2 + 45//10 5 + ] atol=eps + end + end +end + +@testset "value iteration deterministic labelling" begin for N in [Float32, Float64, Rational{BigInt}] @testset "N = $N" begin prob1 = IntervalAmbiguitySets(; @@ -186,7 +699,7 @@ end atomic_props = ["reach"] dfa = DFA(delta, istate, atomic_props) - labelling = LabellingFunction(Int32[1, 1, 2]) + labelling = DeterministicLabelling(Int32[1, 1, 2]) prod_proc = ProductProcess(mdp, dfa, labelling) @@ -282,3 +795,148 @@ end end end end + +@testset "value iteration probabilistic labelling" begin + for N in [Float32, Float64, Rational{BigInt}] + @testset "N = $N" begin + prob1 = IntervalAmbiguitySets(; + lower = N[ + 0//10 5//10 + 1//10 3//10 + 2//10 1//10 + ], + upper = N[ + 5//10 7//10 + 6//10 5//10 + 7//10 3//10 + ], + ) + + prob2 = IntervalAmbiguitySets(; + lower = N[ + 1//10 2//10 + 2//10 3//10 + 3//10 4//10 + ], + upper = N[ + 6//10 6//10 + 5//10 5//10 + 4//10 4//10 + ], + ) + + transition_probs = [prob1, prob2] + mdp = IntervalMarkovDecisionProcess(transition_probs) + + # Product model - just simple reachability + delta = TransitionFunction(Int32[ + 1 2 + 2 2 + ]) + istate = Int32(1) + atomic_props = ["reach"] + dfa = DFA(delta, istate, atomic_props) + + m = N[ + 9//10 7//10 1//10 + 1//10 3//10 9//10 + ] + + labelling = ProbabilisticLabelling(m) + + prod_proc = ProductProcess(mdp, dfa, labelling) + + eps = one(N) / N(1000) + + @testset "finite time reachability" begin + prop = FiniteTimeDFAReachability([2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(prod_proc, spec) + + policy, V_fixed_it1, k, res = solve(problem) + + @test all(V_fixed_it1 .>= 0) + @test k == 10 + @test V_fixed_it1[:, 2] == N[1, 1, 1] + + problem = VerificationProblem(prod_proc, spec, policy) + V_mc, k, res = solve(problem) + + @test V_fixed_it1 ≈ V_mc + + prop = FiniteTimeDFAReachability([2], 11) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(prod_proc, spec) + + V_fixed_it2, k, res = solve(problem) + + @test all(V_fixed_it2 .>= 0) + @test k == 11 + @test V_fixed_it2[:, 2] == N[1, 1, 1] + @test all(V_fixed_it2 .>= V_fixed_it1) + end + + @testset "infinite time reachability" begin + prop = InfiniteTimeDFAReachability([2], eps) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(prod_proc, spec) + + policy, V_conv, k, res = solve(problem) + + @test all(V_conv .>= 0) + @test maximum(res) <= eps + @test V_conv[:, 2] == N[1, 1, 1] + + problem = VerificationProblem(prod_proc, spec, policy) + V_mc, k, res = solve(problem) + + @test V_conv ≈ V_mc atol=eps + end + + @testset "finite time safety" begin + prop = FiniteTimeDFASafety([2], 10) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(prod_proc, spec) + + policy, V_fixed_it1, k, res = solve(problem) + + @test all(V_fixed_it1 .>= 0) + @test k == 10 + @test V_fixed_it1[:, 2] == N[0, 0, 0] + + problem = VerificationProblem(prod_proc, spec, policy) + V_mc, k, res = solve(problem) + + @test V_fixed_it1 ≈ V_mc + + prop = FiniteTimeDFASafety([2], 11) + spec = Specification(prop, Pessimistic, Maximize) + problem = VerificationProblem(prod_proc, spec) + + V_fixed_it2, k, res = solve(problem) + + @test all(V_fixed_it2 .>= 0) + @test k == 11 + @test V_fixed_it2[:, 2] == N[0, 0, 0] + @test all(V_fixed_it2 .<= V_fixed_it1) + end + + @testset "infinite time safety" begin + prop = InfiniteTimeDFASafety([2], eps) + spec = Specification(prop, Pessimistic, Maximize) + problem = ControlSynthesisProblem(prod_proc, spec) + + policy, V_conv, k, res = solve(problem) + + @test all(V_conv .>= 0) + @test maximum(res) <= eps + @test V_conv[:, 2] == N[0, 0, 0] + + problem = VerificationProblem(prod_proc, spec, policy) + V_mc, k, res = solve(problem) + + @test V_conv ≈ V_mc atol=eps + end + end + end +end diff --git a/test/base/specification.jl b/test/base/specification.jl index 6b82e0f3..fe02b2b7 100644 --- a/test/base/specification.jl +++ b/test/base/specification.jl @@ -295,7 +295,7 @@ end atomic_props = ["reach"] dfa = DFA(delta, istate, atomic_props) - labelling = LabellingFunction(Int32[1, 1, 2]) + labelling = DeterministicLabelling(Int32[1, 1, 2]) prod_proc = ProductProcess(mc, dfa, labelling) tv_prod_strat = TimeVaryingStrategy([Tuple{Int32}[