11"""
2- bellman(V, prob ; upper_bound = false)
2+ bellman(V, model ; upper_bound = false)
33
4- Compute robust Bellman update with the value function `V` and the interval probabilities `prob`
4+ Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref),
55that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
66Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument.
77That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower
88bound is computed.
99
1010### Examples
1111```jldoctest
12- prob = IntervalProbabilities(;
13- lower = sparse_hcat(
14- SparseVector(15, [4, 10], [0.1, 0.2]),
15- SparseVector(15, [5, 6, 7], [0.5, 0.3, 0.1]),
16- ),
17- upper = sparse_hcat(
18- SparseVector(15, [1, 4, 10], [0.5, 0.6, 0.7]),
19- SparseVector(15, [5, 6, 7], [0.7, 0.5, 0.3]),
20- ),
12+ prob1 = IntervalProbabilities(;
13+ lower = [
14+ 0.0 0.5
15+ 0.1 0.3
16+ 0.2 0.1
17+ ],
18+ upper = [
19+ 0.5 0.7
20+ 0.6 0.5
21+ 0.7 0.3
22+ ],
2123)
2224
23- Vprev = collect(1:15)
24- Vcur = bellman(Vprev, prob; upper_bound = false)
25+ prob2 = IntervalProbabilities(;
26+ lower = [
27+ 0.1 0.2
28+ 0.2 0.3
29+ 0.3 0.4
30+ ],
31+ upper = [
32+ 0.6 0.6
33+ 0.5 0.5
34+ 0.4 0.4
35+ ],
36+ )
37+
38+ prob3 = IntervalProbabilities(; lower = [
39+ 0.0
40+ 0.0
41+ 1.0
42+ ][:, :], upper = [
43+ 0.0
44+ 0.0
45+ 1.0
46+ ][:, :])
47+
48+ transition_probs = [prob1, prob2, prob3]
49+ istates = [Int32(1)]
50+
51+ model = IntervalMarkovDecisionProcess(transition_probs, istates)
52+
53+ Vprev = [1, 2, 3]
54+ Vcur = bellman(Vprev, model; upper_bound = false)
2555```
2656
2757!!! note
@@ -31,16 +61,16 @@ Vcur = bellman(Vprev, prob; upper_bound = false)
3161[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883.
3262
3363"""
34- function bellman (V, prob ; upper_bound = false )
35- Vres = similar (V, source_shape (prob ))
36- return bellman! (Vres, V, prob ; upper_bound = upper_bound)
64+ function bellman (V, model ; upper_bound = false , maximize = true )
65+ Vres = similar (V, source_shape (model ))
66+ return bellman! (Vres, V, model ; upper_bound = upper_bound, maximize = maximize )
3767end
3868
3969"""
40- bellman!(workspace, strategy_cache, Vres, V, prob , stateptr; upper_bound = false, maximize = true)
70+ bellman!(workspace, strategy_cache, Vres, V, model , stateptr; upper_bound = false, maximize = true)
4171
42- Compute in-place robust Bellman update with the value function `V` and the interval probabilities
43- `prob` that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
72+ Compute in-place robust Bellman update with the value function `V` and the model `model`,
73+ e.g. [`IntervalMarkovDecisionProcess`](@ref), that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
4474Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument.
4575That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower
4676bound is computed.
@@ -52,45 +82,92 @@ and [`construct_strategy_cache`](@ref) for more details on how to pre-allocate t
5282### Examples
5383
5484```jldoctest
55- prob = IntervalProbabilities(;
56- lower = sparse_hcat(
57- SparseVector(15, [4, 10], [0.1, 0.2]),
58- SparseVector(15, [5, 6, 7], [0.5, 0.3, 0.1]),
59- ),
60- upper = sparse_hcat(
61- SparseVector(15, [1, 4, 10], [0.5, 0.6, 0.7]),
62- SparseVector(15, [5, 6, 7], [0.7, 0.5, 0.3]),
63- ),
85+ prob1 = IntervalProbabilities(;
86+ lower = [
87+ 0.0 0.5
88+ 0.1 0.3
89+ 0.2 0.1
90+ ],
91+ upper = [
92+ 0.5 0.7
93+ 0.6 0.5
94+ 0.7 0.3
95+ ],
96+ )
97+
98+ prob2 = IntervalProbabilities(;
99+ lower = [
100+ 0.1 0.2
101+ 0.2 0.3
102+ 0.3 0.4
103+ ],
104+ upper = [
105+ 0.6 0.6
106+ 0.5 0.5
107+ 0.4 0.4
108+ ],
64109)
65110
66- V = collect(1:15)
67- workspace = construct_workspace(prob)
111+ prob3 = IntervalProbabilities(; lower = [
112+ 0.0
113+ 0.0
114+ 1.0
115+ ][:, :], upper = [
116+ 0.0
117+ 0.0
118+ 1.0
119+ ][:, :])
120+
121+ transition_probs = [prob1, prob2, prob3]
122+ istates = [Int32(1)]
123+
124+ model = IntervalMarkovDecisionProcess(transition_probs, istates)
125+
126+ V = [1, 2, 3]
127+ workspace = construct_workspace(model)
68128strategy_cache = construct_strategy_cache(NoStrategyConfig())
69129Vres = similar(V)
70130
71- Vres = bellman!(workspace, strategy_cache, Vres, V, prob ; upper_bound = false, maximize = true)
131+ Vres = bellman!(workspace, strategy_cache, Vres, V, model ; upper_bound = false, maximize = true)
72132```
73133
74134[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883.
75135
76136"""
77137function bellman! end
78138
79- function bellman! (Vres, V, prob ; upper_bound = false )
80- workspace = construct_workspace (prob )
139+ function bellman! (Vres, V, model ; upper_bound = false , maximize = true )
140+ workspace = construct_workspace (model )
81141 strategy_cache = NoStrategyCache ()
82- return bellman! (workspace, strategy_cache, Vres, V, prob; upper_bound = upper_bound)
142+ return bellman! (
143+ workspace,
144+ strategy_cache,
145+ Vres,
146+ V,
147+ model;
148+ upper_bound = upper_bound,
149+ maximize = maximize,
150+ )
83151end
84152
85- function bellman! (workspace, strategy_cache, Vres, V, prob; upper_bound = false )
86- return bellman! (
153+ function bellman! (
154+ workspace,
155+ strategy_cache,
156+ Vres,
157+ V,
158+ model;
159+ upper_bound = false ,
160+ maximize = true ,
161+ )
162+ return _bellman_helper! (
87163 workspace,
88164 strategy_cache,
89165 Vres,
90166 V,
91- prob ,
92- stateptr (prob );
167+ transition_prob (model) ,
168+ stateptr (model );
93169 upper_bound = upper_bound,
170+ maximize = maximize,
94171 )
95172end
96173
99176# #####################################################
100177
101178# Non-threaded
102- function bellman ! (
179+ function _bellman_helper ! (
103180 workspace:: Union{DenseWorkspace, SparseWorkspace} ,
104181 strategy_cache:: AbstractStrategyCache ,
105182 Vres,
@@ -129,7 +206,7 @@ function bellman!(
129206end
130207
131208# Threaded
132- function bellman ! (
209+ function _bellman_helper ! (
133210 workspace:: Union{ThreadedDenseWorkspace, ThreadedSparseWorkspace} ,
134211 strategy_cache:: AbstractStrategyCache ,
135212 Vres,
@@ -231,20 +308,20 @@ Base.@propagate_inbounds function dense_sorted_state_action_bellman(V, prob, j
231308end
232309
233310Base. @propagate_inbounds function gap_value (
234- V,
311+ V:: AbstractVector{T} ,
235312 gap:: VR ,
236313 sum_lower,
237314 perm,
238- ) where {VR <: AbstractVector }
239- remaining = 1.0 - sum_lower
240- res = 0.0
315+ ) where {T, VR <: AbstractVector }
316+ remaining = one (T) - sum_lower
317+ res = zero (T)
241318
242319 @inbounds for i in perm
243320 p = min (remaining, gap[i])
244321 res += p * V[i]
245322
246323 remaining -= p
247- if remaining <= 0.0
324+ if remaining <= zero (T)
248325 break
249326 end
250327 end
@@ -275,16 +352,19 @@ Base.@propagate_inbounds function state_action_bellman(
275352 return dot (V, lowerⱼ) + gap_value (Vp_workspace, used)
276353end
277354
278- Base. @propagate_inbounds function gap_value (Vp, sum_lower)
279- remaining = 1.0 - sum_lower
280- res = 0.0
355+ Base. @propagate_inbounds function gap_value (
356+ Vp:: VP ,
357+ sum_lower,
358+ ) where {T, VP <: AbstractVector{<:Tuple{T, <:Real}} }
359+ remaining = one (T) - sum_lower
360+ res = zero (T)
281361
282362 for (V, p) in Vp
283363 p = min (remaining, p)
284364 res += p * V
285365
286366 remaining -= p
287- if remaining <= 0.0
367+ if remaining <= zero (T)
288368 break
289369 end
290370 end
295375# ###############################################################
296376# Bellman operator for OrthogonalIntervalMarkovDecisionProcess #
297377# ###############################################################
298- function bellman ! (
378+ function _bellman_helper ! (
299379 workspace:: Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace} ,
300380 strategy_cache:: AbstractStrategyCache ,
301381 Vres,
@@ -329,7 +409,7 @@ function bellman!(
329409 return Vres
330410end
331411
332- function bellman ! (
412+ function _bellman_helper ! (
333413 workspace:: Union {
334414 ThreadedDenseOrthogonalWorkspace,
335415 ThreadedSparseOrthogonalWorkspace,
0 commit comments