Skip to content

Commit 2748b45

Browse files
authored
Merge pull request #85 from Zinoex/fm/bellman_interface
New Bellman interface
2 parents de57cc4 + 85524dd commit 2748b45

File tree

12 files changed

+1689
-1289
lines changed

12 files changed

+1689
-1289
lines changed

ext/cuda/bellman/dense.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function IntervalMDP.bellman!(
1+
function IntervalMDP._bellman_helper!(
22
workspace::CuDenseWorkspace,
33
strategy_cache::IntervalMDP.AbstractStrategyCache,
44
Vres,

ext/cuda/bellman/sparse.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
function IntervalMDP.bellman!(
1+
function IntervalMDP._bellman_helper!(
22
workspace::CuSparseWorkspace,
33
strategy_cache::IntervalMDP.AbstractStrategyCache,
44
Vres,

src/IntervalMDP.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ export construct_strategy_cache, time_length
2121
include("specification.jl")
2222
export Property, LTLFormula, LTLfFormula, PCTLFormula
2323

24-
export AbstractReachability, FiniteTimeReachability, InfiniteTimeReachability, ExactTimeReachability
24+
export AbstractReachability,
25+
FiniteTimeReachability, InfiniteTimeReachability, ExactTimeReachability
2526
export AbstractReachAvoid, FiniteTimeReachAvoid, InfiniteTimeReachAvoid, ExactTimeReachAvoid
2627
export AbstractSafety, FiniteTimeSafety, InfiniteTimeSafety
2728
export AbstractReward, FiniteTimeReward, InfiniteTimeReward

src/bellman.jl

Lines changed: 131 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,57 @@
11
"""
2-
bellman(V, prob; upper_bound = false)
2+
bellman(V, model; upper_bound = false)
33
4-
Compute robust Bellman update with the value function `V` and the interval probabilities `prob`
4+
Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref),
55
that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
66
Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument.
77
That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower
88
bound is computed.
99
1010
### Examples
1111
```jldoctest
12-
prob = IntervalProbabilities(;
13-
lower = sparse_hcat(
14-
SparseVector(15, [4, 10], [0.1, 0.2]),
15-
SparseVector(15, [5, 6, 7], [0.5, 0.3, 0.1]),
16-
),
17-
upper = sparse_hcat(
18-
SparseVector(15, [1, 4, 10], [0.5, 0.6, 0.7]),
19-
SparseVector(15, [5, 6, 7], [0.7, 0.5, 0.3]),
20-
),
12+
prob1 = IntervalProbabilities(;
13+
lower = [
14+
0.0 0.5
15+
0.1 0.3
16+
0.2 0.1
17+
],
18+
upper = [
19+
0.5 0.7
20+
0.6 0.5
21+
0.7 0.3
22+
],
2123
)
2224
23-
Vprev = collect(1:15)
24-
Vcur = bellman(Vprev, prob; upper_bound = false)
25+
prob2 = IntervalProbabilities(;
26+
lower = [
27+
0.1 0.2
28+
0.2 0.3
29+
0.3 0.4
30+
],
31+
upper = [
32+
0.6 0.6
33+
0.5 0.5
34+
0.4 0.4
35+
],
36+
)
37+
38+
prob3 = IntervalProbabilities(; lower = [
39+
0.0
40+
0.0
41+
1.0
42+
][:, :], upper = [
43+
0.0
44+
0.0
45+
1.0
46+
][:, :])
47+
48+
transition_probs = [prob1, prob2, prob3]
49+
istates = [Int32(1)]
50+
51+
model = IntervalMarkovDecisionProcess(transition_probs, istates)
52+
53+
Vprev = [1, 2, 3]
54+
Vcur = bellman(Vprev, model; upper_bound = false)
2555
```
2656
2757
!!! note
@@ -31,16 +61,16 @@ Vcur = bellman(Vprev, prob; upper_bound = false)
3161
[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883.
3262
3363
"""
34-
function bellman(V, prob; upper_bound = false)
35-
Vres = similar(V, source_shape(prob))
36-
return bellman!(Vres, V, prob; upper_bound = upper_bound)
64+
function bellman(V, model; upper_bound = false, maximize = true)
65+
Vres = similar(V, source_shape(model))
66+
return bellman!(Vres, V, model; upper_bound = upper_bound, maximize = maximize)
3767
end
3868

3969
"""
40-
bellman!(workspace, strategy_cache, Vres, V, prob, stateptr; upper_bound = false, maximize = true)
70+
bellman!(workspace, strategy_cache, Vres, V, model, stateptr; upper_bound = false, maximize = true)
4171
42-
Compute in-place robust Bellman update with the value function `V` and the interval probabilities
43-
`prob` that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
72+
Compute in-place robust Bellman update with the value function `V` and the model `model`,
73+
e.g. [`IntervalMarkovDecisionProcess`](@ref), that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
4474
Whether the expectation is maximized or minimized is determined by the `upper_bound` keyword argument.
4575
That is, if `upper_bound == true` then an upper bound is computed and if `upper_bound == false` then a lower
4676
bound is computed.
@@ -52,45 +82,92 @@ and [`construct_strategy_cache`](@ref) for more details on how to pre-allocate t
5282
### Examples
5383
5484
```jldoctest
55-
prob = IntervalProbabilities(;
56-
lower = sparse_hcat(
57-
SparseVector(15, [4, 10], [0.1, 0.2]),
58-
SparseVector(15, [5, 6, 7], [0.5, 0.3, 0.1]),
59-
),
60-
upper = sparse_hcat(
61-
SparseVector(15, [1, 4, 10], [0.5, 0.6, 0.7]),
62-
SparseVector(15, [5, 6, 7], [0.7, 0.5, 0.3]),
63-
),
85+
prob1 = IntervalProbabilities(;
86+
lower = [
87+
0.0 0.5
88+
0.1 0.3
89+
0.2 0.1
90+
],
91+
upper = [
92+
0.5 0.7
93+
0.6 0.5
94+
0.7 0.3
95+
],
96+
)
97+
98+
prob2 = IntervalProbabilities(;
99+
lower = [
100+
0.1 0.2
101+
0.2 0.3
102+
0.3 0.4
103+
],
104+
upper = [
105+
0.6 0.6
106+
0.5 0.5
107+
0.4 0.4
108+
],
64109
)
65110
66-
V = collect(1:15)
67-
workspace = construct_workspace(prob)
111+
prob3 = IntervalProbabilities(; lower = [
112+
0.0
113+
0.0
114+
1.0
115+
][:, :], upper = [
116+
0.0
117+
0.0
118+
1.0
119+
][:, :])
120+
121+
transition_probs = [prob1, prob2, prob3]
122+
istates = [Int32(1)]
123+
124+
model = IntervalMarkovDecisionProcess(transition_probs, istates)
125+
126+
V = [1, 2, 3]
127+
workspace = construct_workspace(model)
68128
strategy_cache = construct_strategy_cache(NoStrategyConfig())
69129
Vres = similar(V)
70130
71-
Vres = bellman!(workspace, strategy_cache, Vres, V, prob; upper_bound = false, maximize = true)
131+
Vres = bellman!(workspace, strategy_cache, Vres, V, model; upper_bound = false, maximize = true)
72132
```
73133
74134
[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883.
75135
76136
"""
77137
function bellman! end
78138

79-
function bellman!(Vres, V, prob; upper_bound = false)
80-
workspace = construct_workspace(prob)
139+
function bellman!(Vres, V, model; upper_bound = false, maximize = true)
140+
workspace = construct_workspace(model)
81141
strategy_cache = NoStrategyCache()
82-
return bellman!(workspace, strategy_cache, Vres, V, prob; upper_bound = upper_bound)
142+
return bellman!(
143+
workspace,
144+
strategy_cache,
145+
Vres,
146+
V,
147+
model;
148+
upper_bound = upper_bound,
149+
maximize = maximize,
150+
)
83151
end
84152

85-
function bellman!(workspace, strategy_cache, Vres, V, prob; upper_bound = false)
86-
return bellman!(
153+
function bellman!(
154+
workspace,
155+
strategy_cache,
156+
Vres,
157+
V,
158+
model;
159+
upper_bound = false,
160+
maximize = true,
161+
)
162+
return _bellman_helper!(
87163
workspace,
88164
strategy_cache,
89165
Vres,
90166
V,
91-
prob,
92-
stateptr(prob);
167+
transition_prob(model),
168+
stateptr(model);
93169
upper_bound = upper_bound,
170+
maximize = maximize,
94171
)
95172
end
96173

@@ -99,7 +176,7 @@ end
99176
######################################################
100177

101178
# Non-threaded
102-
function bellman!(
179+
function _bellman_helper!(
103180
workspace::Union{DenseWorkspace, SparseWorkspace},
104181
strategy_cache::AbstractStrategyCache,
105182
Vres,
@@ -129,7 +206,7 @@ function bellman!(
129206
end
130207

131208
# Threaded
132-
function bellman!(
209+
function _bellman_helper!(
133210
workspace::Union{ThreadedDenseWorkspace, ThreadedSparseWorkspace},
134211
strategy_cache::AbstractStrategyCache,
135212
Vres,
@@ -231,20 +308,20 @@ Base.@propagate_inbounds function dense_sorted_state_action_bellman(V, prob, j
231308
end
232309

233310
Base.@propagate_inbounds function gap_value(
234-
V,
311+
V::AbstractVector{T},
235312
gap::VR,
236313
sum_lower,
237314
perm,
238-
) where {VR <: AbstractVector}
239-
remaining = 1.0 - sum_lower
240-
res = 0.0
315+
) where {T, VR <: AbstractVector}
316+
remaining = one(T) - sum_lower
317+
res = zero(T)
241318

242319
@inbounds for i in perm
243320
p = min(remaining, gap[i])
244321
res += p * V[i]
245322

246323
remaining -= p
247-
if remaining <= 0.0
324+
if remaining <= zero(T)
248325
break
249326
end
250327
end
@@ -275,16 +352,19 @@ Base.@propagate_inbounds function state_action_bellman(
275352
return dot(V, lowerⱼ) + gap_value(Vp_workspace, used)
276353
end
277354

278-
Base.@propagate_inbounds function gap_value(Vp, sum_lower)
279-
remaining = 1.0 - sum_lower
280-
res = 0.0
355+
Base.@propagate_inbounds function gap_value(
356+
Vp::VP,
357+
sum_lower,
358+
) where {T, VP <: AbstractVector{<:Tuple{T, <:Real}}}
359+
remaining = one(T) - sum_lower
360+
res = zero(T)
281361

282362
for (V, p) in Vp
283363
p = min(remaining, p)
284364
res += p * V
285365

286366
remaining -= p
287-
if remaining <= 0.0
367+
if remaining <= zero(T)
288368
break
289369
end
290370
end
@@ -295,7 +375,7 @@ end
295375
################################################################
296376
# Bellman operator for OrthogonalIntervalMarkovDecisionProcess #
297377
################################################################
298-
function bellman!(
378+
function _bellman_helper!(
299379
workspace::Union{DenseOrthogonalWorkspace, SparseOrthogonalWorkspace, MixtureWorkspace},
300380
strategy_cache::AbstractStrategyCache,
301381
Vres,
@@ -329,7 +409,7 @@ function bellman!(
329409
return Vres
330410
end
331411

332-
function bellman!(
412+
function _bellman_helper!(
333413
workspace::Union{
334414
ThreadedDenseOrthogonalWorkspace,
335415
ThreadedSparseOrthogonalWorkspace,

src/specification.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,6 @@ Return `true` for ExpectedExitTime.
885885
"""
886886
isfinitetime(prop::ExpectedExitTime) = false
887887

888-
889888
"""
890889
terminal_states(prop::ExpectedExitTime)
891890

src/value_iteration.jl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,7 @@ function step!(
203203
strategy_cache,
204204
value_function.current,
205205
value_function.previous,
206-
transition_prob(mp),
207-
stateptr(mp);
206+
mp;
208207
upper_bound = upper_bound,
209208
maximize = maximize,
210209
)
@@ -224,8 +223,7 @@ function step!(
224223
strategy_cache[time_length(strategy_cache) - k],
225224
value_function.current,
226225
value_function.previous,
227-
transition_prob(mp),
228-
stateptr(mp);
226+
mp;
229227
upper_bound = upper_bound,
230228
maximize = maximize,
231229
)

0 commit comments

Comments
 (0)