Skip to content

Commit 0a12524

Browse files
authored
Merge pull request #87 from Zinoex/fm/product_bellman
Bellman operator for ProductProcess
2 parents 1e39de5 + c8c07ae commit 0a12524

35 files changed

+1200
-714
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ with great efficiency. Furthermore, it provides methods for accelerating the com
1616
certificate using CUDA hardware.
1717

1818
## Features
19-
- O-maximization and value iteration
19+
- Value iteration (Bellman operator via O-maximization)
2020
- Dense and sparse matrix support
21-
- Parametric probability types for customizable precision
21+
- Parametric probability and value types for customizable precision including rationals and floating-point numbers
2222
- Multithreaded CPU and CUDA-accelerated value iteration
2323
- Data loading and writing in formats by various tools (PRISM, bmdp-tool, IMDP.jl)
2424

@@ -72,7 +72,7 @@ V, k, residual = value_iteration(problem)
7272
See [Usage](https://www.baymler.com/IntervalMDP.jl/dev/usage/) for more information about different specifications, using sparse matrices, and CUDA.
7373

7474
## Ecosystem
75-
Building upon IntervalMDP.jl, we are designing an ecosystem of tools, which current consists of:
75+
Building upon IntervalMDP.jl, we are designing an ecosystem of tools, which currently consists of:
7676

7777
- [IntervalMDPAbstractions.jl](https://github.com/Zinoex/IntervalMDPAbstractions.jl) - constructing abstractions of stochastic dynamical systems to verify properties.
7878

docs/src/reference/specifications.md

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,30 @@ strategy
88
Specification
99
system_property
1010
Property
11+
BasicProperty
12+
ProductProperty
1113
satisfaction_mode
1214
SatisfactionMode
1315
strategy_mode
1416
StrategyMode
1517
```
1618

17-
## Temporal logic
19+
## DFA Reachability
1820

1921
```@docs
20-
LTLFormula
21-
isfinitetime(prop::LTLFormula)
22-
LTLfFormula
23-
isfinitetime(prop::LTLfFormula)
24-
time_horizon(prop::LTLfFormula)
25-
PCTLFormula
22+
AbstractDFAReachability
23+
24+
FiniteTimeDFAReachability
25+
isfinitetime(prop::FiniteTimeDFAReachability)
26+
terminal_states(prop::FiniteTimeDFAReachability)
27+
reach(prop::FiniteTimeDFAReachability)
28+
time_horizon(prop::FiniteTimeDFAReachability)
29+
30+
InfiniteTimeDFAReachability
31+
isfinitetime(prop::InfiniteTimeDFAReachability)
32+
terminal_states(prop::InfiniteTimeDFAReachability)
33+
reach(prop::InfiniteTimeDFAReachability)
34+
convergence_eps(prop::InfiniteTimeDFAReachability)
2635
```
2736

2837
## Reachability

docs/src/reference/systems.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@ MixtureIntervalMarkovDecisionProcess
1616
MixtureIntervalMarkovChain
1717
stateptr(mdp::MixtureIntervalMarkovDecisionProcess)
1818
DFA
19-
letters2alphabet(letters::AbstractVector{String})
20-
alphabet2index(alphabet::AbstractVector{String})
19+
num_states(dfa::DFA)
20+
num_labels(dfa::DFA)
2121
transition(dfa::DFA)
22-
alphabetptr(dfa::DFA)
22+
labelmap(dfa::DFA)
2323
initial_state(dfa::DFA)
2424
accepting_states(dfa::DFA)
25-
ProductIntervalMarkovDecisionProcessDFA
26-
imdp(md::ProductIntervalMarkovDecisionProcessDFA)
27-
automaton(md::ProductIntervalMarkovDecisionProcessDFA)
28-
labelling_function(md::ProductIntervalMarkovDecisionProcessDFA)
25+
ProductProcess
26+
markov_process(proc::ProductProcess)
27+
automaton(proc::ProductProcess)
28+
labelling_function(proc::ProductProcess)
2929
```
3030

3131
## Probability representation
@@ -75,12 +75,14 @@ weighting_probs
7575
### Labelling of IMDP states to Automaton alphabet
7676
```@docs
7777
LabellingFunction
78-
count_mapping(map::AbstractArray)
7978
mapping(labelling_func::LabellingFunction)
79+
num_labels(labelling_func::LabellingFunction)
8080
```
8181

8282
### Transition function for DFA
8383
```@docs
8484
TransitionFunction
8585
transition(transition_func::TransitionFunction)
86+
num_states(tf::TransitionFunction)
87+
num_labels(tf::TransitionFunction)
8688
```

ext/IntervalMDPCudaExt.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,19 @@ function IntervalMDP.checkdevice(b::AbstractGPUArray, A::AbstractMatrix)
117117
@assert false "The reward vector is on the GPU ($(typeof(b))) and the transition matrix is a CPU matrix ($(typeof(A)))."
118118
end
119119

120+
IntervalMDP.arrayfactory(
121+
::MR,
122+
T,
123+
num_states,
124+
) where {R, MR <: Union{CuSparseMatrixCSC{R}, CuMatrix{R}}} = CUDA.zeros(T, num_states)
125+
120126
include("cuda/utils.jl")
121127
include("cuda/array.jl")
122128
include("cuda/sorting.jl")
123129
include("cuda/workspace.jl")
124130
include("cuda/strategy.jl")
125131
include("cuda/bellman/dense.jl")
126132
include("cuda/bellman/sparse.jl")
127-
include("cuda/value_iteration.jl")
128133
include("cuda/interval_probabilities.jl")
129134
include("cuda/specification.jl")
130135

ext/cuda/strategy.jl

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,3 @@
1-
function IntervalMDP.construct_action_cache(
2-
::IntervalProbabilities{R, VR},
3-
dims,
4-
) where {R <: Real, VR <: AbstractGPUVector{R}}
5-
return CUDA.zeros(Int32, dims)
6-
end
7-
81
abstract type ActiveCache end
92
abstract type OptimizingActiveCache <: ActiveCache end
103

ext/cuda/value_iteration.jl

Lines changed: 0 additions & 5 deletions
This file was deleted.

src/IntervalMDP.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@ export StationaryStrategy, TimeVaryingStrategy
1919
export construct_strategy_cache, time_length
2020

2121
include("specification.jl")
22-
export Property, LTLFormula, LTLfFormula, PCTLFormula
22+
export Property, BasicProperty, ProductProperty
2323

24+
export AbstractDFAReachability, FiniteTimeDFAReachability, InfiniteTimeDFAReachability
2425
export AbstractReachability,
2526
FiniteTimeReachability, InfiniteTimeReachability, ExactTimeReachability
2627
export AbstractReachAvoid, FiniteTimeReachAvoid, InfiniteTimeReachAvoid, ExactTimeReachAvoid

src/bellman.jl

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
bellman(V, model; upper_bound = false)
2+
bellman(V, model; upper_bound = false, maximize = true)
33
44
Compute robust Bellman update with the value function `V` and the model `model`, e.g. [`IntervalMarkovDecisionProcess`](@ref),
55
that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
@@ -56,7 +56,7 @@ Vcur = bellman(Vprev, model; upper_bound = false)
5656
5757
!!! note
5858
This function will construct a workspace object and an output vector.
59-
For a hot-loop, it is more efficient to use `bellman!` and pass in pre-allocated objects.
59+
For a hot-loop, it is more efficient to use [`bellman!`](@ref) and pass in pre-allocated objects.
6060
6161
[1] M. Lahijanian, S. B. Andersson and C. Belta, "Formal Verification and Synthesis for Discrete-Time Stochastic Systems," in IEEE Transactions on Automatic Control, vol. 60, no. 8, pp. 2031-2045, Aug. 2015, doi: 10.1109/TAC.2015.2398883.
6262
@@ -67,7 +67,7 @@ function bellman(V, model; upper_bound = false, maximize = true)
6767
end
6868

6969
"""
70-
bellman!(workspace, strategy_cache, Vres, V, model, stateptr; upper_bound = false, maximize = true)
70+
bellman!(workspace, strategy_cache, Vres, V, model; upper_bound = false, maximize = true)
7171
7272
Compute in-place robust Bellman update with the value function `V` and the model `model`,
7373
e.g. [`IntervalMarkovDecisionProcess`](@ref), that upper or lower bounds the expectation of the value function `V` via O-maximization [1].
@@ -155,7 +155,7 @@ function bellman!(
155155
strategy_cache,
156156
Vres,
157157
V,
158-
model;
158+
model::IntervalMarkovProcess;
159159
upper_bound = false,
160160
maximize = true,
161161
)
@@ -171,6 +171,72 @@ function bellman!(
171171
)
172172
end
173173

174+
function bellman!(
175+
workspace::ProductWorkspace,
176+
strategy_cache,
177+
Vres,
178+
V,
179+
model::ProductProcess;
180+
upper_bound = false,
181+
maximize = true,
182+
)
183+
mp = markov_process(model)
184+
lf = labelling_function(model)
185+
dfa = automaton(model)
186+
187+
W = workspace.intermediate_values
188+
189+
@inbounds for state in dfa
190+
local_strategy_cache = localize_strategy_cache(strategy_cache, state)
191+
192+
# Select the value function for the current DFA state
193+
# according to the appropriate DFA transition function
194+
map!(W, CartesianIndices(product_num_states(mp))) do idx
195+
return V[idx, dfa[state, lf[idx]]]
196+
end
197+
198+
# For each state in the product process, compute the Bellman operator
199+
# for the corresponding Markov process
200+
bellman!(
201+
workspace.underlying_workspace,
202+
local_strategy_cache,
203+
selectdim(Vres, ndims(Vres), state),
204+
W,
205+
mp;
206+
upper_bound = upper_bound,
207+
maximize = maximize,
208+
)
209+
end
210+
211+
return Vres
212+
end
213+
214+
function localize_strategy_cache(strategy_cache::NoStrategyCache, dfa_state)
215+
return strategy_cache
216+
end
217+
218+
function localize_strategy_cache(strategy_cache::TimeVaryingStrategyCache, dfa_state)
219+
return TimeVaryingStrategyCache(
220+
selectdim(
221+
strategy_cache.cur_strategy,
222+
ndims(strategy_cache.cur_strategy),
223+
dfa_state,
224+
),
225+
)
226+
end
227+
228+
function localize_strategy_cache(strategy_cache::StationaryStrategyCache, dfa_state)
229+
return StationaryStrategyCache(
230+
selectdim(strategy_cache.strategy, ndims(strategy_cache.strategy), dfa_state),
231+
)
232+
end
233+
234+
function localize_strategy_cache(strategy_cache::ActiveGivenStrategyCache, dfa_state)
235+
return ActiveGivenStrategyCache(
236+
selectdim(strategy_cache.strategy, ndims(strategy_cache.strategy), dfa_state),
237+
)
238+
end
239+
174240
######################################################
175241
# Bellman operator for IntervalMarkovDecisionProcess #
176242
######################################################

0 commit comments

Comments
 (0)