Commit 1da28de

Merge pull request #489 from shizejin/add_Markov_LQ
Add `LQMarkov`.
2 parents c96cae1 + 5fea2de

File tree: 4 files changed, +513 -6 lines


quantecon/__init__.py (1 addition, 1 deletion)
@@ -29,7 +29,7 @@
 from .kalman import Kalman
 from .lae import LAE
 from .arma import ARMA
-from .lqcontrol import LQ
+from .lqcontrol import LQ, LQMarkov
 from .filter import hamilton_filter
 from .lqnash import nnash
 from .lss import LinearStateSpace
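
With this change, `LQMarkov` is exported at the package top level alongside `LQ`; a minimal import check (assuming an installed quantecon that includes this commit):

    from quantecon import LQMarkov            # new top-level export
    from quantecon.lqcontrol import LQMarkov  # equivalent, via the defining module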

quantecon/lqcontrol.py (283 additions, 2 deletions)
@@ -1,14 +1,16 @@
 """
 Provides a class called LQ for solving linear quadratic control
-problems.
+problems, and a class called LQMarkov for solving Markov jump
+linear quadratic control problems.

 """
 from textwrap import dedent
 import numpy as np
 from numpy import dot
 from scipy.linalg import solve
-from .matrix_eqn import solve_discrete_riccati
+from .matrix_eqn import solve_discrete_riccati, solve_discrete_riccati_system
 from .util import check_random_state
+from .markov import MarkovChain


 class LQ:

@@ -327,3 +329,282 @@ def compute_sequence(self, x0, ts_length=None, method='doubling',
         x_path[:, T] = Ax + Bu + Cw_path[:, T]

         return x_path, u_path, w_path
+
+
+class LQMarkov:
+    r"""
+    This class is for analyzing Markov jump linear quadratic optimal
+    control problems of the infinite horizon form
+
+    .. math::
+
+        \min \mathbb{E}
+            \Big[ \sum_{t=0}^{\infty} \beta^t r(x_t, s_t, u_t) \Big]
+
+    with
+
+    .. math::
+
+        r(x_t, s_t, u_t) :=
+            (x_t' R(s_t) x_t + u_t' Q(s_t) u_t + 2 u_t' N(s_t) x_t)
+
+    subject to the law of motion
+
+    .. math::
+
+        x_{t+1} = A(s_t) x_t + B(s_t) u_t + C(s_t) w_{t+1}
+
+    Here :math:`x` is n x 1, :math:`u` is k x 1, :math:`w` is j x 1 and the
+    matrices are conformable for these dimensions. The sequence :math:`\{w_t\}`
+    is assumed to be white noise, with zero mean and
+    :math:`\mathbb{E} [ w_t w_t' ] = I`, the j x j identity.
+
+    If :math:`C` is not supplied as a parameter, the model is assumed to be
+    deterministic (and :math:`C` is set to a zero matrix of appropriate
+    dimension).
+
+    The optimal value function :math:`V(x_t, s_t)` takes the form
+
+    .. math::
+
+        x_t' P(s_t) x_t + d(s_t)
+
+    and the optimal policy is of the form :math:`u_t = -F(s_t) x_t`.
+
+    Parameters
+    ----------
+    Π : array_like(float, ndim=2)
+        The Markov chain transition matrix with dimension m x m.
+    Qs : array_like(float)
+        Consists of m symmetric and non-negative definite payoff
+        matrices Q(s) with dimension k x k that correspond to
+        the control variable u for each Markov state s
+    Rs : array_like(float)
+        Consists of m symmetric and non-negative definite payoff
+        matrices R(s) with dimension n x n that correspond to
+        the state variable x for each Markov state s
+    As : array_like(float)
+        Consists of m state transition matrices A(s) with dimension
+        n x n for each Markov state s
+    Bs : array_like(float)
+        Consists of m state transition matrices B(s) with dimension
+        n x k for each Markov state s
+    Cs : array_like(float), optional(default=None)
+        Consists of m state transition matrices C(s) with dimension
+        n x j for each Markov state s. If the model is deterministic
+        then Cs should take its default value of None
+    Ns : array_like(float), optional(default=None)
+        Consists of m cross product term matrices N(s) with dimension
+        k x n for each Markov state s
+    beta : scalar(float), optional(default=1)
+        beta is the discount parameter
+
+    Attributes
+    ----------
+    Π, Qs, Rs, Ns, As, Bs, Cs, beta : see Parameters
+    Ps : array_like(float)
+        Ps is part of the value function representation of
+        :math:`V(x, s) = x' P(s) x + d(s)`
+    ds : array_like(float)
+        ds is part of the value function representation of
+        :math:`V(x, s) = x' P(s) x + d(s)`
+    Fs : array_like(float)
+        Fs is the policy rule that determines the choice of control in
+        each period at each Markov state
+    m : scalar(int)
+        The number of Markov states
+    k, n, j : scalar(int)
+        The dimensions of the matrices as presented above
+
+    """
+
+    def __init__(self, Π, Qs, Rs, As, Bs, Cs=None, Ns=None, beta=1):
+
+        # == Make sure all matrices for each state are 2D arrays == #
+        def converter(Xs):
+            return np.array([np.atleast_2d(np.asarray(X, dtype='float'))
+                             for X in Xs])
+        self.As, self.Bs, self.Qs, self.Rs = list(map(converter,
+                                                      (As, Bs, Qs, Rs)))
+
+        # == Record number of states == #
+        self.m = self.Qs.shape[0]
+        # == Record dimensions == #
+        self.k, self.n = self.Qs.shape[1], self.Rs.shape[1]
+
+        if Ns is None:
+            # == No cross product term in payoff. Set N=0. == #
+            Ns = [np.zeros((self.k, self.n)) for i in range(self.m)]
+
+        self.Ns = converter(Ns)
+
+        if Cs is None:
+            # == If C not given, then model is deterministic. Set C=0. == #
+            self.j = 1
+            Cs = [np.zeros((self.n, self.j)) for i in range(self.m)]
+
+        self.Cs = converter(Cs)
+        self.j = self.Cs.shape[2]
+
+        self.beta = beta
+
+        self.Π = np.asarray(Π, dtype='float')
+
+        self.Ps = None
+        self.ds = None
+        self.Fs = None
+
+    def __repr__(self):
+        return self.__str__()
+
+    def __str__(self):
+        m = """\
+        Markov Jump Linear Quadratic control system
+          - beta (discount parameter)       : {b}
+          - T (time horizon)                : {t}
+          - m (number of Markov states)     : {m}
+          - n (number of state variables)   : {n}
+          - k (number of control variables) : {k}
+          - j (number of shocks)            : {j}
+        """
+        t = "infinite"
+        return dedent(m.format(b=self.beta, m=self.m, n=self.n, k=self.k,
+                               j=self.j, t=t))
+
+    def stationary_values(self):
+        """
+        Computes the matrices :math:`P(s)` and scalars :math:`d(s)` that
+        represent the value function
+
+        .. math::
+
+            V(x, s) = x' P(s) x + d(s)
+
+        in the infinite horizon case. Also computes the control matrices
+        :math:`F(s)` from :math:`u = - F(s) x`.
+
+        Returns
+        -------
+        Ps : array_like(float)
+            Ps is part of the value function representation of
+            :math:`V(x, s) = x' P(s) x + d(s)`
+        ds : array_like(float)
+            ds is part of the value function representation of
+            :math:`V(x, s) = x' P(s) x + d(s)`
+        Fs : array_like(float)
+            Fs is the policy rule that determines the choice of control in
+            each period at each Markov state
+
+        """
+
+        # == Simplify notation == #
+        beta, Π = self.beta, self.Π
+        m, n, k = self.m, self.n, self.k
+        As, Bs, Cs = self.As, self.Bs, self.Cs
+        Qs, Rs, Ns = self.Qs, self.Rs, self.Ns
+
+        # == Solve for P(s) by iterating on the discrete Riccati system == #
+        Ps = solve_discrete_riccati_system(Π, As, Bs, Cs, Qs, Rs, Ns, beta)
+
+        # == Calculate F and d == #
+        Fs = np.array([np.empty((k, n)) for i in range(m)])
+        X = np.empty((m, m))
+        sum1, sum2 = np.empty((k, k)), np.empty((k, n))
+        for i in range(m):
+            # CCi = C_i C_i'
+            CCi = Cs[i] @ Cs[i].T
+            sum1[:, :] = 0.
+            sum2[:, :] = 0.
+            for j in range(m):
+                # for F
+                sum1 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ Bs[i]
+                sum2 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ As[i]
+
+                # for d
+                X[j, i] = np.trace(Ps[j] @ CCi)
+
+            Fs[i][:, :] = solve(Qs[i] + sum1, sum2 + Ns[i])
+
+        ds = solve(np.eye(m) - beta * Π,
+                   np.diag(beta * Π @ X).reshape((m, 1))).flatten()
+
+        self.Ps, self.ds, self.Fs = Ps, ds, Fs
+
+        return Ps, ds, Fs
+
+    def compute_sequence(self, x0, ts_length=None, random_state=None):
+        """
+        Compute and return the optimal state and control sequences
+        :math:`x_0, ..., x_T` and :math:`u_0, ..., u_{T-1}`, under the
+        assumption that :math:`{w_t}` is iid and :math:`N(0, 1)`, along
+        with the simulated Markov state sequence :math:`s_0, ..., s_T`
+
+        Parameters
+        ----------
+        x0 : array_like(float)
+            The initial state, a vector of length n
+
+        ts_length : scalar(int), optional(default=None)
+            Length of the simulation. If None, T is set to be 100
+
+        random_state : int or np.random.RandomState, optional
+            Random seed (integer) or np.random.RandomState instance to set
+            the initial state of the random number generator for
+            reproducibility. If None, a randomly initialized RandomState is
+            used.
+
+        Returns
+        -------
+        x_path : array_like(float)
+            An n x T+1 matrix, where the t-th column represents :math:`x_t`
+
+        u_path : array_like(float)
+            A k x T matrix, where the t-th column represents :math:`u_t`
+
+        w_path : array_like(float)
+            A j x T+1 matrix, where the t-th column represents :math:`w_t`
+
+        state : array_like(int)
+            Array containing the state values :math:`s_t` of the sample path
+
+        """
+
+        # === Solve for optimal policies === #
+        self.stationary_values()
+
+        # === Simplify notation === #
+        As, Bs, Cs = self.As, self.Bs, self.Cs
+        Fs = self.Fs
+
+        random_state = check_random_state(random_state)
+        x0 = np.asarray(x0)
+        x0 = x0.reshape(self.n, 1)
+
+        T = ts_length if ts_length else 100
+
+        # == Simulate Markov states == #
+        chain = MarkovChain(self.Π)
+        state = chain.simulate_indices(ts_length=T+1,
+                                       random_state=random_state)
+
+        # == Prepare storage arrays == #
+        x_path = np.empty((self.n, T+1))
+        u_path = np.empty((self.k, T))
+        w_path = random_state.randn(self.j, T+1)
+        Cw_path = np.empty((self.n, T+1))
+        for i in range(T+1):
+            Cw_path[:, i] = Cs[state[i]] @ w_path[:, i]
+
+        # == Use policy sequence to generate states and controls == #
+        x_path[:, 0] = x0.flatten()
+        u_path[:, 0] = - (Fs[state[0]] @ x0).flatten()
+        for t in range(1, T):
+            Ax = As[state[t]] @ x_path[:, t-1]
+            Bu = Bs[state[t]] @ u_path[:, t-1]
+            x_path[:, t] = Ax + Bu + Cw_path[:, t]
+            u_path[:, t] = - (Fs[state[t]] @ x_path[:, t])
+        Ax = As[state[T]] @ x_path[:, T-1]
+        Bu = Bs[state[T]] @ u_path[:, T-1]
+        x_path[:, T] = Ax + Bu + Cw_path[:, T]
+
+        return x_path, u_path, w_path, state
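
Unpacking `stationary_values`: after `solve_discrete_riccati_system` returns the fixed-point matrices P(s), the loops above recover the policies and constants as

    F(s) = (Q(s) + β Σ_{s'} Π[s, s'] B(s)' P(s') B(s))^{-1}
           (β Σ_{s'} Π[s, s'] B(s)' P(s') A(s) + N(s))

    d(s) = β Σ_{s'} Π[s, s'] (trace(P(s') C(s) C(s)') + d(s')),

the second of which is the linear system (I - β Π) d = β diag(Π X) solved by the final `solve` call.

As an end-to-end usage sketch of the new class (all parameter values below are illustrative placeholders, not numbers taken from this commit or its tests):

    import numpy as np
    from quantecon import LQMarkov

    # Transition matrix over m = 2 Markov states (placeholder values)
    Π = np.array([[0.9, 0.1],
                  [0.1, 0.9]])

    # One state variable (n = 1), one control (k = 1), one shock (j = 1),
    # with one matrix per Markov state throughout
    Qs = np.array([[[1.0]], [[0.5]]])   # control payoffs Q(s), each k x k
    Rs = np.array([[[1.0]], [[1.0]]])   # state payoffs R(s), each n x n
    As = np.array([[[0.9]], [[1.1]]])   # state transitions A(s), each n x n
    Bs = np.array([[[1.0]], [[1.0]]])   # control loadings B(s), each n x k
    Cs = np.array([[[0.1]], [[0.2]]])   # shock loadings C(s), each n x j

    lq = LQMarkov(Π, Qs, Rs, As, Bs, Cs=Cs, beta=0.95)

    # P(s) and d(s) from V(x, s) = x' P(s) x + d(s), plus policies F(s)
    Ps, ds, Fs = lq.stationary_values()

    # Simulate under u_t = -F(s_t) x_t with a reproducible seed
    x_path, u_path, w_path, state = lq.compute_sequence(
        x0=np.array([1.0]), ts_length=50, random_state=1234)

Since `compute_sequence` calls `stationary_values` itself, the explicit call above is only needed when `Ps`, `ds`, or `Fs` are wanted directly.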
