 """
 Provides a class called LQ for solving linear quadratic control
-problems.
+problems, and a class called LQMarkov for solving Markov jump
+linear quadratic control problems.
 
 """
 from textwrap import dedent
 import numpy as np
 from numpy import dot
 from scipy.linalg import solve
-from .matrix_eqn import solve_discrete_riccati
+from .matrix_eqn import solve_discrete_riccati, solve_discrete_riccati_system
 from .util import check_random_state
+from .markov import MarkovChain
 
 
 class LQ:
@@ -327,3 +329,282 @@ def compute_sequence(self, x0, ts_length=None, method='doubling',
         x_path[:, T] = Ax + Bu + Cw_path[:, T]
 
         return x_path, u_path, w_path
+
+
+class LQMarkov:
+    r"""
+    This class is for analyzing Markov jump linear quadratic optimal
+    control problems of the infinite horizon form
+
+    .. math::
+
+        \min \mathbb{E}
+        \Big[ \sum_{t=0}^{\infty} \beta^t r(x_t, s_t, u_t) \Big]
+
+    with
+
+    .. math::
+
+        r(x_t, s_t, u_t) :=
+        (x_t' R(s_t) x_t + u_t' Q(s_t) u_t + 2 u_t' N(s_t) x_t)
+
+    subject to the law of motion
+
+    .. math::
+
+        x_{t+1} = A(s_t) x_t + B(s_t) u_t + C(s_t) w_{t+1}
+
+    Here :math:`x` is n x 1, :math:`u` is k x 1, :math:`w` is j x 1 and the
+    matrices are conformable for these dimensions. The sequence :math:`{w_t}`
+    is assumed to be white noise, with zero mean and
+    :math:`\mathbb{E} [ w_t w_t' ] = I`, the j x j identity.
+
+    If :math:`C` is not supplied as a parameter, the model is assumed to be
+    deterministic (and :math:`C` is set to a zero matrix of appropriate
+    dimension).
+
+    The optimal value function :math:`V(x_t, s_t)` takes the form
+
+    .. math::
+
+        x_t' P(s_t) x_t + d(s_t)
+
+    and the optimal policy is of the form :math:`u_t = -F(s_t) x_t`.
+
+    Parameters
+    ----------
+    Π : array_like(float, ndim=2)
+        The Markov chain transition matrix with dimension m x m.
+    Qs : array_like(float)
+        Consists of m symmetric and non-negative definite payoff
+        matrices Q(s) with dimension k x k that correspond with
+        the control variable u for each Markov state s
+    Rs : array_like(float)
+        Consists of m symmetric and non-negative definite payoff
+        matrices R(s) with dimension n x n that correspond with
+        the state variable x for each Markov state s
+    As : array_like(float)
+        Consists of m state transition matrices A(s) with dimension
+        n x n for each Markov state s
+    Bs : array_like(float)
+        Consists of m state transition matrices B(s) with dimension
+        n x k for each Markov state s
+    Cs : array_like(float), optional(default=None)
+        Consists of m state transition matrices C(s) with dimension
+        n x j for each Markov state s. If the model is deterministic
+        then Cs should take the default value of None
+    Ns : array_like(float), optional(default=None)
+        Consists of m cross product term matrices N(s) with dimension
+        k x n for each Markov state s
+    beta : scalar(float), optional(default=1)
+        beta is the discount parameter
+
+    Attributes
+    ----------
+    Π, Qs, Rs, Ns, As, Bs, Cs, beta : see Parameters
+    Ps : array_like(float)
+        Ps is part of the value function representation of
+        :math:`V(x, s) = x' P(s) x + d(s)`
+    ds : array_like(float)
+        ds is part of the value function representation of
+        :math:`V(x, s) = x' P(s) x + d(s)`
+    Fs : array_like(float)
+        Fs is the policy rule that determines the choice of control in
+        each period at each Markov state
+    m : scalar(int)
+        The number of Markov states
+    k, n, j : scalar(int)
+        The dimensions of the matrices as presented above
+
+    """
+
+    def __init__(self, Π, Qs, Rs, As, Bs, Cs=None, Ns=None, beta=1):
+
+        # == Make sure all matrices for each state are 2D arrays == #
+        def converter(Xs):
+            return np.array([np.atleast_2d(np.asarray(X, dtype='float'))
+                             for X in Xs])
+        self.As, self.Bs, self.Qs, self.Rs = list(map(converter,
+                                                      (As, Bs, Qs, Rs)))
+
+        # == Record number of states == #
+        self.m = self.Qs.shape[0]
+        # == Record dimensions == #
+        self.k, self.n = self.Qs.shape[1], self.Rs.shape[1]
+
+        if Ns is None:
+            # == No cross product term in payoff. Set N=0. == #
+            Ns = [np.zeros((self.k, self.n)) for i in range(self.m)]
+
+        self.Ns = converter(Ns)
+
+        if Cs is None:
+            # == If C not given, then model is deterministic. Set C=0. == #
+            self.j = 1
+            Cs = [np.zeros((self.n, self.j)) for i in range(self.m)]
+
+        self.Cs = converter(Cs)
+        self.j = self.Cs.shape[2]
+
+        self.beta = beta
+
+        self.Π = np.asarray(Π, dtype='float')
+
+        self.Ps = None
+        self.ds = None
+        self.Fs = None
+
+    def __repr__(self):
+        return self.__str__()
+
+    def __str__(self):
+        m = """\
+        Markov Jump Linear Quadratic control system
+          - beta (discount parameter)       : {b}
+          - T (time horizon)                : {t}
+          - m (number of Markov states)     : {m}
+          - n (number of state variables)   : {n}
+          - k (number of control variables) : {k}
+          - j (number of shocks)            : {j}
+        """
+        t = "infinite"
+        return dedent(m.format(b=self.beta, m=self.m, n=self.n, k=self.k,
+                               j=self.j, t=t))
+
+    def stationary_values(self):
+        """
+        Computes the matrix :math:`P(s)` and scalar :math:`d(s)` that
+        represent the value function
+
+        .. math::
+
+            V(x, s) = x' P(s) x + d(s)
+
+        in the infinite horizon case. Also computes the control matrix
+        :math:`F` from :math:`u = - F(s) x`.
+
+        Returns
+        -------
+        Ps : array_like(float)
+            Ps is part of the value function representation of
+            :math:`V(x, s) = x' P(s) x + d(s)`
+        ds : array_like(float)
+            ds is part of the value function representation of
+            :math:`V(x, s) = x' P(s) x + d(s)`
+        Fs : array_like(float)
+            Fs is the policy rule that determines the choice of control in
+            each period at each Markov state
+
+        """
+
+        # == Simplify notation == #
+        beta, Π = self.beta, self.Π
+        m, n, k = self.m, self.n, self.k
+        As, Bs, Cs = self.As, self.Bs, self.Cs
+        Qs, Rs, Ns = self.Qs, self.Rs, self.Ns
+
+        # == Solve for P(s) by iterating the discrete Riccati system == #
+        Ps = solve_discrete_riccati_system(Π, As, Bs, Cs, Qs, Rs, Ns, beta)
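+        # Ps[i] is the value-function matrix P(s) for Markov state i.
+        # Below, for each state i, sum1 and sum2 accumulate
+        #   sum1 = beta * sum_j Π[i, j] * B(i)' P(j) B(i)
+        #   sum2 = beta * sum_j Π[i, j] * B(i)' P(j) A(i)
+        # so that F(i) = inv(Q(i) + sum1) @ (sum2 + N(i)) is the feedback
+        # matrix in the decision rule u = -F(s) x.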
+
+        # == calculate F and d == #
+        Fs = np.array([np.empty((k, n)) for i in range(m)])
+        X = np.empty((m, m))
+        sum1, sum2 = np.empty((k, k)), np.empty((k, n))
+        for i in range(m):
+            # CCi = C_i C_i'
+            CCi = Cs[i] @ Cs[i].T
+            sum1[:, :] = 0.
+            sum2[:, :] = 0.
+            for j in range(m):
+                # for F
+                sum1 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ Bs[i]
+                sum2 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ As[i]
+
+                # for d
+                X[j, i] = np.trace(Ps[j] @ CCi)
+
+            Fs[i][:, :] = solve(Qs[i] + sum1, sum2 + Ns[i])
+
+        ds = solve(np.eye(m) - beta * Π,
+                   np.diag(beta * Π @ X).reshape((m, 1))).flatten()
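+        # ds stacks the constants d(i): since X[j, i] = trace(P(j) C(i) C(i)'),
+        # this solves (I - beta * Π) d = diag(beta * Π @ X), i.e.
+        #   d(i) = beta * sum_j Π[i, j] * (trace(P(j) C(i) C(i)') + d(j))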
+
+        self.Ps, self.ds, self.Fs = Ps, ds, Fs
+
+        return Ps, ds, Fs
+
+    def compute_sequence(self, x0, ts_length=None, random_state=None):
+        """
+        Compute and return the optimal state and control sequences
+        :math:`x_0, ..., x_T` and :math:`u_0, ..., u_T`, under the
+        assumption that :math:`{w_t}` is iid and :math:`N(0, 1)`,
+        along with the simulated Markov state sequence :math:`s_0, ..., s_T`
+
+        Parameters
+        ----------
+        x0 : array_like(float)
+            The initial state, a vector of length n
+
+        ts_length : scalar(int), optional(default=None)
+            Length of the simulation. If None, T is set to be 100
+
+        random_state : int or np.random.RandomState, optional
+            Random seed (integer) or np.random.RandomState instance to set
+            the initial state of the random number generator for
+            reproducibility. If None, a randomly initialized RandomState is
+            used.
+
+        Returns
+        -------
+        x_path : array_like(float)
+            An n x T+1 matrix, where the t-th column represents :math:`x_t`
+
+        u_path : array_like(float)
+            A k x T matrix, where the t-th column represents :math:`u_t`
+
+        w_path : array_like(float)
+            A j x T+1 matrix, where the t-th column represents :math:`w_t`
+
+        state : array_like(int)
+            Array containing the state values :math:`s_t` of the sample path
+
+        """
+
+        # === solve for optimal policies === #
+        self.stationary_values()
+
+        # === Simplify notation === #
+        As, Bs, Cs = self.As, self.Bs, self.Cs
+        Fs = self.Fs
+
+        random_state = check_random_state(random_state)
+        x0 = np.asarray(x0)
+        x0 = x0.reshape(self.n, 1)
+
+        T = ts_length if ts_length else 100
+
+        # == Simulate Markov states == #
+        chain = MarkovChain(self.Π)
+        state = chain.simulate_indices(ts_length=T+1,
+                                       random_state=random_state)
+
+        # == Prepare storage arrays == #
+        x_path = np.empty((self.n, T+1))
+        u_path = np.empty((self.k, T))
+        w_path = random_state.randn(self.j, T+1)
+        Cw_path = np.empty((self.n, T+1))
+        for i in range(T+1):
+            Cw_path[:, i] = Cs[state[i]] @ w_path[:, i]
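+        # w_path holds the raw N(0, 1) draws; Cw_path applies the
+        # state-dependent loading C(s_t), so the shocks enter the state
+        # equation with dimension n.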
+
+        # == Use policy sequence to generate states and controls == #
+        x_path[:, 0] = x0.flatten()
+        u_path[:, 0] = - (Fs[state[0]] @ x0).flatten()
+        for t in range(1, T):
+            Ax = As[state[t]] @ x_path[:, t-1]
+            Bu = Bs[state[t]] @ u_path[:, t-1]
+            x_path[:, t] = Ax + Bu + Cw_path[:, t]
+            u_path[:, t] = - (Fs[state[t]] @ x_path[:, t])
+        Ax = As[state[T]] @ x_path[:, T-1]
+        Bu = Bs[state[T]] @ u_path[:, T-1]
+        x_path[:, T] = Ax + Bu + Cw_path[:, T]
+
+        return x_path, u_path, w_path, state
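
A minimal usage sketch of the LQMarkov class defined above. The parameter
values are purely illustrative, and the `from quantecon import LQMarkov`
import path is an assumption about the package's public interface; within
this module the class is available directly as `LQMarkov`.

    import numpy as np
    from quantecon import LQMarkov   # assumed top-level export; adjust if needed

    # Two Markov states (m = 2), scalar state, control and shock (n = k = j = 1);
    # only the state transition matrix A(s) differs across states.
    Π = np.array([[0.9, 0.1],
                  [0.1, 0.9]])
    Qs = np.array([[[1.0]], [[1.0]]])
    Rs = np.array([[[1.0]], [[1.0]]])
    As = np.array([[[0.9]], [[1.1]]])
    Bs = np.array([[[1.0]], [[1.0]]])
    Cs = np.array([[[0.5]], [[0.5]]])

    lq = LQMarkov(Π, Qs, Rs, As, Bs, Cs=Cs, beta=0.95)
    Ps, ds, Fs = lq.stationary_values()       # P(s), d(s) and policies F(s)
    x_path, u_path, w_path, s_path = lq.compute_sequence(x0=[1.0], ts_length=50)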