 """
 Provides a class called LQ for solving linear quadratic control
-problems.
+problems, and a class called LQMarkov for solving Markov jump
+linear quadratic control problems.
 
 """
 from textwrap import dedent
 import numpy as np
 from numpy import dot
 from scipy.linalg import solve
-from .matrix_eqn import solve_discrete_riccati
+from .matrix_eqn import solve_discrete_riccati, solve_discrete_riccati_system
 from .util import check_random_state
+from .markov import MarkovChain
 
 
 class LQ:
@@ -327,3 +329,282 @@ def compute_sequence(self, x0, ts_length=None, method='doubling',
         x_path[:, T] = Ax + Bu + Cw_path[:, T]
 
         return x_path, u_path, w_path
+
+
+class LQMarkov:
+    r"""
+    This class is for analyzing Markov jump linear quadratic optimal
+    control problems of the infinite horizon form
+
+    .. math::
+
+        \min \mathbb{E}
+        \Big[ \sum_{t=0}^{\infty} \beta^t r(x_t, s_t, u_t) \Big]
+
+    with
+
+    .. math::
+
+        r(x_t, s_t, u_t) :=
+        (x_t' R(s_t) x_t + u_t' Q(s_t) u_t + 2 u_t' N(s_t) x_t)
+
+    subject to the law of motion
+
+    .. math::
+
+        x_{t+1} = A(s_t) x_t + B(s_t) u_t + C(s_t) w_{t+1}
+
+    Here :math:`x` is n x 1, :math:`u` is k x 1, :math:`w` is j x 1 and the
+    matrices are conformable for these dimensions. The sequence :math:`{w_t}`
+    is assumed to be white noise, with zero mean and
+    :math:`\mathbb{E} [ w_t w_t' ] = I`, the j x j identity.
+
+    If :math:`C` is not supplied as a parameter, the model is assumed to be
+    deterministic (and :math:`C` is set to a zero matrix of appropriate
+    dimension).
+
+    The optimal value function :math:`V(x_t, s_t)` takes the form
+
+    .. math::
+
+        x_t' P(s_t) x_t + d(s_t)
+
+    and the optimal policy is of the form :math:`u_t = -F(s_t) x_t`.
+
+    Parameters
+    ----------
+    Π : array_like(float, ndim=2)
+        The Markov chain transition matrix with dimension m x m.
+    Qs : array_like(float)
+        Consists of m symmetric and non-negative definite payoff
+        matrices Q(s) with dimension k x k that correspond with
+        the control variable u for each Markov state s
+    Rs : array_like(float)
+        Consists of m symmetric and non-negative definite payoff
+        matrices R(s) with dimension n x n that correspond with
+        the state variable x for each Markov state s
+    As : array_like(float)
+        Consists of m state transition matrices A(s) with dimension
+        n x n for each Markov state s
+    Bs : array_like(float)
+        Consists of m state transition matrices B(s) with dimension
+        n x k for each Markov state s
+    Cs : array_like(float), optional(default=None)
+        Consists of m state transition matrices C(s) with dimension
+        n x j for each Markov state s. If the model is deterministic
+        then Cs should take the default value of None
+    Ns : array_like(float), optional(default=None)
+        Consists of m cross product term matrices N(s) with dimension
+        k x n for each Markov state s
+    beta : scalar(float), optional(default=1)
+        beta is the discount parameter
+
+    Attributes
+    ----------
+    Π, Qs, Rs, Ns, As, Bs, Cs, beta : see Parameters
+    Ps : array_like(float)
+        Ps is part of the value function representation of
+        :math:`V(x, s) = x' P(s) x + d(s)`
+    ds : array_like(float)
+        ds is part of the value function representation of
+        :math:`V(x, s) = x' P(s) x + d(s)`
+    Fs : array_like(float)
+        Fs is the policy rule that determines the choice of control in
+        each period at each Markov state
+    m : scalar(int)
+        The number of Markov states
+    k, n, j : scalar(int)
+        The dimensions of the matrices as presented above
+
+    """
+
+    def __init__(self, Π, Qs, Rs, As, Bs, Cs=None, Ns=None, beta=1):
+
+        # == Make sure all matrices for each state are 2D arrays == #
+        def converter(Xs):
+            return np.array([np.atleast_2d(np.asarray(X, dtype='float'))
+                             for X in Xs])
+        self.As, self.Bs, self.Qs, self.Rs = list(map(converter,
+                                                      (As, Bs, Qs, Rs)))
+
+        # == Record number of states == #
+        self.m = self.Qs.shape[0]
+        # == Record dimensions == #
+        self.k, self.n = self.Qs.shape[1], self.Rs.shape[1]
+
+        if Ns is None:
+            # == No cross product term in payoff. Set N=0. == #
+            Ns = [np.zeros((self.k, self.n)) for i in range(self.m)]
+
+        self.Ns = converter(Ns)
+
+        if Cs is None:
+            # == If C not given, then model is deterministic. Set C=0. == #
+            self.j = 1
+            Cs = [np.zeros((self.n, self.j)) for i in range(self.m)]
+
+        self.Cs = converter(Cs)
+        self.j = self.Cs.shape[2]
+
+        self.beta = beta
+
+        self.Π = np.asarray(Π, dtype='float')
+
+        self.Ps = None
+        self.ds = None
+        self.Fs = None
+
+    def __repr__(self):
+        return self.__str__()
+
+    def __str__(self):
+        m = """\
+        Markov Jump Linear Quadratic control system
+          - beta (discount parameter)       : {b}
+          - T (time horizon)                : {t}
+          - m (number of Markov states)     : {m}
+          - n (number of state variables)   : {n}
+          - k (number of control variables) : {k}
+          - j (number of shocks)            : {j}
+        """
+        t = "infinite"
+        return dedent(m.format(b=self.beta, m=self.m, n=self.n, k=self.k,
+                               j=self.j, t=t))
+
+    def stationary_values(self):
+        """
+        Computes the matrix :math:`P(s)` and scalar :math:`d(s)` that
+        represent the value function
+
+        .. math::
+
+            V(x, s) = x' P(s) x + d(s)
+
+        in the infinite horizon case. Also computes the control matrix
+        :math:`F` from :math:`u = - F(s) x`.
+
+        Returns
+        -------
+        Ps : array_like(float)
+            Ps is part of the value function representation of
+            :math:`V(x, s) = x' P(s) x + d(s)`
+        ds : array_like(float)
+            ds is part of the value function representation of
+            :math:`V(x, s) = x' P(s) x + d(s)`
+        Fs : array_like(float)
+            Fs is the policy rule that determines the choice of control in
+            each period at each Markov state
+
+        """
+
+        # == Simplify notation == #
+        beta, Π = self.beta, self.Π
+        m, n, k = self.m, self.n, self.k
+        As, Bs, Cs = self.As, self.Bs, self.Cs
+        Qs, Rs, Ns = self.Qs, self.Rs, self.Ns
+
+        # == Solve for P(s) by iterating the discrete Riccati system == #
+        Ps = solve_discrete_riccati_system(Π, As, Bs, Cs, Qs, Rs, Ns, beta)
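+        # Ps[i] is the value-function matrix P(s) for Markov state i.
+        # Below, for each state i, sum1 and sum2 accumulate
+        #   sum1 = beta * sum_j Π[i, j] * B(i)' P(j) B(i)
+        #   sum2 = beta * sum_j Π[i, j] * B(i)' P(j) A(i)
+        # so that F(i) = inv(Q(i) + sum1) @ (sum2 + N(i)) is the feedback
+        # matrix in the decision rule u = -F(s) x.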
+
+        # == calculate F and d == #
+        Fs = np.array([np.empty((k, n)) for i in range(m)])
+        X = np.empty((m, m))
+        sum1, sum2 = np.empty((k, k)), np.empty((k, n))
+        for i in range(m):
+            # CCi = C_i C_i'
+            CCi = Cs[i] @ Cs[i].T
+            sum1[:, :] = 0.
+            sum2[:, :] = 0.
+            for j in range(m):
+                # for F
+                sum1 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ Bs[i]
+                sum2 += beta * Π[i, j] * Bs[i].T @ Ps[j] @ As[i]
+
+                # for d
+                X[j, i] = np.trace(Ps[j] @ CCi)
+
+            Fs[i][:, :] = solve(Qs[i] + sum1, sum2 + Ns[i])
+
+        ds = solve(np.eye(m) - beta * Π,
+                   np.diag(beta * Π @ X).reshape((m, 1))).flatten()
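+        # ds stacks the constants d(i): since X[j, i] = trace(P(j) C(i) C(i)'),
+        # this solves (I - beta * Π) d = diag(beta * Π @ X), i.e.
+        #   d(i) = beta * sum_j Π[i, j] * (trace(P(j) C(i) C(i)') + d(j))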
+
+        self.Ps, self.ds, self.Fs = Ps, ds, Fs
+
+        return Ps, ds, Fs
+
+    def compute_sequence(self, x0, ts_length=None, random_state=None):
+        """
+        Compute and return the optimal state and control sequences
+        :math:`x_0, ..., x_T` and :math:`u_0, ..., u_T`, under the
+        assumption that :math:`{w_t}` is iid and :math:`N(0, 1)`,
+        along with the simulated Markov state sequence :math:`s_0, ..., s_T`
+
+        Parameters
+        ----------
+        x0 : array_like(float)
+            The initial state, a vector of length n
+
+        ts_length : scalar(int), optional(default=None)
+            Length of the simulation. If None, T is set to be 100
+
+        random_state : int or np.random.RandomState, optional
+            Random seed (integer) or np.random.RandomState instance to set
+            the initial state of the random number generator for
+            reproducibility. If None, a randomly initialized RandomState is
+            used.
+
+        Returns
+        -------
+        x_path : array_like(float)
+            An n x T+1 matrix, where the t-th column represents :math:`x_t`
+
+        u_path : array_like(float)
+            A k x T matrix, where the t-th column represents :math:`u_t`
+
+        w_path : array_like(float)
+            A j x T+1 matrix, where the t-th column represents :math:`w_t`
+
+        state : array_like(int)
+            Array containing the state values :math:`s_t` of the sample path
+
+        """
+
+        # === solve for optimal policies === #
+        self.stationary_values()
+
+        # === Simplify notation === #
+        As, Bs, Cs = self.As, self.Bs, self.Cs
+        Fs = self.Fs
+
+        random_state = check_random_state(random_state)
+        x0 = np.asarray(x0)
+        x0 = x0.reshape(self.n, 1)
+
+        T = ts_length if ts_length else 100
+
+        # == Simulate Markov states == #
+        chain = MarkovChain(self.Π)
+        state = chain.simulate_indices(ts_length=T+1,
+                                       random_state=random_state)
+
+        # == Prepare storage arrays == #
+        x_path = np.empty((self.n, T+1))
+        u_path = np.empty((self.k, T))
+        w_path = random_state.randn(self.j, T+1)
+        Cw_path = np.empty((self.n, T+1))
+        for i in range(T+1):
+            Cw_path[:, i] = Cs[state[i]] @ w_path[:, i]
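+        # w_path holds the raw N(0, 1) draws; Cw_path applies the
+        # state-dependent loading C(s_t), so the shocks enter the state
+        # equation with dimension n.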
+
+        # == Use policy sequence to generate states and controls == #
+        x_path[:, 0] = x0.flatten()
+        u_path[:, 0] = - (Fs[state[0]] @ x0).flatten()
+        for t in range(1, T):
+            Ax = As[state[t]] @ x_path[:, t-1]
+            Bu = Bs[state[t]] @ u_path[:, t-1]
+            x_path[:, t] = Ax + Bu + Cw_path[:, t]
+            u_path[:, t] = - (Fs[state[t]] @ x_path[:, t])
+        Ax = As[state[T]] @ x_path[:, T-1]
+        Bu = Bs[state[T]] @ u_path[:, T-1]
+        x_path[:, T] = Ax + Bu + Cw_path[:, T]
+
+        return x_path, u_path, w_path, state
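
A minimal usage sketch of the LQMarkov class defined above. The parameter
values are purely illustrative, and the `from quantecon import LQMarkov`
import path is an assumption about the package's public interface; within
this module the class is available directly as `LQMarkov`.

    import numpy as np
    from quantecon import LQMarkov   # assumed top-level export; adjust if needed

    # Two Markov states (m = 2), scalar state, control and shock (n = k = j = 1);
    # only the state transition matrix A(s) differs across states.
    Π = np.array([[0.9, 0.1],
                  [0.1, 0.9]])
    Qs = np.array([[[1.0]], [[1.0]]])
    Rs = np.array([[[1.0]], [[1.0]]])
    As = np.array([[[0.9]], [[1.1]]])
    Bs = np.array([[[1.0]], [[1.0]]])
    Cs = np.array([[[0.5]], [[0.5]]])

    lq = LQMarkov(Π, Qs, Rs, As, Bs, Cs=Cs, beta=0.95)
    Ps, ds, Fs = lq.stationary_values()       # P(s), d(s) and policies F(s)
    x_path, u_path, w_path, s_path = lq.compute_sequence(x0=[1.0], ts_length=50)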