Merge with main

gdeleva · gdeleva · commit 07d94b61c52b · 2025-07-17T11:04:46.000+09:00
diff --git a/pymc_extras/statespace/models/structural/components/seasonality.py b/pymc_extras/statespace/models/structural/components/seasonality.py
@@ -14,24 +14,28 @@ class TimeSeasonality(Component):
     ----------
     season_length: int
         The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for
-        daily data with weekly seasonal pattern, etc.
+        daily data with weekly seasonal pattern, etc. It must be greater than one.
+
+    duration: int, default 1
+        Number of time steps for each seasonal period.
+        This determines how long each seasonal period is held constant before moving to the next.
 
     innovations: bool, default True
         Whether to include stochastic innovations in the strength of the seasonal effect
 
     name: str, default None
         A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal
-        components are included in the same model. Default is ``f"Seasonal[s={season_length}]"``
+        components are included in the same model. Default is ``f"Seasonal[s={season_length}, d={duration}]"``
 
     state_names: list of str, default None
         List of strings for seasonal effect labels. If provided, it must be of length ``season_length``. An example
         would be ``state_names = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']`` when data is daily with a weekly
         seasonal pattern (``season_length = 7``).
 
-        If None, states will be numbered ``[State_0, ..., State_s]``
+        If None, states will be numbered ``[State_0, ..., State_s-1]``
 
     remove_first_state: bool, default True
-        If True, the first state will be removed from the model. This is done because there are only n-1 degrees of
+        If True, the first state will be removed from the model. This is done because there are only ``season_length-1`` degrees of
         freedom in the seasonal component, and one state is not identified. If False, the first state will be
         included in the model, but it will not be identified -- you will need to handle this in the priors (e.g. with
         ZeroSumNormal).
@@ -41,17 +45,76 @@ class TimeSeasonality(Component):
 
     Notes
     -----
-    A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to
-    model seasonal effects, the implementation used here is the one described by [1] as the "canonical" time domain
-    representation. The seasonal component can be expressed:
+    A seasonal effect is any pattern that repeats at fixed intervals. There are several ways to model such effects;
+    here, we present two models that are straightforward extensions of those described in [1].
+
+    **First model** (``remove_first_state=True``)
+
+    In this model, the state vector is defined as:
+
+    .. math::
+        \alpha_t :=(\gamma_t, \ldots, \gamma_{t-d(s-1)+1}), \quad t \ge 0.
+
+    This vector has length :math:`d(s-1)`, where:
+
+    - :math:`s` is the ``seasonal_length`` parameter, and
+    - :math:`d` is the ``duration`` parameter.
+
+    The components of the initial vector :math:`\alpha_{0}` are given by
+
+    .. math::
+        \gamma_{-l} := \tilde{\gamma}_{k_l}, \quad \text{where} \quad k_l := \left\lfloor \frac{l}{d} \right\rfloor \bmod s \quad \text{and} \quad l=0,\ldots, d(s-1)-1.
+
+    Here, the values
+
+    .. math::
+        \tilde{\gamma}_{0}, \ldots, \tilde{\gamma}_{s-2},
+
+    represent the initial seasonal states. The transition matrix of this model is the :math:`d(s-1) \times d(s-1)` matrix
+
+    .. math::
+        \begin{bmatrix}
+            -\mathbf{1}_d & -\mathbf{1}_d & \cdots & -\mathbf{1}_d & -\mathbf{1}_d \\
+            \mathbf{1}_d & \mathbf{0}_d & \cdots & \mathbf{0}_d & \mathbf{0}_d \\
+            \mathbf{0}_d & \mathbf{1}_d & \cdots & \mathbf{0}_d & \mathbf{0}_d \\
+            \vdots & \vdots & \ddots & \vdots \\
+            \mathbf{0}_d & \mathbf{0}_d & \cdots & \mathbf{1}_d & \mathbf{0}_d
+        \end{bmatrix}
+
+    where :math:`\mathbf{1}_d` and  :math:`\mathbf{0}_d` denote the :math:`d \times d` identity and null matrices, respectively.
+
+    **Second model** (``remove_first_state=False``)
+
+    In contrast, the state vector in the second model is defined as:
 
     .. math::
-        \gamma_t = -\sum_{i=1}^{s-1} \gamma_{t-i} + \omega_t, \quad \omega_t \sim N(0, \sigma_\gamma)
+        \alpha_t=(\gamma_t, \ldots, \gamma_{t-ds+1}), \quad t \ge 0.
 
-    Where :math:`s` is the ``seasonal_length`` parameter and :math:`\omega_t` is the (optional) stochastic innovation.
-    To give interpretation to the :math:`\gamma` terms, it is helpful to work  through the algebra for a simple
-    example. Let :math:`s=4`, and omit the shock term. Define initial conditions :math:`\gamma_0, \gamma_{-1},
-    \gamma_{-2}`. The value of the seasonal component for the first 5 timesteps will be:
+    This vector has length :math:`ds`. The components of the initial state vector :math:`\alpha_{0}` are defined similarly:
+
+    .. math::
+        \gamma_{-l} := \tilde{\gamma}_{k_l}, \quad \text{where} \quad k_l := \left\lfloor \frac{l}{d} \right\rfloor \bmod s \quad \text{and} \quad l=0,\ldots, ds-1.
+
+    In this case, the initial seasonal states :math:`\tilde{\gamma}_{0}, \ldots, \tilde{\gamma}_{s-1}` are required to satisfy the following condition:
+
+    .. math::
+        \sum_{i=0}^{s-1} \tilde{\gamma}_{i} = 0.
+
+    The transition matrix of this model is the following :math:`ds \times ds` circulant matrix:
+
+    .. math::
+        \begin{bmatrix}
+            0 & 1 & 0 & \cdots & 0 \\
+            0 & 0 & 1 & \cdots & 0 \\
+            \vdots & \vdots & \ddots & \ddots & \vdots \\
+            0 & 0 & \cdots & 0 & 1 \\
+            1 & 0 & \cdots & 0 & 0
+        \end{bmatrix}
+
+    To give interpretation to the :math:`\gamma` terms, it is helpful to work through the algebra for a simple
+    example. Let :math:`s=4`, :math:`d=1`, ``remove_first_state=True``, and omit the shock term. Then, we have
+    :math:`\gamma_{-i} = \tilde{\gamma}_{-i}`, for :math:`i=-2,\ldots, 0` and the value of the seasonal component
+    for the first 5 timesteps will be:
 
     .. math::
         \begin{align}
@@ -85,10 +148,38 @@ class TimeSeasonality(Component):
     And so on. So for interpretation, the ``season_length - 1`` initial states are, when reversed, the coefficients
     associated with ``state_names[1:]``.
 
+    In the next example, we set :math:`s=3`, :math:`d=2`, ``remove_first_state=True``, and omit the shock term.
+    By definition, the initial vector :math:`\alpha_{0}` is
+
+    .. math::
+        \alpha_0=(\tilde{\gamma}_{0}, \tilde{\gamma}_{0}, \tilde{\gamma}_{-1}, \tilde{\gamma}_{-1})
+
+    and the transition matrix is
+
+    .. math::
+        \begin{bmatrix}
+            -1 &  0 & -1 &  0 \\
+             0 & -1 &  0 & -1 \\
+             1 &  0 &  0 &  0 \\
+             0 &  1 &  0 &  0 \\
+        \end{bmatrix}
+
+    It is easy to verify that:
+
+    .. math::
+        \begin{align}
+            \gamma_1 &= -\tilde{\gamma}_0 - \tilde{\gamma}_{-1}\\
+            \gamma_2 &= -(-\tilde{\gamma}_0 - \tilde{\gamma}_{-1})-\tilde{\gamma}_0\\
+                     &= \tilde{\gamma}_{-1}\\
+            \gamma_3 &= -\tilde{\gamma}_{-1} +(\tilde{\gamma}_0 + \tilde{\gamma}_{-1})\\
+                     &= \tilde{\gamma}_{0}\\
+            \gamma_4 &= -\tilde{\gamma}_0 - \tilde{\gamma}_{-1}.\\
+        \end{align}
+
     .. warning::
-        Although the ``state_names`` argument expects a list of length ``season_length``, only ``state_names[1:]``
-        will be saved as model dimensions, since the 1st coefficient is not identified (it is defined as
-        :math:`-\sum_{i=1}^{s} \gamma_{t-i}`).
+        Although the ``state_names`` argument expects a list of length ``season_length`` times ``duration``,
+        only ``state_names[1:]`` will be saved as model dimensions, since the first coefficient is not identified
+        (it is defined as :math:`-\sum_{i=1}^{s-1} \tilde{\gamma}_{-i}`).
 
     Examples
     --------
@@ -137,6 +228,7 @@ class TimeSeasonality(Component):
     def __init__(
         self,
         season_length: int,
+        duration: int = 1,
         innovations: bool = True,
         name: str | None = None,
         state_names: list | None = None,
@@ -146,29 +238,38 @@ def __init__(
         if observed_state_names is None:
             observed_state_names = ["data"]
 
+        if season_length <= 1:
+            raise ValueError(f"season_length must be greater than 1, got {season_length}")
         if name is None:
-            name = f"Seasonal[s={season_length}]"
+            name = f"Seasonal[s={season_length}, d={duration}]"
         if state_names is None:
-            state_names = [f"{name}_{i}" for i in range(season_length)]
+            if duration > 1:
+                state_names = [
+                    f"{name}_{i}_{j}" for i in range(season_length) for j in range(duration)
+                ]
+            else:
+                state_names = [f"{name}_{i}" for i in range(season_length)]
         else:
-            if len(state_names) != season_length:
+            if len(state_names) != season_length * duration:
                 raise ValueError(
-                    f"state_names must be a list of length season_length, got {len(state_names)}"
+                    f"state_names must be a list of length season_length*duration, got {len(state_names)}"
                 )
             state_names = state_names.copy()
 
         self.innovations = innovations
+        self.duration = duration
         self.remove_first_state = remove_first_state
+        self.season_length = season_length
 
         if self.remove_first_state:
             # In traditional models, the first state isn't identified, so we can help out the user by automatically
             # discarding it.
             # TODO: Can this be stashed and reconstructed automatically somehow?
-            state_names.pop(0)
+            state_names = state_names[duration:]
 
         self.provided_state_names = state_names
 
-        k_states = season_length - int(self.remove_first_state)
+        k_states = season_length * duration - int(self.remove_first_state) * duration
         k_endog = len(observed_state_names)
         k_posdef = int(innovations)
 
@@ -236,8 +337,27 @@ def make_symbolic_graph(self) -> None:
         if self.remove_first_state:
             # In this case, parameters are normalized to sum to zero, so the current state is the negative sum of
             # all previous states.
-            T = np.eye(k_states, k=-1)
-            T[0, :] = -1
+            zero_d = np.zeros((self.duration, self.duration))
+            id_d = np.eye(self.duration)
+
+            blocks = []
+
+            # First row: all -1_d blocks
+            first_row = [-id_d for _ in range(self.season_length - 1)]
+            blocks.append(first_row)
+
+            # Rows 2 to season_length-1: shifted identity blocks
+            for i in range(self.season_length - 2):
+                row = []
+                for j in range(self.season_length - 1):
+                    if j == i:
+                        row.append(id_d)
+                    else:
+                        row.append(zero_d)
+                blocks.append(row)
+
+            # Stack blocks
+            T = np.block(blocks)
         else:
             # In this case we assume the user to be responsible for ensuring the states sum to zero, so T is just a
             # circulant matrix that cycles between the states.