Skip to content

Commit bb95efa

Browse files
committed
1. added forecast to exogenous variables section
2. cleaned up typos and figure sizes/zoom
1 parent c591437 commit bb95efa

File tree

2 files changed

+829
-84
lines changed

2 files changed

+829
-84
lines changed

examples/case_studies/ssm_hurricane_tracking.ipynb

Lines changed: 594 additions & 68 deletions
Large diffs are not rendered by default.

examples/case_studies/ssm_hurricane_tracking.myst.md

Lines changed: 235 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ In this case-study we are going to forecast the paths of hurricanes by applying
3030
As a brief introduction to SSMs the general idea is that we have two equations that define our system.<br>
3131
The state equation [1] and the observation equation [2]. $$x_{t+1} = A_{t}x_{t} + c_{t} + R_{t}\epsilon_{t} [1]$$ $$y_{t} = Z_{t}x_{t} + d_{t} + \eta_{t} [2]$$
3232

33-
The process/state covariance is given by $\eta_{t} \sim N(0, Q_{t})$ where $Q_{t}$ is the process/state noise and the observation/measurement covariance is given by $\epsilon_{t} \sim N(0, H_{t})$ where $H_{t}$ describe the noise in the measurement device or procedure.
33+
The process/state covariance is given by $\epsilon_{t} \sim N(0, Q_{t})$ where $Q_{t}$ is the process/state noise and the observation/measurement covariance is given by $\eta_{t} \sim N(0, H_{t})$ where $H_{t}$ describe the noise in the measurement device or procedure.
3434

3535
We have the following matrices:
3636
|State Equation variables|Definition|
@@ -475,7 +475,7 @@ class SimpleSSM(PyMCStateSpace):
475475
[0, (delta_t**2) / 2, 0, delta_t, 0, 1],
476476
]
477477
)
478-
self.ssm["obs_cov", :, :] = np.eye(2) * 0.1
478+
self.ssm["obs_cov", :, :] = np.eye(2) * 0.5
479479
480480
@property
481481
def param_names(self):
@@ -649,7 +649,7 @@ fig.update_layout(
649649
lat=fiona_df["latitude"].mean() + 15.0, lon=fiona_df["longitude"].mean()
650650
),
651651
"pitch": 0,
652-
"zoom": 1.5,
652+
"zoom": 2.0,
653653
},
654654
)
655655
fig.show(config={"displayModeBar": False})
@@ -774,7 +774,7 @@ fig.update_layout(
774774
lat=fiona_df["latitude"].mean() + 15.0, lon=fiona_df["longitude"].mean()
775775
),
776776
"pitch": 0,
777-
"zoom": 0.5,
777+
"zoom": 2.0,
778778
},
779779
)
780780
fig.show(config={"displayModeBar": False})
@@ -798,14 +798,14 @@ $$T = \begin{bmatrix}1&0&\Delta t&0&\frac{\Delta t^{2}}{2}&0&1&0 \\ 0&1&0&\Delta
798798

799799
The last 2 columns we added indicates what states our exogenous variable affects, and the last 2 rows indicates that the processes of our exogenous parameters are constant.
800800

801-
$$R = \begin{bmatrix} 1&0&0&0&0&0&0&0 \\
802-
0&1&0&0&0&0&0&0 \\
803-
0&0&1&0&0&0&0&0 \\
804-
0&0&0&1&0&0&0&0 \\
805-
0&0&0&0&1&0&0&0 \\
806-
0&0&0&0&0&1&0&0 \\
807-
0&0&0&0&0&0&0&0 \\
808-
0&0&0&0&0&0&0&0
801+
$$R = \begin{bmatrix} 1&0&0&0&0&0 \\
802+
0&1&0&0&0&0 \\
803+
0&0&1&0&0&0 \\
804+
0&0&0&1&0&0 \\
805+
0&0&0&0&1&0 \\
806+
0&0&0&0&0&1 \\
807+
0&0&0&0&0&0 \\
808+
0&0&0&0&0&0
809809
\end{bmatrix}$$
810810

811811
The addition to the R matrix imply that the exogenous parameters do not exhibit any process noise.
@@ -894,7 +894,7 @@ class ExogenousSSM(PyMCStateSpace):
894894
[0, (delta_t**2) / 2, 0, delta_t, 0, 1],
895895
]
896896
)
897-
self.ssm["obs_cov", :, :] = np.eye(2) * 0.1
897+
self.ssm["obs_cov", :, :] = np.eye(2) * 0.5
898898
899899
@property
900900
def data_names(self) -> list[str]:
@@ -917,7 +917,7 @@ class ExogenousSSM(PyMCStateSpace):
917917
* key: "shape", value: a tuple of integers
918918
* key: "dims", value: tuple of strings
919919
"""
920-
return {"exogenous_data": {"shape": (None, self.k_exog), "dims": (TIME_DIM, "exog_states")}}
920+
return {"exogenous_data": {"shape": (None, self.k_exog), "dims": (TIME_DIM, "wind_dims")}}
921921
922922
@property
923923
def param_names(self):
@@ -948,7 +948,7 @@ class ExogenousSSM(PyMCStateSpace):
948948
# pymc_extras.statespace.utils.constants
949949
950950
return {
951-
"x0": (ALL_STATE_DIM,),
951+
"x0": (self.k_states - self.k_exog,),
952952
"P0": (ALL_STATE_DIM, ALL_STATE_AUX_DIM),
953953
"acceleration_noise": (1,),
954954
"beta_wind": ("wind_dims",),
@@ -1027,7 +1027,226 @@ with pm.Model(coords=exog_ssm.coords) as exogenous:
10271027
As we suspected, the maximum sustained wind does not add any information to our model concerning the path or the speed that the hurricane travels. We confirm our hypothesis by looking at our beta parameter distributions; which are centered at zero.
10281028

10291029
```{code-cell} ipython3
1030-
az.plot_trace(idata, var_names=["beta_wind"], compact=False);
1030+
az.plot_trace(idata, var_names=["beta_wind"], compact=False, figsize=(16, 8));
1031+
```
1032+
1033+
# Make in-sample forecasts with new exogenous model
1034+
1035+
```{code-cell} ipython3
1036+
predicted_covs = idata.posterior["predicted_covariance"].mean(("chain", "draw"))
1037+
post_mean = idata.posterior["predicted_observed_state"].mean(("chain", "draw"))
1038+
```
1039+
1040+
It seems that adding the exogenous data to our model has increased the uncertainty around our forecasts.
1041+
1042+
```{code-cell} ipython3
1043+
fig = go.Figure()
1044+
for i in range(predicted_covs.shape[0]):
1045+
if (
1046+
i < 3
1047+
): # First handful of ellipses are huge because of little data in the iterative nature of the Kalman filter
1048+
continue
1049+
r_ellipse = ellipse_covariance(predicted_covs[i, :2, :2])
1050+
means = post_mean[i]
1051+
fig.add_trace(
1052+
go.Scattermap(
1053+
lon=r_ellipse[:, 0].astype(float) + means[0].values,
1054+
lat=r_ellipse[:, 1].astype(float) + means[1].values,
1055+
mode="lines",
1056+
fill="toself",
1057+
showlegend=True if i == 3 else False,
1058+
legendgroup="HDI",
1059+
hoverinfo="skip",
1060+
marker_color="blue",
1061+
name="95% CI",
1062+
)
1063+
)
1064+
fig.add_traces(
1065+
[
1066+
go.Scattermap(
1067+
lon=post_mean[:, 0],
1068+
lat=post_mean[:, 1],
1069+
name="predictions",
1070+
mode="lines+markers",
1071+
line=dict(color="lightblue"),
1072+
hovertemplate=[
1073+
f"""<b>Period:</b> {i+1}<br><b>Longitude:</b> {posterior[0]:.1f}<br><b>Latitude:</b> {posterior[1]:.1f}<extra></extra>
1074+
"""
1075+
for i, posterior in enumerate(post_mean)
1076+
],
1077+
),
1078+
go.Scattermap(
1079+
lon=fiona_df["longitude"],
1080+
lat=fiona_df["latitude"],
1081+
name="actuals",
1082+
mode="lines+markers",
1083+
line=dict(color="black"),
1084+
hovertemplate=[
1085+
f"""<b>Period:</b> {row['discrete_time']}<br><b>Longitude:</b> {row['longitude']:.1f}<br><b>Latitude:</b> {row['latitude']:.1f}<extra></extra>
1086+
"""
1087+
for row in fiona_df.iter_rows(named=True)
1088+
],
1089+
),
1090+
]
1091+
)
1092+
1093+
fig.update_layout(
1094+
margin=dict(b=0, t=0, l=0, r=0),
1095+
map={
1096+
"bearing": 0,
1097+
"center": go.layout.map.Center(
1098+
lat=fiona_df["latitude"].mean() + 15.0, lon=fiona_df["longitude"].mean()
1099+
),
1100+
"pitch": 0,
1101+
"zoom": 2.0,
1102+
},
1103+
)
1104+
fig.show(config={"displayModeBar": False})
1105+
```
1106+
1107+
```{code-cell} ipython3
1108+
# Hack that should be fixed in StateSpace soon
1109+
del exog_ssm._exog_data_info["exogenous_data"]["dims"]
1110+
```
1111+
1112+
```{code-cell} ipython3
1113+
# our last i will equal 60 so we need to pad our exogenous data to len 64
1114+
X_wind_padded = pl.concat(
1115+
(X_wind, X_wind.tail(1).select(pl.all().repeat_by(3).flatten())) # duplicate last entry 3 times
1116+
)
1117+
1118+
four_period_forecasts = []
1119+
for i in np.arange(0, idata.constant_data.time.shape[0], 4):
1120+
start = i
1121+
f = exog_ssm.forecast(
1122+
idata,
1123+
start=start,
1124+
periods=4,
1125+
filter_output="smoothed",
1126+
progressbar=False,
1127+
scenario={"exogenous_data": X_wind_padded.slice(i, 4).to_numpy()},
1128+
)
1129+
four_period_forecasts.append(f)
1130+
```
1131+
1132+
```{code-cell} ipython3
1133+
forecasts = xr.combine_by_coords(four_period_forecasts, combine_attrs="drop_conflicts")
1134+
```
1135+
1136+
```{code-cell} ipython3
1137+
# Only look at in-sample forecasts
1138+
forecasts = forecasts.sel(time=range(1, len(fiona_df)))
1139+
```
1140+
1141+
```{code-cell} ipython3
1142+
f_mean = forecasts["forecast_observed"].mean(("chain", "draw"))
1143+
```
1144+
1145+
```{code-cell} ipython3
1146+
longitude_cppc = az.extract(forecasts["forecast_observed"].sel(observed_state="x"))
1147+
latitude_cppc = az.extract(forecasts["forecast_observed"].sel(observed_state="y"))
1148+
```
1149+
1150+
```{code-cell} ipython3
1151+
cppc_var = forecasts["forecast_observed"].var(("chain", "draw"))
1152+
```
1153+
1154+
```{code-cell} ipython3
1155+
cppc_covs = xr.cov(
1156+
latitude_cppc["forecast_observed"], longitude_cppc["forecast_observed"], dim="sample"
1157+
)
1158+
```
1159+
1160+
```{code-cell} ipython3
1161+
covs_list = []
1162+
for i in range(cppc_covs.shape[0]):
1163+
covs_list.append(
1164+
np.array(
1165+
[
1166+
[
1167+
[cppc_var[i].values[0], cppc_covs[i].values.item()],
1168+
[cppc_covs[i].values.item(), cppc_var[i].values[1]],
1169+
]
1170+
]
1171+
)
1172+
)
1173+
```
1174+
1175+
```{code-cell} ipython3
1176+
cppc_vcov = np.concatenate(covs_list, axis=0)
1177+
```
1178+
1179+
Again, we don't expect any change here because the maximum sustained wind speed is not an informative variable with respect to the hurricane's path.
1180+
1181+
```{code-cell} ipython3
1182+
fig = go.Figure()
1183+
for i in range(cppc_vcov.shape[0]):
1184+
# if (
1185+
# i > 10
1186+
# ): # First handful of ellipses are huge because of little data in the iterative nature of the Kalman filter
1187+
# continue
1188+
r_ellipse = ellipse_covariance(cppc_vcov[i, :2, :2])
1189+
means = f_mean[i]
1190+
fig.add_trace(
1191+
go.Scattermap(
1192+
lon=r_ellipse[:, 0].astype(float) + means[0].values,
1193+
lat=r_ellipse[:, 1].astype(float) + means[1].values,
1194+
mode="lines",
1195+
fill="toself",
1196+
showlegend=True if i == 3 else False,
1197+
legendgroup="HDI",
1198+
hoverinfo="skip",
1199+
marker_color="blue",
1200+
name="95% CI",
1201+
)
1202+
)
1203+
fig.add_traces(
1204+
[
1205+
go.Scattermap(
1206+
lon=f_mean[:, 0],
1207+
lat=f_mean[:, 1],
1208+
name="predictions",
1209+
mode="lines+markers",
1210+
line=dict(color="lightblue"),
1211+
hovertemplate=[
1212+
f"""<b>Period:</b> {i+2}<br><b>Longitude:</b> {posterior[0]:.1f}<br><b>Latitude:</b> {posterior[1]:.1f}<extra></extra>
1213+
"""
1214+
for i, posterior in enumerate(post_mean)
1215+
],
1216+
),
1217+
go.Scattermap(
1218+
lon=fiona_df["longitude"],
1219+
lat=fiona_df["latitude"],
1220+
name="actuals",
1221+
mode="lines+markers",
1222+
line=dict(color="black"),
1223+
hovertemplate=[
1224+
f"""<b>Period:</b> {row['discrete_time']}<br><b>Longitude:</b> {row['longitude']:.1f}<br><b>Latitude:</b> {row['latitude']:.1f}<extra></extra>
1225+
"""
1226+
for row in fiona_df.iter_rows(named=True)
1227+
],
1228+
),
1229+
]
1230+
)
1231+
1232+
fig.update_layout(
1233+
margin=dict(b=0, t=0, l=0, r=0),
1234+
map={
1235+
"bearing": 0,
1236+
"center": go.layout.map.Center(
1237+
lat=fiona_df["latitude"].mean() + 15.0, lon=fiona_df["longitude"].mean()
1238+
),
1239+
"pitch": 0,
1240+
"zoom": 2.0,
1241+
},
1242+
)
1243+
fig.show(config={"displayModeBar": False})
1244+
```
1245+
1246+
# Add non-linear gaussian process
1247+
1248+
```{code-cell} ipython3
1249+
10311250
```
10321251

10331252
# Authors

0 commit comments

Comments
 (0)