
Commit e10d585

better priors scaled for the problem

1 parent 199eead · commit e10d585

File tree

2 files changed (+146, -104 lines)


causalpy/pymc_models.py

Lines changed: 51 additions & 18 deletions
Original file line numberDiff line numberDiff line change
```diff
@@ -962,6 +962,14 @@ class TransferFunctionLinearRegression(PyMCModel):
     The current implementation uses independent Normal errors. Future versions may
     include AR(1) autocorrelation modeling for residuals.
 
+    **Priors**: The model uses data-informed priors that scale with the outcome variable:
+
+    - Baseline coefficients: ``Normal(0, 5 * std(y))``
+    - Treatment coefficients: ``Normal(0, 2 * std(y))`` or ``HalfNormal(2 * std(y))``
+    - Error std: ``HalfNormal(2 * std(y))``
+
+    This adaptive approach ensures priors are reasonable regardless of data scale.
+
     Examples
     --------
     >>> import causalpy as cp
```
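As a rough illustration (a standalone sketch, not CausalPy's actual ``build_model``), the scheme amounts to computing the outcome's standard deviation once and reusing it as the unit for every prior width:

```python
import numpy as np
import pymc as pm

# Hypothetical minimal sketch of the data-informed prior scheme.
# Whatever the units of y, the priors keep the same relative width.
rng = np.random.default_rng(42)
y = rng.normal(loc=500.0, scale=120.0, size=200)  # outcome on an arbitrary scale

y_scale = float(np.std(y))  # data scale, computed outside the model context

with pm.Model():
    # Baseline coefficients: wide enough for the intercept, still regularizing
    beta = pm.Normal("beta", mu=0, sigma=5 * y_scale)
    # Treatment effect: assumed smaller than the baseline level
    theta = pm.Normal("theta", mu=0, sigma=2 * y_scale)
    # Error std: half-normal with support on the outcome scale
    sigma = pm.HalfNormal("sigma", sigma=2 * y_scale)
```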
```diff
@@ -1031,6 +1039,9 @@ def build_model(self, X, y, coords, treatment_data):
         ).values.tolist()
         self.n_treatments = treatment_data.shape[1]
 
+        # Compute data scale BEFORE entering model context (for data-informed priors)
+        y_scale = float(np.std(y))
+
         with self:
             self.add_coords(coords)
 
```
```diff
@@ -1175,23 +1186,27 @@ def build_model(self, X, y, coords, treatment_data):
             treatment_transformed = pt.stack(treatment_transformed_list, axis=1)
 
             # ==================================================================
-            # Regression Coefficients
+            # Regression Coefficients (with data-informed priors)
             # ==================================================================
-            # Baseline coefficients
-            beta = pm.Normal("beta", mu=0, sigma=50, dims=["treated_units", "coeffs"])
+            # Baseline coefficients: prior std = 5 * outcome scale
+            # This allows intercept to range widely while keeping some regularization
+            beta = pm.Normal(
+                "beta", mu=0, sigma=5 * y_scale, dims=["treated_units", "coeffs"]
+            )
 
-            # Treatment coefficients
+            # Treatment coefficients: prior std = 2 * outcome scale
+            # Treatments typically have smaller effects than baseline level
             if self.coef_constraint == "nonnegative":
                 theta_treatment = pm.HalfNormal(
                     "theta_treatment",
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
             else:
                 theta_treatment = pm.Normal(
                     "theta_treatment",
                     mu=0,
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
 
```
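The practical payoff is invariance to units: rescaling ``y`` rescales the prior width by the same factor, which the old fixed ``sigma=100`` could not do. A quick prior-predictive sanity check (a hypothetical snippet, assuming PyMC >= 5):

```python
import numpy as np
import pymc as pm

# If y is divided by 1000, the prior sd of theta shrinks by the same
# factor, so the prior stays equally informative in relative terms.
rng = np.random.default_rng(0)
y_raw = rng.normal(50_000.0, 8_000.0, size=100)

for y in (y_raw, y_raw / 1000.0):
    y_scale = float(np.std(y))
    with pm.Model():
        pm.Normal("theta", mu=0, sigma=2 * y_scale)
        idata = pm.sample_prior_predictive(1000, random_seed=1)
    print(float(idata.prior["theta"].std()), "~=", 2 * y_scale)
```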
```diff
@@ -1214,7 +1229,8 @@ def build_model(self, X, y, coords, treatment_data):
             # ==================================================================
             # Likelihood
             # ==================================================================
-            sigma = pm.HalfNormal("sigma", sigma=100, dims=["treated_units"])
+            # Error std: prior centered on outcome scale with wide support
+            sigma = pm.HalfNormal("sigma", sigma=2 * y_scale, dims=["treated_units"])
 
             # For now, use independent Normal errors
             # Note: AR(1) errors in regression context require more complex implementation
```
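For intuition about the new error-scale prior: a half-normal with scale ``2 * std(y)`` has prior mean ``2 * std(y) * sqrt(2/pi)``, roughly ``1.6 * std(y)``, so its mass sits on the outcome scale rather than at an arbitrary fixed value. A quick check with SciPy:

```python
import numpy as np
from scipy import stats

# Prior mean of HalfNormal(sigma=2*std_y) is 2*std_y*sqrt(2/pi),
# i.e. about 1.6 times the outcome's standard deviation.
std_y = 120.0
prior = stats.halfnorm(scale=2 * std_y)
print(prior.mean())                    # ~191.5
print(2 * std_y * np.sqrt(2 / np.pi))  # identical, analytically
```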
```diff
@@ -1351,6 +1367,15 @@ class TransferFunctionARRegression(PyMCModel):
     - Posterior predictive sampling requires forward simulation of the AR process
     - Convergence can be slower than the independent errors model; consider increasing tune/draws
 
+    **Priors**: The model uses data-informed priors that scale with the outcome variable:
+
+    - Baseline coefficients: ``Normal(0, 5 * std(y))``
+    - Treatment coefficients: ``Normal(0, 2 * std(y))`` or ``HalfNormal(2 * std(y))``
+    - Error std: ``HalfNormal(2 * std(y))``
+    - AR(1) coefficient: ``Uniform(-0.99, 0.99)``
+
+    This adaptive approach ensures priors are reasonable regardless of data scale.
+
     Examples
     --------
     >>> import causalpy as cp
```
```diff
@@ -1432,6 +1457,9 @@ def build_model(self, X, y, coords, treatment_data):
         ).values.tolist()
         self.n_treatments = treatment_data.shape[1]
 
+        # Compute data scale BEFORE entering model context (for data-informed priors)
+        y_scale = float(np.std(y))
+
         with self:
             self.add_coords(coords)
 
```
```diff
@@ -1581,23 +1609,27 @@ def build_model(self, X, y, coords, treatment_data):
             treatment_transformed = pt.stack(treatment_transformed_list, axis=1)
 
             # ==================================================================
-            # Regression Coefficients
+            # Regression Coefficients (with data-informed priors)
             # ==================================================================
-            # Baseline coefficients
-            beta = pm.Normal("beta", mu=0, sigma=50, dims=["treated_units", "coeffs"])
+            # Baseline coefficients: prior std = 5 * outcome scale
+            # This allows intercept to range widely while keeping some regularization
+            beta = pm.Normal(
+                "beta", mu=0, sigma=5 * y_scale, dims=["treated_units", "coeffs"]
+            )
 
-            # Treatment coefficients
+            # Treatment coefficients: prior std = 2 * outcome scale
+            # Treatments typically have smaller effects than baseline level
             if self.coef_constraint == "nonnegative":
                 theta_treatment = pm.HalfNormal(
                     "theta_treatment",
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
             else:
                 theta_treatment = pm.Normal(
                     "theta_treatment",
                     mu=0,
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
 
```
```diff
@@ -1623,8 +1655,8 @@ def build_model(self, X, y, coords, treatment_data):
             # AR(1) parameter: rho (constrained to ensure stationarity)
             rho = pm.Uniform("rho", lower=-0.99, upper=0.99, dims=["treated_units"])
 
-            # Innovation standard deviation
-            sigma = pm.HalfNormal("sigma", sigma=100, dims=["treated_units"])
+            # Innovation standard deviation: prior centered on outcome scale
+            sigma = pm.HalfNormal("sigma", sigma=2 * y_scale, dims=["treated_units"])
 
             # Quasi-differencing approach using manual log-likelihood
             # We can't use y_diff as observed data because it depends on rho
```
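The quasi-differencing trick behind that comment: if the errors follow ``e[t] = rho * e[t-1] + eps[t]``, differencing the residuals removes the autocorrelation. A hypothetical NumPy illustration (``quasi_diff_residuals`` is not a CausalPy function):

```python
import numpy as np

def quasi_diff_residuals(y, mu, rho):
    """Transform AR(1)-correlated residuals into iid innovations.

    With e[t] = y[t] - mu[t] and e[t] = rho * e[t-1] + eps[t], the
    quantity e[t] - rho * e[t-1] equals eps[t] ~ Normal(0, sigma)
    for t > 0, which is what logp_diff evaluates. Because the
    transform depends on rho, y_diff cannot be passed to PyMC as
    fixed observed data; hence the manual log-likelihood.
    """
    e = y - mu
    return e[1:] - rho * e[:-1]
```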
```diff
@@ -1637,11 +1669,12 @@ def build_model(self, X, y, coords, treatment_data):
 
             # Compute log-likelihood for t > 0 (Gaussian log-likelihood formula)
             # log p(y[t] | y[t-1], params) = -0.5 * ((y_diff - mu_diff) / sigma)^2 - log(sigma) - 0.5 * log(2π)
-            n_diff = y_data.shape[0] - 1
+            n_diff = y_data.shape[0] - 1  # Number of differenced observations
+            n_units = y_data.shape[1]  # Number of units
             logp_diff = (
                 -0.5 * pt.sum((residuals_diff / sigma) ** 2)
                 - n_diff * pt.sum(pt.log(sigma))
-                - 0.5 * n_diff * pt.log(2 * np.pi)
+                - 0.5 * n_diff * n_units * pt.log(2 * np.pi)
             )
             pm.Potential("logp_diff", logp_diff)
 
```
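This hunk fixes a bug as well as the prior: the ``0.5 * log(2π)`` normalisation term must be counted once per Gaussian term in the sum, i.e. ``n_diff * n_units`` times, not ``n_diff`` times. A quick NumPy/SciPy check of the corrected formula against summed ``norm.logpdf`` values:

```python
import numpy as np
from scipy import stats

# The residual matrix has shape (n_diff, n_units), so the summed iid
# Normal log-density needs n_diff * n_units copies of -0.5*log(2*pi).
rng = np.random.default_rng(3)
n_diff, n_units = 50, 4
resid = rng.normal(size=(n_diff, n_units))
sigma = np.array([0.8, 1.0, 1.2, 1.5])  # one error std per unit

manual = (
    -0.5 * np.sum((resid / sigma) ** 2)
    - n_diff * np.sum(np.log(sigma))
    - 0.5 * n_diff * n_units * np.log(2 * np.pi)
)
reference = stats.norm.logpdf(resid, scale=sigma).sum()
print(np.isclose(manual, reference))  # True
```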
```diff
@@ -1654,7 +1687,7 @@ def build_model(self, X, y, coords, treatment_data):
             logp_0 = (
                 -0.5 * pt.sum((residuals_0 / sigma_0) ** 2)
                 - pt.sum(pt.log(sigma_0))
-                - 0.5 * pt.log(2 * np.pi)
+                - 0.5 * n_units * pt.log(2 * np.pi)
             )
             pm.Potential("logp_0", logp_0)
 
```
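The same ``n_units`` correction applies to the ``t = 0`` term, which contributes one Gaussian per unit. Here ``sigma_0`` (defined outside the hunk shown) is presumably the stationary marginal standard deviation of the AR(1) process, ``sigma / sqrt(1 - rho**2)``, which is also why ``rho`` is constrained to ``(-0.99, 0.99)``. A simulation sanity check of that identity:

```python
import numpy as np

# Assumption: sigma_0 = sigma / sqrt(1 - rho**2), the stationary AR(1)
# marginal std (its actual definition sits outside the hunk above).
rng = np.random.default_rng(7)
rho, sigma = 0.7, 2.0
e, draws = 0.0, []
for _ in range(200_000):
    e = rho * e + rng.normal(scale=sigma)  # simulate the AR(1) recursion
    draws.append(e)
print(np.std(draws))                # ~2.80, matching the line below
print(sigma / np.sqrt(1 - rho**2))  # 2.8006..., the analytic value
```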
docs/source/notebooks/graded_intervention_time_series_single_channel_ols.ipynb

Lines changed: 95 additions & 86 deletions
Large diffs are not rendered by default.
