@@ -962,6 +962,14 @@ class TransferFunctionLinearRegression(PyMCModel):
     The current implementation uses independent Normal errors. Future versions may
     include AR(1) autocorrelation modeling for residuals.
 
+    **Priors**: The model uses data-informed priors that scale with the outcome variable:
+
+    - Baseline coefficients: ``Normal(0, 5 * std(y))``
+    - Treatment coefficients: ``Normal(0, 2 * std(y))`` or ``HalfNormal(2 * std(y))``
+    - Error std: ``HalfNormal(2 * std(y))``
+
+    This adaptive approach ensures priors are reasonable regardless of data scale.
+
     Examples
     --------
     >>> import causalpy as cp
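The payoff of this recipe is scale invariance. A minimal sketch (not part of the patch; toy data invented for illustration):

```python
# Sketch: the same prior recipe stays sensible whether y lives in units or thousands.
import numpy as np

rng = np.random.default_rng(0)
y_small = rng.normal(loc=10, scale=1, size=100)  # outcome on a small scale
y_large = y_small * 1_000                        # same outcome, rescaled

for y in (y_small, y_large):
    y_scale = float(np.std(y))
    print(
        f"std(y)={y_scale:10.2f}  "
        f"beta ~ Normal(0, {5 * y_scale:10.2f})  "
        f"theta ~ Normal(0, {2 * y_scale:10.2f})  "
        f"sigma ~ HalfNormal({2 * y_scale:10.2f})"
    )
```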
@@ -1031,6 +1039,9 @@ def build_model(self, X, y, coords, treatment_data):
         ).values.tolist()
         self.n_treatments = treatment_data.shape[1]
 
+        # Compute data scale BEFORE entering model context (for data-informed priors)
+        y_scale = float(np.std(y))
+
         with self:
             self.add_coords(coords)
 
@@ -1175,23 +1186,27 @@ def build_model(self, X, y, coords, treatment_data):
             treatment_transformed = pt.stack(treatment_transformed_list, axis=1)
 
             # ==================================================================
-            # Regression Coefficients
+            # Regression Coefficients (with data-informed priors)
             # ==================================================================
-            # Baseline coefficients
-            beta = pm.Normal("beta", mu=0, sigma=50, dims=["treated_units", "coeffs"])
+            # Baseline coefficients: prior std = 5 * outcome scale
+            # This allows the intercept to range widely while keeping some regularization
+            beta = pm.Normal(
+                "beta", mu=0, sigma=5 * y_scale, dims=["treated_units", "coeffs"]
+            )
 
-            # Treatment coefficients
+            # Treatment coefficients: prior std = 2 * outcome scale
+            # Treatments typically have smaller effects than the baseline level
             if self.coef_constraint == "nonnegative":
                 theta_treatment = pm.HalfNormal(
                     "theta_treatment",
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
             else:
                 theta_treatment = pm.Normal(
                     "theta_treatment",
                     mu=0,
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
 
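For readers less familiar with PyMC, a self-contained sketch of just the coefficient-prior block above, under invented toy data and coords (PyMC 5 API assumed; not the full TransferFunction model):

```python
# Standalone sketch of the data-informed coefficient priors (not the full model).
import numpy as np
import pymc as pm

y = np.random.default_rng(1).normal(loc=50, scale=5, size=(200, 2))  # toy outcome, 2 units
y_scale = float(np.std(y))

coords = {
    "treated_units": ["u0", "u1"],
    "coeffs": ["intercept", "trend"],
    "treatment_names": ["t0"],
}
with pm.Model(coords=coords):
    beta = pm.Normal("beta", mu=0, sigma=5 * y_scale, dims=["treated_units", "coeffs"])
    theta = pm.HalfNormal(  # the "nonnegative" branch
        "theta_treatment", sigma=2 * y_scale, dims=["treated_units", "treatment_names"]
    )
    prior = pm.sample_prior_predictive(draws=500, random_seed=1)
```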
@@ -1214,7 +1229,8 @@ def build_model(self, X, y, coords, treatment_data):
             # ==================================================================
             # Likelihood
             # ==================================================================
-            sigma = pm.HalfNormal("sigma", sigma=100, dims=["treated_units"])
+            # Error std: prior scaled to the outcome, with wide support
+            sigma = pm.HalfNormal("sigma", sigma=2 * y_scale, dims=["treated_units"])
 
             # For now, use independent Normal errors
             # Note: AR(1) errors in regression context require more complex implementation
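For intuition about how wide ``HalfNormal(2 * std(y))`` actually is, a quick scipy check (illustrative numbers):

```python
# HalfNormal(2 * std(y)) in numbers: prior mean ≈ 1.6 * std(y),
# 95% of the mass below ≈ 3.9 * std(y).
import numpy as np
from scipy import stats

y_scale = 5.0  # pretend std(y) = 5
prior = stats.halfnorm(scale=2 * y_scale)
print(prior.mean())      # 2 * y_scale * sqrt(2 / pi) ≈ 7.98
print(prior.ppf(0.95))   # ≈ 19.60, i.e. about 3.9 * std(y)
```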
@@ -1351,6 +1367,15 @@ class TransferFunctionARRegression(PyMCModel):
     - Posterior predictive sampling requires forward simulation of the AR process
     - Convergence can be slower than the independent errors model; consider increasing tune/draws
 
+    **Priors**: The model uses data-informed priors that scale with the outcome variable:
+
+    - Baseline coefficients: ``Normal(0, 5 * std(y))``
+    - Treatment coefficients: ``Normal(0, 2 * std(y))`` or ``HalfNormal(2 * std(y))``
+    - Error std: ``HalfNormal(2 * std(y))``
+    - AR(1) coefficient: ``Uniform(-0.99, 0.99)``
+
+    This adaptive approach ensures priors are reasonable regardless of data scale.
+
     Examples
     --------
     >>> import causalpy as cp
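As background for the ``Uniform(-0.99, 0.99)`` prior, a toy simulation of the AR(1) error process showing why |rho| < 1 matters:

```python
# AR(1) errors: e[t] = rho * e[t-1] + eps[t]. With |rho| < 1 the process is stationary,
# with standard deviation sigma / sqrt(1 - rho^2); that is what the Uniform prior enforces.
import numpy as np

rng = np.random.default_rng(2)
rho, sigma, n = 0.7, 1.0, 50_000
e = np.zeros(n)
for t in range(1, n):
    e[t] = rho * e[t - 1] + rng.normal(0, sigma)

print(e.std())                      # ≈ 1.40 empirically
print(sigma / np.sqrt(1 - rho**2))  # ≈ 1.40 analytically
```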
@@ -1432,6 +1457,9 @@ def build_model(self, X, y, coords, treatment_data):
         ).values.tolist()
         self.n_treatments = treatment_data.shape[1]
 
+        # Compute data scale BEFORE entering model context (for data-informed priors)
+        y_scale = float(np.std(y))
+
         with self:
             self.add_coords(coords)
 
@@ -1581,23 +1609,27 @@ def build_model(self, X, y, coords, treatment_data):
             treatment_transformed = pt.stack(treatment_transformed_list, axis=1)
 
             # ==================================================================
-            # Regression Coefficients
+            # Regression Coefficients (with data-informed priors)
             # ==================================================================
-            # Baseline coefficients
-            beta = pm.Normal("beta", mu=0, sigma=50, dims=["treated_units", "coeffs"])
+            # Baseline coefficients: prior std = 5 * outcome scale
+            # This allows the intercept to range widely while keeping some regularization
+            beta = pm.Normal(
+                "beta", mu=0, sigma=5 * y_scale, dims=["treated_units", "coeffs"]
+            )
 
-            # Treatment coefficients
+            # Treatment coefficients: prior std = 2 * outcome scale
+            # Treatments typically have smaller effects than the baseline level
             if self.coef_constraint == "nonnegative":
                 theta_treatment = pm.HalfNormal(
                     "theta_treatment",
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
             else:
                 theta_treatment = pm.Normal(
                     "theta_treatment",
                     mu=0,
-                    sigma=100,
+                    sigma=2 * y_scale,
                     dims=["treated_units", "treatment_names"],
                 )
 
@@ -1623,8 +1655,8 @@ def build_model(self, X, y, coords, treatment_data):
             # AR(1) parameter: rho (constrained to ensure stationarity)
             rho = pm.Uniform("rho", lower=-0.99, upper=0.99, dims=["treated_units"])
 
-            # Innovation standard deviation
-            sigma = pm.HalfNormal("sigma", sigma=100, dims=["treated_units"])
+            # Innovation standard deviation: prior scaled to the outcome
+            sigma = pm.HalfNormal("sigma", sigma=2 * y_scale, dims=["treated_units"])
 
             # Quasi-differencing approach using manual log-likelihood
             # We can't use y_diff as observed data because it depends on rho
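The quasi-differencing trick mentioned in these comments, sketched in plain NumPy (the helper name and toy setup are invented for illustration):

```python
# If e[t] = y[t] - mu[t] follows e[t] = rho * e[t-1] + eps[t], then
# (y[t] - mu[t]) - rho * (y[t-1] - mu[t-1]) = eps[t] is iid Normal(0, sigma),
# so the differenced residuals can be scored with an ordinary Gaussian likelihood.
import numpy as np

def quasi_diff_residuals(y, mu, rho):
    """Recover innovations eps[1:] from outcome y, mean mu, and AR coefficient rho."""
    e = y - mu
    return e[1:] - rho * e[:-1]

rng = np.random.default_rng(4)
n, rho_true = 5_000, 0.6
e = np.zeros(n)
for t in range(1, n):
    e[t] = rho_true * e[t - 1] + rng.normal()
mu = np.full(n, 10.0)                                    # constant mean, for illustration
print(quasi_diff_residuals(mu + e, mu, rho_true).std())  # ≈ 1.0: iid noise recovered
```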
@@ -1637,11 +1669,12 @@ def build_model(self, X, y, coords, treatment_data):
 
             # Compute log-likelihood for t > 0 (Gaussian log-likelihood formula)
             # log p(y[t] | y[t-1], params) = -0.5 * ((y_diff - mu_diff) / sigma)^2 - log(sigma) - 0.5 * log(2π)
-            n_diff = y_data.shape[0] - 1
+            n_diff = y_data.shape[0] - 1  # Number of differenced observations
+            n_units = y_data.shape[1]  # Number of units
             logp_diff = (
                 -0.5 * pt.sum((residuals_diff / sigma) ** 2)
                 - n_diff * pt.sum(pt.log(sigma))
-                - 0.5 * n_diff * pt.log(2 * np.pi)
+                - 0.5 * n_diff * n_units * pt.log(2 * np.pi)
             )
             pm.Potential("logp_diff", logp_diff)
 
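The normalization fix in this hunk can be checked numerically: for a matrix of independent residuals with one sigma per unit, the manual formula should match scipy's summed Normal log-densities. A sketch with toy shapes:

```python
# For an (n_diff, n_units) residual matrix with one sigma per unit, the manual
# log-likelihood must equal the sum of per-observation Normal log-densities.
import numpy as np
from scipy import stats

rng = np.random.default_rng(3)
n_diff, n_units = 50, 3
sigma = np.array([0.5, 1.0, 2.0])  # per-unit innovation std
resid = rng.normal(0, sigma, size=(n_diff, n_units))

manual = (
    -0.5 * np.sum((resid / sigma) ** 2)
    - n_diff * np.sum(np.log(sigma))
    - 0.5 * n_diff * n_units * np.log(2 * np.pi)  # the corrected constant
)
print(np.isclose(manual, stats.norm.logpdf(resid, scale=sigma).sum()))  # True
```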
@@ -1654,7 +1687,7 @@ def build_model(self, X, y, coords, treatment_data):
             logp_0 = (
                 -0.5 * pt.sum((residuals_0 / sigma_0) ** 2)
                 - pt.sum(pt.log(sigma_0))
-                - 0.5 * pt.log(2 * np.pi)
+                - 0.5 * n_units * pt.log(2 * np.pi)
             )
             pm.Potential("logp_0", logp_0)
16601693
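Finally, the t = 0 term. ``sigma_0`` is defined outside this hunk; assuming it is the AR(1) stationary standard deviation (the standard choice for the initial observation), a short sketch of where it comes from:

```python
# Stationarity gives Var(e) = rho^2 * Var(e) + sigma^2, so sigma_0 = sigma / sqrt(1 - rho^2).
# The first residual is scored against Normal(0, sigma_0), and the corrected constant
# carries one 0.5 * log(2 * pi) term per unit (one initial observation each).
import numpy as np

sigma, rho = 1.0, 0.7
sigma_0 = sigma / np.sqrt(1 - rho**2)
print(sigma_0)  # ≈ 1.40: wider than sigma, reflecting accumulated autocorrelation
```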