Skip to content

Commit f42be32

Browse files
committed
Add experiment reporting for more experiments/notebooks
1 parent d485665 commit f42be32

File tree

7 files changed

+1098
-242
lines changed

7 files changed

+1098
-242
lines changed

causalpy/experiments/base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
_detect_experiment_type,
3434
_effect_summary_did,
3535
_effect_summary_rd,
36+
_effect_summary_rkink,
3637
_extract_counterfactual,
3738
_extract_window,
3839
_generate_prose,
@@ -188,6 +189,14 @@ def effect_summary(
188189
alpha=alpha,
189190
min_effect=min_effect,
190191
)
192+
elif experiment_type == "rkink":
193+
# Regression Kink: scalar effect (gradient change at kink point)
194+
return _effect_summary_rkink(
195+
self,
196+
direction=direction,
197+
alpha=alpha,
198+
min_effect=min_effect,
199+
)
191200
elif experiment_type == "did":
192201
# Difference-in-Differences: scalar effect, no time dimension
193202
if is_pymc:

causalpy/reporting.py

Lines changed: 207 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,17 @@ def _detect_experiment_type(result):
4444
"""Detect experiment type from result attributes."""
4545
if hasattr(result, "discontinuity_at_threshold"):
4646
return "rd" # Regression Discontinuity
47+
elif hasattr(result, "gradient_change"):
48+
return "rkink" # Regression Kink
4749
elif hasattr(result, "causal_impact") and not hasattr(result, "post_impact"):
48-
return "did" # Difference-in-Differences
50+
return "did" # Difference-in-Differences or ANCOVA/PrePostNEGD
4951
elif hasattr(result, "post_impact"):
5052
return "its_or_sc" # ITS or Synthetic Control
5153
else:
5254
raise ValueError(
5355
"Unknown experiment type. Result must have 'discontinuity_at_threshold' (RD), "
54-
"'causal_impact' (DiD), or 'post_impact' (ITS/Synthetic Control) attribute."
56+
"'gradient_change' (Regression Kink), 'causal_impact' (DiD/ANCOVA), "
57+
"or 'post_impact' (ITS/Synthetic Control) attribute."
5558
)
5659

5760

@@ -1246,3 +1249,205 @@ def fmt_num(x, decimals=2):
12461249
)
12471250

12481251
return prose
1252+
1253+
1254+
# ==============================================================================
1255+
# Regression Kink handler functions
1256+
# ==============================================================================
1257+
1258+
1259+
def _effect_summary_rkink(
1260+
result,
1261+
direction="increase",
1262+
alpha=0.05,
1263+
min_effect=None,
1264+
):
1265+
"""Generate effect summary for Regression Kink experiments."""
1266+
gradient_change = result.gradient_change
1267+
1268+
# Check if PyMC (xarray) or OLS (scalar)
1269+
is_pymc = isinstance(gradient_change, xr.DataArray)
1270+
1271+
if is_pymc:
1272+
# PyMC model: use posterior draws
1273+
hdi_prob = 1 - alpha
1274+
stats = _compute_statistics_rkink(
1275+
gradient_change,
1276+
hdi_prob=hdi_prob,
1277+
direction=direction,
1278+
min_effect=min_effect,
1279+
)
1280+
table = _generate_table_rkink(stats)
1281+
text = _generate_prose_rkink(stats, alpha=alpha, direction=direction)
1282+
else:
1283+
# OLS model: Not currently supported for RegressionKink, but structure is here
1284+
stats = _compute_statistics_rkink_ols(result, alpha=alpha)
1285+
table = _generate_table_rkink_ols(stats)
1286+
text = _generate_prose_rkink_ols(stats, alpha=alpha)
1287+
1288+
return EffectSummary(table=table, text=text)
1289+
1290+
1291+
def _compute_statistics_rkink(
1292+
gradient_change,
1293+
hdi_prob=0.95,
1294+
direction="increase",
1295+
min_effect=None,
1296+
):
1297+
"""Compute statistics for Regression Kink scalar effect (PyMC)."""
1298+
stats = {
1299+
"mean": float(gradient_change.mean(dim=["chain", "draw"]).values),
1300+
"median": float(gradient_change.median(dim=["chain", "draw"]).values),
1301+
}
1302+
1303+
# HDI
1304+
hdi_result = az.hdi(gradient_change, hdi_prob=hdi_prob)
1305+
if isinstance(hdi_result, xr.Dataset):
1306+
hdi_data = list(hdi_result.data_vars.values())[0]
1307+
stats["hdi_lower"] = float(hdi_data.sel(hdi="lower").values)
1308+
stats["hdi_upper"] = float(hdi_data.sel(hdi="higher").values)
1309+
else:
1310+
stats["hdi_lower"] = float(hdi_result.sel(hdi="lower").values)
1311+
stats["hdi_upper"] = float(hdi_result.sel(hdi="higher").values)
1312+
1313+
# Tail probabilities
1314+
if direction == "increase":
1315+
stats["p_gt_0"] = float((gradient_change > 0).mean().values)
1316+
elif direction == "decrease":
1317+
stats["p_lt_0"] = float((gradient_change < 0).mean().values)
1318+
else: # two-sided
1319+
p_gt = float((gradient_change > 0).mean().values)
1320+
p_lt = float((gradient_change < 0).mean().values)
1321+
p_two_sided = 2 * min(p_gt, p_lt)
1322+
stats["p_two_sided"] = p_two_sided
1323+
stats["prob_of_effect"] = 1 - p_two_sided
1324+
1325+
# ROPE
1326+
if min_effect is not None:
1327+
if direction == "two-sided":
1328+
stats["p_rope"] = float(
1329+
(np.abs(gradient_change) > min_effect).mean().values
1330+
)
1331+
else:
1332+
stats["p_rope"] = float((gradient_change > min_effect).mean().values)
1333+
1334+
return stats
1335+
1336+
1337+
def _compute_statistics_rkink_ols(result, alpha=0.05):
1338+
"""Compute statistics for Regression Kink scalar effect with OLS model."""
1339+
# Note: RegressionKink currently only supports PyMC models
1340+
# This is a placeholder for future OLS support
1341+
raise NotImplementedError(
1342+
"OLS models are not currently supported for Regression Kink experiments. "
1343+
"Please use a PyMC model."
1344+
)
1345+
1346+
1347+
def _generate_table_rkink(stats):
1348+
"""Generate DataFrame table for Regression Kink (PyMC)."""
1349+
data = {
1350+
"metric": ["gradient_change"],
1351+
"mean": [stats["mean"]],
1352+
"median": [stats["median"]],
1353+
"HDI_lower": [stats["hdi_lower"]],
1354+
"HDI_upper": [stats["hdi_upper"]],
1355+
}
1356+
1357+
# Add direction-specific columns
1358+
if "p_gt_0" in stats:
1359+
data["P(effect>0)"] = [stats["p_gt_0"]]
1360+
elif "p_lt_0" in stats:
1361+
data["P(effect<0)"] = [stats["p_lt_0"]]
1362+
elif "p_two_sided" in stats:
1363+
data["P(two-sided)"] = [stats["p_two_sided"]]
1364+
data["P(effect)"] = [stats["prob_of_effect"]]
1365+
1366+
# Add ROPE if present
1367+
if "p_rope" in stats:
1368+
data["P(|effect|>min_effect)"] = [stats["p_rope"]]
1369+
1370+
return pd.DataFrame(data)
1371+
1372+
1373+
def _generate_table_rkink_ols(stats):
1374+
"""Generate DataFrame table for Regression Kink with OLS model."""
1375+
# Placeholder for future OLS support
1376+
data = {
1377+
"metric": ["gradient_change"],
1378+
"mean": [stats["mean"]],
1379+
"CI_lower": [stats["ci_lower"]],
1380+
"CI_upper": [stats["ci_upper"]],
1381+
"p_value": [stats["p_value"]],
1382+
}
1383+
return pd.DataFrame(data)
1384+
1385+
1386+
def _generate_prose_rkink(stats, alpha=0.05, direction="increase"):
1387+
"""Generate prose summary for Regression Kink (PyMC)."""
1388+
hdi_pct = int((1 - alpha) * 100)
1389+
1390+
def fmt_num(x, decimals=2):
1391+
return f"{x:.{decimals}f}"
1392+
1393+
mean = stats["mean"]
1394+
median = stats["median"]
1395+
lower = stats["hdi_lower"]
1396+
upper = stats["hdi_upper"]
1397+
1398+
prose_parts = [
1399+
f"The change in gradient at the kink point had a mean of {fmt_num(mean)} "
1400+
f"(median: {fmt_num(median)}, {hdi_pct}% HDI [{fmt_num(lower)}, {fmt_num(upper)}])."
1401+
]
1402+
1403+
# Add tail probability info
1404+
if direction == "increase":
1405+
prob = stats["p_gt_0"]
1406+
prose_parts.append(
1407+
f" There is a {fmt_num(prob * 100, 1)}% posterior probability "
1408+
f"that the gradient change is positive."
1409+
)
1410+
elif direction == "decrease":
1411+
prob = stats["p_lt_0"]
1412+
prose_parts.append(
1413+
f" There is a {fmt_num(prob * 100, 1)}% posterior probability "
1414+
f"that the gradient change is negative."
1415+
)
1416+
else: # two-sided
1417+
prob = stats["prob_of_effect"]
1418+
prose_parts.append(
1419+
f" There is a {fmt_num(prob * 100, 1)}% posterior probability "
1420+
f"of a non-zero gradient change (two-sided test)."
1421+
)
1422+
1423+
# Add ROPE info
1424+
if "p_rope" in stats:
1425+
p_rope = stats["p_rope"]
1426+
prose_parts.append(
1427+
f" The probability that the absolute gradient change exceeds "
1428+
f"the practical significance threshold is {fmt_num(p_rope * 100, 1)}%."
1429+
)
1430+
1431+
return "".join(prose_parts)
1432+
1433+
1434+
def _generate_prose_rkink_ols(stats, alpha=0.05):
1435+
"""Generate prose summary for Regression Kink with OLS model."""
1436+
# Placeholder for future OLS support
1437+
ci_pct = int((1 - alpha) * 100)
1438+
1439+
def fmt_num(x, decimals=2):
1440+
return f"{x:.{decimals}f}"
1441+
1442+
mean = stats["mean"]
1443+
lower = stats["ci_lower"]
1444+
upper = stats["ci_upper"]
1445+
p_val = stats["p_value"]
1446+
1447+
prose = (
1448+
f"The change in gradient at the kink point was {fmt_num(mean)} "
1449+
f"({ci_pct}% CI [{fmt_num(lower)}, {fmt_num(upper)}]), "
1450+
f"with a p-value of {fmt_num(p_val, 3)}."
1451+
)
1452+
1453+
return prose

causalpy/tests/test_reporting.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,116 @@ def test_effect_summary_rd_ols(mock_pymc_sample):
446446
assert "hdi_upper" not in stats.table.columns
447447

448448

449+
@pytest.mark.integration
450+
def test_effect_summary_rkink_pymc(mock_pymc_sample):
451+
"""Test effect_summary with Regression Kink (PyMC)."""
452+
# Generate data for regression kink analysis
453+
rng = np.random.default_rng(42)
454+
kink_point = 0.5
455+
beta = [1, 0.5, 0, 0.5, 0] # Parameters for the piecewise function
456+
N = 100
457+
x = rng.uniform(-1, 1, N)
458+
treated = (x >= kink_point).astype(int)
459+
y = (
460+
beta[0]
461+
+ beta[1] * x
462+
+ beta[2] * x**2
463+
+ beta[3] * (x - kink_point) * treated
464+
+ beta[4] * (x - kink_point) ** 2 * treated
465+
+ rng.normal(0, 0.1, N)
466+
)
467+
df = pd.DataFrame({"x": x, "y": y, "treated": treated})
468+
469+
result = cp.RegressionKink(
470+
df,
471+
formula="y ~ 1 + x + I(x**2) + I((x-0.5)*treated) + I(((x-0.5)**2)*treated)",
472+
kink_point=kink_point,
473+
model=cp.pymc_models.LinearRegression(sample_kwargs=sample_kwargs),
474+
)
475+
476+
stats = result.effect_summary()
477+
478+
assert isinstance(stats, EffectSummary)
479+
assert stats.table["metric"].iloc[0] == "gradient_change"
480+
assert "mean" in stats.table.columns
481+
assert "median" in stats.table.columns
482+
assert "HDI_lower" in stats.table.columns
483+
assert "HDI_upper" in stats.table.columns
484+
assert "P(effect>0)" in stats.table.columns
485+
486+
487+
@pytest.mark.integration
488+
def test_effect_summary_rkink_directions(mock_pymc_sample):
489+
"""Test effect_summary with Regression Kink with different directions."""
490+
# Generate data
491+
rng = np.random.default_rng(42)
492+
kink_point = 0.5
493+
beta = [1, 0.5, 0, -0.5, 0] # Negative gradient change
494+
N = 100
495+
x = rng.uniform(-1, 1, N)
496+
treated = (x >= kink_point).astype(int)
497+
y = (
498+
beta[0]
499+
+ beta[1] * x
500+
+ beta[2] * x**2
501+
+ beta[3] * (x - kink_point) * treated
502+
+ beta[4] * (x - kink_point) ** 2 * treated
503+
+ rng.normal(0, 0.1, N)
504+
)
505+
df = pd.DataFrame({"x": x, "y": y, "treated": treated})
506+
507+
result = cp.RegressionKink(
508+
df,
509+
formula="y ~ 1 + x + I(x**2) + I((x-0.5)*treated) + I(((x-0.5)**2)*treated)",
510+
kink_point=kink_point,
511+
model=cp.pymc_models.LinearRegression(sample_kwargs=sample_kwargs),
512+
)
513+
514+
# Test increase
515+
stats_increase = result.effect_summary(direction="increase")
516+
assert "P(effect>0)" in stats_increase.table.columns
517+
518+
# Test decrease
519+
stats_decrease = result.effect_summary(direction="decrease")
520+
assert "P(effect<0)" in stats_decrease.table.columns
521+
522+
# Test two-sided
523+
stats_two_sided = result.effect_summary(direction="two-sided")
524+
assert "P(two-sided)" in stats_two_sided.table.columns
525+
assert "P(effect)" in stats_two_sided.table.columns
526+
527+
528+
@pytest.mark.integration
529+
def test_effect_summary_rkink_rope(mock_pymc_sample):
530+
"""Test effect_summary with Regression Kink with ROPE."""
531+
# Generate data
532+
rng = np.random.default_rng(42)
533+
kink_point = 0.5
534+
beta = [1, 0.5, 0, 0.5, 0]
535+
N = 100
536+
x = rng.uniform(-1, 1, N)
537+
treated = (x >= kink_point).astype(int)
538+
y = (
539+
beta[0]
540+
+ beta[1] * x
541+
+ beta[2] * x**2
542+
+ beta[3] * (x - kink_point) * treated
543+
+ beta[4] * (x - kink_point) ** 2 * treated
544+
+ rng.normal(0, 0.1, N)
545+
)
546+
df = pd.DataFrame({"x": x, "y": y, "treated": treated})
547+
548+
result = cp.RegressionKink(
549+
df,
550+
formula="y ~ 1 + x + I(x**2) + I((x-0.5)*treated) + I(((x-0.5)**2)*treated)",
551+
kink_point=kink_point,
552+
model=cp.pymc_models.LinearRegression(sample_kwargs=sample_kwargs),
553+
)
554+
555+
stats = result.effect_summary(min_effect=0.2)
556+
assert "P(|effect|>min_effect)" in stats.table.columns
557+
558+
449559
@pytest.mark.integration
450560
def test_effect_summary_empty_window_error(mock_pymc_sample):
451561
"""Test that effect_summary raises error for empty window."""

docs/source/_static/interrogate_badge.svg

Lines changed: 3 additions & 3 deletions
Loading

docs/source/notebooks/ancova_pymc.ipynb

Lines changed: 162 additions & 25 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)