Skip to content

Commit 34c93da

Browse files
committed
initial commit
1 parent f90ddde commit 34c93da

File tree

7 files changed

+2085
-3
lines changed

7 files changed

+2085
-3
lines changed

causalpy/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from .data import load_data
2121
from .experiments.diff_in_diff import DifferenceInDifferences
22+
from .experiments.event_study import EventStudy
2223
from .experiments.instrumental_variable import InstrumentalVariable
2324
from .experiments.interrupted_time_series import InterruptedTimeSeries
2425
from .experiments.inverse_propensity_weighting import InversePropensityWeighting
@@ -30,6 +31,7 @@
3031
__all__ = [
3132
"__version__",
3233
"DifferenceInDifferences",
34+
"EventStudy",
3335
"create_causalpy_compatible_class",
3436
"InstrumentalVariable",
3537
"InterruptedTimeSeries",

causalpy/data/simulate_data.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,127 @@ def generate_multicell_geolift_data() -> pd.DataFrame:
440440
return df
441441

442442

443+
def generate_event_study_data(
444+
n_units: int = 20,
445+
n_time: int = 20,
446+
treatment_time: int = 10,
447+
treated_fraction: float = 0.5,
448+
event_window: tuple[int, int] = (-5, 5),
449+
treatment_effects: dict[int, float] | None = None,
450+
unit_fe_sigma: float = 1.0,
451+
time_fe_sigma: float = 0.5,
452+
noise_sigma: float = 0.2,
453+
seed: int | None = None,
454+
) -> pd.DataFrame:
455+
"""
456+
Generate synthetic panel data for event study / dynamic DiD analysis.
457+
458+
Creates panel data with unit and time fixed effects, where a fraction of units
459+
receive treatment at a common treatment time. Treatment effects can vary by
460+
event time (time relative to treatment).
461+
462+
Parameters
463+
----------
464+
n_units : int
465+
Total number of units (treated + control). Default 20.
466+
n_time : int
467+
Number of time periods. Default 20.
468+
treatment_time : int
469+
Time period when treatment occurs (0-indexed). Default 10.
470+
treated_fraction : float
471+
Fraction of units that are treated. Default 0.5.
472+
event_window : tuple[int, int]
473+
Range of event times (K_min, K_max) for which treatment effects are defined.
474+
Default (-5, 5).
475+
treatment_effects : dict[int, float], optional
476+
Dictionary mapping event time k to treatment effect beta_k.
477+
Default creates effects that are 0 for k < 0 (pre-treatment)
478+
and gradually increase post-treatment.
479+
unit_fe_sigma : float
480+
Standard deviation for unit fixed effects. Default 1.0.
481+
time_fe_sigma : float
482+
Standard deviation for time fixed effects. Default 0.5.
483+
noise_sigma : float
484+
Standard deviation for observation noise. Default 0.2.
485+
seed : int, optional
486+
Random seed for reproducibility.
487+
488+
Returns
489+
-------
490+
pd.DataFrame
491+
Panel data with columns:
492+
- unit: Unit identifier
493+
- time: Time period
494+
- y: Outcome variable
495+
- treat_time: Treatment time for unit (NaN if never treated)
496+
- treated: Whether unit is in treated group (0 or 1)
497+
498+
Example
499+
--------
500+
>>> from causalpy.data.simulate_data import generate_event_study_data
501+
>>> df = generate_event_study_data(
502+
... n_units=20, n_time=20, treatment_time=10, seed=42
503+
... )
504+
>>> df.shape
505+
(400, 5)
506+
>>> df.columns.tolist()
507+
['unit', 'time', 'y', 'treat_time', 'treated']
508+
"""
509+
if seed is not None:
510+
np.random.seed(seed)
511+
512+
# Default treatment effects: zero pre-treatment, gradual increase post-treatment
513+
if treatment_effects is None:
514+
treatment_effects = {}
515+
for k in range(event_window[0], event_window[1] + 1):
516+
if k < 0:
517+
treatment_effects[k] = 0.0 # No anticipation
518+
else:
519+
# Gradual treatment effect that increases post-treatment
520+
treatment_effects[k] = 0.5 + 0.1 * k
521+
522+
# Determine treated units
523+
n_treated = int(n_units * treated_fraction)
524+
treated_units = set(range(n_treated))
525+
526+
# Generate unit fixed effects
527+
unit_fe = np.random.normal(0, unit_fe_sigma, n_units)
528+
529+
# Generate time fixed effects
530+
time_fe = np.random.normal(0, time_fe_sigma, n_time)
531+
532+
# Build panel data
533+
data = []
534+
for unit in range(n_units):
535+
is_treated = unit in treated_units
536+
unit_treat_time = treatment_time if is_treated else np.nan
537+
538+
for t in range(n_time):
539+
# Base outcome: unit FE + time FE + noise
540+
y = unit_fe[unit] + time_fe[t] + np.random.normal(0, noise_sigma)
541+
542+
# Add treatment effect for treated units in event window
543+
if is_treated:
544+
event_time = t - treatment_time
545+
if (
546+
event_window[0] <= event_time <= event_window[1]
547+
and event_time in treatment_effects
548+
):
549+
y += treatment_effects[event_time]
550+
551+
data.append(
552+
{
553+
"unit": unit,
554+
"time": t,
555+
"y": y,
556+
"treat_time": unit_treat_time,
557+
"treated": 1 if is_treated else 0,
558+
}
559+
)
560+
561+
return pd.DataFrame(data)
562+
563+
443564
# -----------------
444565
# UTILITY FUNCTIONS
445566
# -----------------

causalpy/experiments/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""CausalPy experiment module"""
1515

1616
from .diff_in_diff import DifferenceInDifferences
17+
from .event_study import EventStudy
1718
from .instrumental_variable import InstrumentalVariable
1819
from .interrupted_time_series import InterruptedTimeSeries
1920
from .inverse_propensity_weighting import InversePropensityWeighting
@@ -24,6 +25,7 @@
2425

2526
__all__ = [
2627
"DifferenceInDifferences",
28+
"EventStudy",
2729
"InstrumentalVariable",
2830
"InversePropensityWeighting",
2931
"PrePostNEGD",

0 commit comments

Comments
 (0)