@@ -440,6 +440,127 @@ def generate_multicell_geolift_data() -> pd.DataFrame:
440440 return df
441441
442442
443+ def generate_event_study_data (
444+ n_units : int = 20 ,
445+ n_time : int = 20 ,
446+ treatment_time : int = 10 ,
447+ treated_fraction : float = 0.5 ,
448+ event_window : tuple [int , int ] = (- 5 , 5 ),
449+ treatment_effects : dict [int , float ] | None = None ,
450+ unit_fe_sigma : float = 1.0 ,
451+ time_fe_sigma : float = 0.5 ,
452+ noise_sigma : float = 0.2 ,
453+ seed : int | None = None ,
454+ ) -> pd .DataFrame :
455+ """
456+ Generate synthetic panel data for event study / dynamic DiD analysis.
457+
458+ Creates panel data with unit and time fixed effects, where a fraction of units
459+ receive treatment at a common treatment time. Treatment effects can vary by
460+ event time (time relative to treatment).
461+
462+ Parameters
463+ ----------
464+ n_units : int
465+ Total number of units (treated + control). Default 20.
466+ n_time : int
467+ Number of time periods. Default 20.
468+ treatment_time : int
469+ Time period when treatment occurs (0-indexed). Default 10.
470+ treated_fraction : float
471+ Fraction of units that are treated. Default 0.5.
472+ event_window : tuple[int, int]
473+ Range of event times (K_min, K_max) for which treatment effects are defined.
474+ Default (-5, 5).
475+ treatment_effects : dict[int, float], optional
476+ Dictionary mapping event time k to treatment effect beta_k.
477+ Default creates effects that are 0 for k < 0 (pre-treatment)
478+ and gradually increase post-treatment.
479+ unit_fe_sigma : float
480+ Standard deviation for unit fixed effects. Default 1.0.
481+ time_fe_sigma : float
482+ Standard deviation for time fixed effects. Default 0.5.
483+ noise_sigma : float
484+ Standard deviation for observation noise. Default 0.2.
485+ seed : int, optional
486+ Random seed for reproducibility.
487+
488+ Returns
489+ -------
490+ pd.DataFrame
491+ Panel data with columns:
492+ - unit: Unit identifier
493+ - time: Time period
494+ - y: Outcome variable
495+ - treat_time: Treatment time for unit (NaN if never treated)
496+ - treated: Whether unit is in treated group (0 or 1)
497+
498+ Example
499+ --------
500+ >>> from causalpy.data.simulate_data import generate_event_study_data
501+ >>> df = generate_event_study_data(
502+ ... n_units=20, n_time=20, treatment_time=10, seed=42
503+ ... )
504+ >>> df.shape
505+ (400, 5)
506+ >>> df.columns.tolist()
507+ ['unit', 'time', 'y', 'treat_time', 'treated']
508+ """
509+ if seed is not None :
510+ np .random .seed (seed )
511+
512+ # Default treatment effects: zero pre-treatment, gradual increase post-treatment
513+ if treatment_effects is None :
514+ treatment_effects = {}
515+ for k in range (event_window [0 ], event_window [1 ] + 1 ):
516+ if k < 0 :
517+ treatment_effects [k ] = 0.0 # No anticipation
518+ else :
519+ # Gradual treatment effect that increases post-treatment
520+ treatment_effects [k ] = 0.5 + 0.1 * k
521+
522+ # Determine treated units
523+ n_treated = int (n_units * treated_fraction )
524+ treated_units = set (range (n_treated ))
525+
526+ # Generate unit fixed effects
527+ unit_fe = np .random .normal (0 , unit_fe_sigma , n_units )
528+
529+ # Generate time fixed effects
530+ time_fe = np .random .normal (0 , time_fe_sigma , n_time )
531+
532+ # Build panel data
533+ data = []
534+ for unit in range (n_units ):
535+ is_treated = unit in treated_units
536+ unit_treat_time = treatment_time if is_treated else np .nan
537+
538+ for t in range (n_time ):
539+ # Base outcome: unit FE + time FE + noise
540+ y = unit_fe [unit ] + time_fe [t ] + np .random .normal (0 , noise_sigma )
541+
542+ # Add treatment effect for treated units in event window
543+ if is_treated :
544+ event_time = t - treatment_time
545+ if (
546+ event_window [0 ] <= event_time <= event_window [1 ]
547+ and event_time in treatment_effects
548+ ):
549+ y += treatment_effects [event_time ]
550+
551+ data .append (
552+ {
553+ "unit" : unit ,
554+ "time" : t ,
555+ "y" : y ,
556+ "treat_time" : unit_treat_time ,
557+ "treated" : 1 if is_treated else 0 ,
558+ }
559+ )
560+
561+ return pd .DataFrame (data )
562+
563+
443564# -----------------
444565# UTILITY FUNCTIONS
445566# -----------------
0 commit comments