pymc-labs
diff --git a/causalpy/data/simulate_data.py
Lines changed: 37 additions & 10 deletions b/causalpy/data/simulate_data.py
Lines changed: 37 additions & 10 deletions
diff --git a/causalpy/plot_utils.py
Lines changed: 16 additions & 1 deletion b/causalpy/plot_utils.py
Lines changed: 16 additions & 1 deletion
diff --git a/causalpy/pymc_experiments.py
Lines changed: 142 additions & 7 deletions b/causalpy/pymc_experiments.py
Lines changed: 142 additions & 7 deletions
@@ -29,12 +29,13 @@ def generate_synthetic_control_data(
     lowess_kwargs=default_lowess_kwargs,
 ):
     """
-    Example:
-    >> import pathlib
-    >> df, weightings_true = generate_synthetic_control_data(
-                                treatment_time=treatment_time
-                            )
-    >> df.to_csv(pathlib.Path.cwd() / 'synthetic_control.csv', index=False)
+    Example
+    --------
+    >>> import pathlib
+    >>> df, weightings_true = generate_synthetic_control_data(
+    ...                             treatment_time=treatment_time
+    ... )
+    >>> df.to_csv(pathlib.Path.cwd() / 'synthetic_control.csv', index=False)
     """
 
     # 1. Generate non-treated variables
@@ -73,6 +74,7 @@ def generate_synthetic_control_data(
 def generate_time_series_data(
     N=100, treatment_time=70, beta_temp=-1, beta_linear=0.5, beta_intercept=3
 ):
+    """Generate synthetic time series example data"""
     x = np.arange(0, 100, 1)
     df = pd.DataFrame(
         {
@@ -102,6 +104,7 @@ def generate_time_series_data(
 
 
 def generate_time_series_data_seasonal(treatment_time):
+    """Generate synthetic seasonal time series example data"""
     dates = pd.date_range(
         start=pd.to_datetime("2010-01-01"), end=pd.to_datetime("2020-01-01"), freq="M"
     )
@@ -149,6 +152,13 @@ def generate_time_series_data_simple(treatment_time, slope=0.0):
 
 
 def generate_did():
+    """
+    Generate Difference in Differences data
+
+    Example
+    --------
+    >>> df = generate_did()
+    """
     # true parameters
     control_intercept = 1
     treat_intercept_delta = 0.25
@@ -194,10 +204,13 @@ def generate_regression_discontinuity_data(
     N=100, true_causal_impact=0.5, true_treatment_threshold=0.0
 ):
     """
-    Example use:
-    >> import pathlib
-    >> df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
-    >> df.to_csv(pathlib.Path.cwd() / 'regression_discontinuity.csv', index=False)
+    Generate regression discontinuity example data
+
+    Example
+    --------
+    >>> import pathlib
+    >>> df = generate_regression_discontinuity_data(true_treatment_threshold=0.5)
+    >>> df.to_csv(pathlib.Path.cwd() / 'regression_discontinuity.csv', index=False)
     """
 
     def is_treated(x):
@@ -217,6 +230,20 @@ def impact(x):
 def generate_ancova_data(
     N=200, pre_treatment_means=np.array([10, 12]), treatment_effect=2, sigma=1
 ):
+    """
+    Generate ANCOVA example data
+
+    Example
+    --------
+    >>> import pathlib
+    >>> df = generate_ancova_data(
+    ...     N=200,
+    ...     pre_treatment_means=np.array([10, 12]),
+    ...     treatment_effect=2,
+    ...     sigma=1
+    ... )
+    >>> df.to_csv(pathlib.Path.cwd() / 'ancova_data.csv', index=False)
+    """
     group = np.random.choice(2, size=N)
     pre = np.random.normal(loc=pre_treatment_means[group])
     post = pre + treatment_effect * group + np.random.normal(size=N) * sigma
 
@@ -21,7 +21,22 @@ def plot_xY(
     hdi_prob: float = 0.94,
     label: Union[str, None] = None,
 ) -> Tuple[Line2D, PolyCollection]:
-    """Utility function to plot HDI intervals."""
+    """
+    Utility function to plot HDI intervals.
+
+    :param x:
+        Pandas datetime index or numpy array of x-axis values
+    :param y:
+        Xarray data array of y-axis data
+    :param ax:
+        Matplotlib ax object
+    :param plot_hdi_kwargs:
+        Dictionary of keyword arguments passed to ax.plot()
+    :param hdi_prob:
+        The size of the HDI, default is 0.94
+    :param label:
+        The plot label
+    """
 
     if plot_hdi_kwargs is None:
         plot_hdi_kwargs = {}
 
@@ -30,7 +30,7 @@
 
 
 class ExperimentalDesign:
-    """Base class"""
+    """Base class for other experiment types"""
 
     model = None
     expt_type = None
@@ -43,7 +43,7 @@ def __init__(self, model=None, **kwargs):
 
     @property
     def idata(self):
-        """Access to the InferenceData object"""
+        """Access to the model's InferenceData object"""
         return self.model.idata
 
     def print_coefficients(self) -> None:
@@ -66,8 +66,32 @@ def print_coefficients(self) -> None:
 
 
 class PrePostFit(ExperimentalDesign):
-    """A class to analyse quasi-experiments where parameter estimation is based on just
-    the pre-intervention data."""
+    """
+    A class to analyse quasi-experiments where parameter estimation is based on just
+    the pre-intervention data.
+
+    :param data:
+        A pandas data frame
+    :param treatment_time:
+        The time when treatment occurred, should be in reference to the data index
+    :param formula:
+        A statistical model formula
+    :param model:
+        A PyMC model
+
+    Example
+    --------
+    >>> sc = cp.load_data("sc")
+    >>> treatment_time = 70
+    >>> seed = 42
+    >>> result = cp.pymc_experiments.PrePostFit(
+    ...     sc,
+    ...     treatment_time,
+    ...     formula="actual ~ 0 + a + b + c + d + e + f + g",
+    ...     model=cp.pymc_models.WeightedSumFitter(
+    ...         sample_kwargs={"target_accept": 0.95, "random_seed": seed}
+    ...     ),
+    ... )
+    """
 
     def __init__(
         self,
@@ -256,13 +280,64 @@ def summary(self) -> None:
 
 
 class InterruptedTimeSeries(PrePostFit):
-    """Interrupted time series analysis"""
+    """
+    Interrupted time series analysis, a wrapper around the PrePostFit class
+
+    :param data:
+        A pandas data frame
+    :param treatment_time:
+        The time when treatment occurred, should be in reference to the data index
+    :param formula:
+        A statistical model formula
+    :param model:
+        A PyMC model
+
+    Example
+    --------
+    >>> df = (
+    ...     cp.load_data("its")
+    ...     .assign(date=lambda x: pd.to_datetime(x["date"]))
+    ...     .set_index("date")
+    ... )
+    >>> treatment_time = pd.to_datetime("2017-01-01")
+    >>> seed = 42
+    >>> result = cp.pymc_experiments.InterruptedTimeSeries(
+    ...     df,
+    ...     treatment_time,
+    ...     formula="y ~ 1 + t + C(month)",
+    ...     model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
+    ... )
+    """
 
     expt_type = "Interrupted Time Series"
 
 
 class SyntheticControl(PrePostFit):
-    """A wrapper around the PrePostFit class"""
+    """A wrapper around the PrePostFit class
+
+    :param data:
+        A pandas data frame
+    :param treatment_time:
+        The time when treatment occurred, should be in reference to the data index
+    :param formula:
+        A statistical model formula
+    :param model:
+        A PyMC model
+
+    Example
+    --------
+    >>> df = cp.load_data("sc")
+    >>> treatment_time = 70
+    >>> seed = 42
+    >>> result = cp.pymc_experiments.SyntheticControl(
+    ...     df,
+    ...     treatment_time,
+    ...     formula="actual ~ 0 + a + b + c + d + e + f + g",
+    ...     model=cp.pymc_models.WeightedSumFitter(
+    ...         sample_kwargs={"target_accept": 0.95, "random_seed": seed}
+    ...     ),
+    ... )
+    """
 
     expt_type = "Synthetic Control"
 
@@ -285,6 +360,28 @@ class DifferenceInDifferences(ExperimentalDesign):
 
         There is no pre/post intervention data distinction for DiD, we fit all the
         data available.
+
+    :param data:
+        A pandas data frame
+    :param formula:
+        A statistical model formula
+    :param time_variable_name:
+        Name of the data column for the time variable
+    :param group_variable_name:
+        Name of the data column for the group variable
+    :param model:
+        A PyMC model for difference in differences
+
+    Example
+    --------
+    >>> df = cp.load_data("did")
+    >>> seed = 42
+    >>> result = cp.pymc_experiments.DifferenceInDifferences(
+    ...     df,
+    ...     formula="y ~ 1 + group*post_treatment",
+    ...     time_variable_name="t",
+    ...     group_variable_name="group",
+    ...     model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
+    ... )
 
     """
 
@@ -572,6 +669,18 @@ class RegressionDiscontinuity(ExperimentalDesign):
     :param bandwidth:
         Data outside of the bandwidth (relative to the discontinuity) is not used to fit
         the model.
+
+    Example
+    --------
+    >>> df = cp.load_data("rd")
+    >>> seed = 42
+    >>> result = cp.pymc_experiments.RegressionDiscontinuity(
+    ...     df,
+    ...     formula="y ~ 1 + x + treated + x:treated",
+    ...     model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
+    ...     treatment_threshold=0.5,
+    ... )
+
     """
 
     def __init__(
@@ -742,7 +851,33 @@ def summary(self) -> None:
 
 
 class PrePostNEGD(ExperimentalDesign):
-    """A class to analyse data from pretest/posttest designs"""
+    """
+    A class to analyse data from pretest/posttest designs
+
+    :param data:
+        A pandas data frame
+    :param formula:
+        A statistical model formula
+    :param group_variable_name:
+        Name of the column in data for the group variable
+    :param pretreatment_variable_name:
+        Name of the column in data for the pretreatment variable
+    :param model:
+        A PyMC model
+
+    Example
+    --------
+    >>> df = cp.load_data("anova1")
+    >>> seed = 42
+    >>> result = cp.pymc_experiments.PrePostNEGD(
+    ...     df,
+    ...     formula="post ~ 1 + C(group) + pre",
+    ...     group_variable_name="group",
+    ...     pretreatment_variable_name="pre",
+    ...     model=cp.pymc_models.LinearRegression(sample_kwargs={"random_seed": seed}),
+    ... )
+
+    """
 
     def __init__(
         self,