pymc-labs
diff --git a/‎README.md
Lines changed: 3 additions & 0 deletions b/‎README.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎causalpy/data/datasets.py
Lines changed: 1 addition & 0 deletions b/‎causalpy/data/datasets.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎causalpy/data/geolift1.csv
Lines changed: 209 additions & 0 deletions b/‎causalpy/data/geolift1.csv
Lines changed: 209 additions & 0 deletions
diff --git a/‎causalpy/data/simulate_data.py
Lines changed: 72 additions & 0 deletions b/‎causalpy/data/simulate_data.py
Lines changed: 72 additions & 0 deletions
diff --git a/‎causalpy/pymc_models.py
Lines changed: 4 additions & 2 deletions b/‎causalpy/pymc_models.py
Lines changed: 4 additions & 2 deletions
diff --git a/‎causalpy/tests/test_integration_pymc_examples.py
Lines changed: 19 additions & 0 deletions b/‎causalpy/tests/test_integration_pymc_examples.py
Lines changed: 19 additions & 0 deletions
diff --git a/‎docs/examples.rst
Lines changed: 1 addition & 0 deletions b/‎docs/examples.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/index.rst
Lines changed: 4 additions & 0 deletions b/‎docs/index.rst
Lines changed: 4 additions & 0 deletions
@@ -91,6 +91,9 @@ This is appropriate when you have multiple units, one of which is treated. You b
 
 > The data (treated and untreated units), pre-treatment model fit, and counterfactual (i.e. the synthetic control) are plotted (top). The causal impact is shown as a blue shaded region. The Bayesian analysis shows shaded Bayesian credible regions of the model fit and counterfactual. Also shown is the causal impact (middle) and cumulative causal impact (bottom).
 
+### Geographical lift (Geolift)
+We can also use synthetic control methods to analyse data from geographical lift studies. For example, we can try to evaluate the causal impact of an intervention (e.g. a marketing campaign) run in one geographical area by using control geographical areas which are similar to the intervention area but which did not receive the specific marketing intervention.
+
 ### ANCOVA
 
 This is appropriate for non-equivalent group designs when you have a single pre and post intervention measurement and have a treatment and a control group.
 
@@ -15,6 +15,7 @@
     "rd": {"filename": "regression_discontinuity.csv"},
     "sc": {"filename": "synthetic_control.csv"},
     "anova1": {"filename": "ancova_generated.csv"},
+    "geolift1": {"filename": "geolift1.csv"},
 }
 
 
 
@@ -220,3 +220,75 @@ def generate_ancova_data(
     post = pre + treatment_effect * group + np.random.normal(size=N) * sigma
     df = pd.DataFrame({"group": group, "pre": pre, "post": post})
     return df
+
+
+def generate_geolift_data():
+    """Generate synthetic data for a geolift example.
+
+    The data consists of 6 untreated countries plus one treated unit, `Denmark`,
+    which is a weighted combination of the untreated units. We additionally add a
+    treatment effect to `Denmark` which takes effect from `treatment_time`
+    onwards. The timeseries data is observed at weekly resolution and has annual
+    seasonality, with this seasonality being drawn from a Gaussian Process with a
+    periodic kernel.
+
+    Returns a DataFrame indexed by `time`, with one column per untreated country
+    and a `Denmark` column for the treated unit."""
+    n_years = 4
+    treatment_time = pd.to_datetime("2022-01-01")
+    causal_impact = 0.2
+
+    def create_series(n=52, amplitude=1, length_scale=2):
+        # One year of seasonality (shifted up by 3), repeated for every year.
+        # `length_scale` is forwarded rather than hard-coded so that the
+        # argument actually takes effect.
+        return np.tile(
+            generate_seasonality(n=n, amplitude=amplitude, length_scale=length_scale)
+            + 3,
+            n_years,
+        )
+
+    time = pd.date_range(start="2019-01-01", periods=52 * n_years, freq="W")
+
+    untreated = [
+        "Austria",
+        "Belgium",
+        "Bulgaria",
+        "Croatia",
+        "Cyprus",
+        "Czech_Republic",
+    ]
+
+    # each untreated country gets its own independent seasonality draw
+    df = (
+        pd.DataFrame({country: create_series() for country in untreated})
+        .assign(time=time)
+        .set_index("time")
+    )
+
+    # create treated unit as a weighted sum of the untreated units
+    # (Dirichlet weights are non-negative and sum to 1)
+    weights = np.random.dirichlet(np.ones(len(untreated)), size=1)[0]
+    df = df.assign(Denmark=np.dot(df[untreated].values, weights))
+
+    # add observation noise
+    for col in untreated + ["Denmark"]:
+        df[col] += np.random.normal(size=len(df), scale=0.1)
+
+    # add treatment effect: zero before `treatment_time`, `causal_impact` after
+    df["Denmark"] += np.where(df.index < treatment_time, 0, causal_impact)
+    return df
+
+
+# -----------------
+# UTILITY FUNCTIONS
+# -----------------
+
+
+def generate_seasonality(n=12, amplitude=1, length_scale=0.5):
+    """Generate one period of seasonality by sampling from a Gaussian process with
+    a periodic kernel, using numpy code.
+
+    The process is evaluated at `n` evenly spaced points on [0, 1] with the kernel
+    period fixed to 1. `amplitude` and `length_scale` are passed straight through
+    to `periodic_kernel`. Returns a 1D array of length `n`."""
+    # Generate the covariance matrix
+    x = np.linspace(0, 1, n)
+    x1, x2 = np.meshgrid(x, x)
+    cov = periodic_kernel(
+        x1, x2, period=1, length_scale=length_scale, amplitude=amplitude
+    )
+    # Generate the seasonality
+    seasonality = np.random.multivariate_normal(np.zeros(n), cov)
+    return seasonality
+
+
+def periodic_kernel(x1, x2, period=1, length_scale=1, amplitude=1):
+    """Generate a periodic kernel for a Gaussian process.
+
+    Computes, elementwise,
+    amplitude**2 * exp(-2 * sin(pi * |x1 - x2| / period)**2 / length_scale**2)."""
+    return amplitude**2 * np.exp(
+        -2 * np.sin(np.pi * np.abs(x1 - x2) / period) ** 2 / length_scale**2
+    )
@@ -46,15 +46,17 @@ def fit(self, X, y, coords: Optional[Dict[str, Any]] = None) -> None:
         with self.model:
             self.idata = pm.sample(**self.sample_kwargs)
             self.idata.extend(pm.sample_prior_predictive())
-            self.idata.extend(pm.sample_posterior_predictive(self.idata))
+            self.idata.extend(
+                pm.sample_posterior_predictive(self.idata, progressbar=False)
+            )
         return self.idata
 
     def predict(self, X):
         """Predict data given input data `X`"""
         self._data_setter(X)
         with self.model:  # sample with new input data
             post_pred = pm.sample_posterior_predictive(
-                self.idata, var_names=["y_hat", "mu"]
+                self.idata, var_names=["y_hat", "mu"], progressbar=False
             )
         return post_pred
 
 
@@ -186,3 +186,22 @@ def test_ancova():
     assert isinstance(result, cp.pymc_experiments.PrePostNEGD)
     assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
     assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
+
+
+@pytest.mark.integration
+def test_geolift1():
+    """Fit a synthetic control model to the `geolift1` example dataset.
+
+    Loads the data, indexes it by `time`, fits `SyntheticControl` with a
+    `WeightedSumFitter`, and checks the result type and that the posterior has the
+    number of chains and draws requested in `sample_kwargs` (assumed to be defined
+    at module level in this test file)."""
+    df = cp.load_data("geolift1")
+    df["time"] = pd.to_datetime(df["time"])
+    df.set_index("time", inplace=True)
+    treatment_time = pd.to_datetime("2022-01-01")
+    result = cp.pymc_experiments.SyntheticControl(
+        df,
+        treatment_time,
+        formula="""Denmark ~ 0 + Austria + Belgium + Bulgaria + Croatia + Cyprus
+        + Czech_Republic""",
+        prediction_model=cp.pymc_models.WeightedSumFitter(sample_kwargs=sample_kwargs),
+    )
+    assert isinstance(df, pd.DataFrame)
+    assert isinstance(result, cp.pymc_experiments.SyntheticControl)
+    assert len(result.idata.posterior.coords["chain"]) == sample_kwargs["chains"]
+    assert len(result.idata.posterior.coords["draw"]) == sample_kwargs["draws"]
@@ -19,6 +19,7 @@ Synthetic Control
    notebooks/sc2_skl.ipynb
    notebooks/sc_pymc_brexit.ipynb
    notebooks/its_covid.ipynb
+   notebooks/geolift1.ipynb
 
 
 Difference in Differences
 
@@ -69,6 +69,10 @@ This is appropriate when you have multiple units, one of which is treated. You b
 
 .. image:: ../img/synthetic_control_pymc.svg
 
+Geographical Lift / Geolift
+""""""""""""""""""""""""""""
+We can also use synthetic control methods to analyse data from geographical lift studies. For example, we can try to evaluate the causal impact of an intervention (e.g. a marketing campaign) run in one geographical area by using control geographical areas which are similar to the intervention area but which did not receive the specific marketing intervention.
+
 ANCOVA
 """"""
Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,7 @@`
`15`	`15`	`"rd": {"filename": "regression_discontinuity.csv"},`
`16`	`16`	`"sc": {"filename": "synthetic_control.csv"},`
`17`	`17`	`"anova1": {"filename": "ancova_generated.csv"},`
	`18`	`+ "geolift1": {"filename": "geolift1.csv"},`
`18`	`19`	`}`
`19`	`20`
`20`	`21`