|
187 | 187 | from pymc_marketing.mmm.fourier import YearlyFourier
|
188 | 188 | from pymc_marketing.mmm.hsgp import HSGPBase
|
189 | 189 | from pymc_marketing.mmm.lift_test import (
|
| 190 | + add_cost_per_target_potentials, |
190 | 191 | add_lift_measurements_to_likelihood_from_saturation,
|
191 | 192 | scale_lift_measurements,
|
192 | 193 | )
|
@@ -1613,6 +1614,22 @@ def _set_xarray_data(
|
1613 | 1614 | else:
|
1614 | 1615 | data["target_data"] = target_values
|
1615 | 1616 |
|
| 1617 | + # Handle optional spend data used for CPT calibration if available |
| 1618 | + if ( |
| 1619 | + hasattr(self, "_calibration_spend_xarray") |
| 1620 | + and "channel_data_spend" in model.named_vars |
| 1621 | + ): |
| 1622 | + spend_values = self._calibration_spend_xarray._channel |
| 1623 | + # Align to new coords |
| 1624 | + reindex_coords = {"date": coords["date"], "channel": coords["channel"]} |
| 1625 | + for dim in self.dims: |
| 1626 | + reindex_coords[dim] = coords[dim] |
| 1627 | + spend_values = spend_values.reindex(reindex_coords, fill_value=0) |
| 1628 | + # Ensure no NaNs are passed into pm.Data updates |
| 1629 | + spend_values = spend_values.fillna(0) |
| 1630 | + original_dtype = model.named_vars["channel_data_spend"].type.dtype |
| 1631 | + data["channel_data_spend"] = spend_values.astype(original_dtype) |
| 1632 | + |
1616 | 1633 | self.new_updated_data = data
|
1617 | 1634 | self.new_updated_coords = coords
|
1618 | 1635 | self.new_updated_model = model
|
@@ -1950,6 +1967,132 @@ def add_lift_test_measurements(
|
1950 | 1967 | name=name,
|
1951 | 1968 | )
|
1952 | 1969 |
|
def add_cost_per_target_calibration(
    self,
    data: pd.DataFrame,
    calibration_data: pd.DataFrame,
    cpt_variable_name: str = "cost_per_target",
    name_prefix: str = "cpt_calibration",
) -> None:
    """Calibrate cost-per-target using constraints via ``pm.Potential``.

    This adds a ``pm.Data`` variable ``channel_data_spend`` and a
    deterministic ``cpt_variable_name`` computed as
    ``channel_data_spend / channel_contribution_original_scale`` (with the
    denominator clipped below at ``1e-12``), then delegates to
    ``add_cost_per_target_potentials`` to create per-row penalty terms
    based on ``calibration_data`` using a quadratic penalty:

    ``penalty = - |cpt_mean - target|^2 / (2 * sigma^2)``.

    All inputs are validated before the model is modified, so a malformed
    ``calibration_data`` does not leave partially registered variables
    behind.

    Parameters
    ----------
    data : pd.DataFrame
        Feature-like DataFrame with columns matching training ``X`` but with
        channel values representing spend (original units). Must include the
        same ``date`` and any model ``dims`` columns.
    calibration_data : pd.DataFrame
        DataFrame with rows specifying calibration targets. Must include:
          - ``channel``: channel name in ``self.channel_columns``
          - ``cost_per_target``: desired CPT value
          - ``sigma``: accepted deviation; larger => weaker penalty
        and one column per dimension in ``self.dims``.
    cpt_variable_name : str
        Name for the cost-per-target Deterministic in the model.
    name_prefix : str
        Prefix to use for generated potential names.

    Raises
    ------
    RuntimeError
        If the model has not been built yet (``self.model`` is missing).
    KeyError
        If ``calibration_data`` lacks the ``channel`` column, any column
        named in ``self.dims``, the ``sigma`` column, or a target column.

    Examples
    --------
    Build a model and calibrate CPT for selected (dims, channel):

    .. code-block:: python

        # spend data in original scale with the same structure as X
        spend_df = X.copy()
        # e.g., if X contains impressions, replace with monetary spend
        # spend_df[channels] = ...

        calibration_df = pd.DataFrame(
            {
                "channel": ["C1", "C2"],
                "geo": ["US", "US"],  # dims columns as needed
                "cost_per_target": [30.0, 45.0],
                "sigma": [2.0, 3.0],
            }
        )

        mmm.add_cost_per_target_calibration(
            data=spend_df,
            calibration_data=calibration_df,
            cpt_variable_name="cost_per_target",
            name_prefix="cpt_calibration",
        )
    """
    if not hasattr(self, "model"):
        raise RuntimeError("Model must be built before adding calibration.")

    # Validate every required column up front: failing after pm.Data /
    # pm.Deterministic have been registered would leave the model
    # half-modified.
    available_columns = calibration_data.columns
    if "channel" not in available_columns:
        raise KeyError("'channel' column missing in calibration_data")
    for dim in self.dims:
        if dim not in available_columns:
            raise KeyError(
                f"The {dim} column is required in calibration_data "
                "to map to model dims."
            )
    if "sigma" not in available_columns:
        raise KeyError("'sigma' column missing in calibration_data")
    # NOTE(review): the helper's exact target-column lookup is not visible
    # from here; accept a column named after ``cpt_variable_name`` as well so
    # this check never rejects input the helper would take -- confirm.
    if (
        "cost_per_target" not in available_columns
        and cpt_variable_name not in available_columns
    ):
        raise KeyError("'cost_per_target' column missing in calibration_data")

    # Prepare spend data as xarray (original units)
    spend_ds = self._create_xarray_from_pandas(
        data=data,
        date_column=self.date_column,
        dims=self.dims,
        metric_list=self.channel_columns,
        metric_coordinate_name="channel",
    ).transpose("date", *self.dims, "channel")
    # Cache for predictive alignment
    self._calibration_spend_xarray = spend_ds

    spend_dims = ("date", *self.dims, "channel")

    with self.model:
        # Ensure original-scale contribution exists
        if "channel_contribution_original_scale" not in self.model.named_vars:
            self.add_original_scale_contribution_variable(
                [
                    "channel_contribution",
                ]
            )

        # Create pm.Data for spend aligned to current model coords
        spend_values = spend_ds._channel
        # Reindex to model coords to ensure ordering matches
        reindex_coords = {"date": self.model.coords["date"]}
        for dim in self.dims:
            reindex_coords[dim] = self.model.coords[dim]
        reindex_coords["channel"] = self.model.coords["channel"]
        spend_values = spend_values.reindex(reindex_coords, fill_value=0)
        # Replace any existing NaNs in spend with zeros to satisfy pm.Data
        spend_values = spend_values.fillna(0)

        pm.Data(
            name="channel_data_spend",
            value=spend_values.values,
            dims=spend_dims,
        )

        # Build cost_per_target deterministic safely (avoid division by ~0)
        denom = pt.clip(
            self.model["channel_contribution_original_scale"], 1e-12, np.inf
        )
        pm.Deterministic(
            name=cpt_variable_name,
            var=self.model["channel_data_spend"] / denom,
            dims=spend_dims,
        )

        # Create one Potential per row in calibration_data
        add_cost_per_target_potentials(
            calibration_df=calibration_data,
            model=self.model,
            cpt_variable_name=cpt_variable_name,
            name_prefix=name_prefix,
        )
| 2095 | + |
1953 | 2096 | def create_fit_data(
|
1954 | 2097 | self,
|
1955 | 2098 | X: pd.DataFrame | xr.Dataset | xr.DataArray,
|
|
0 commit comments