pymc-labs
diff --git a/‎docs/source/notebooks/mmm/mmm_lift_test.ipynb‎
Lines changed: 1607 additions & 581 deletions b/‎docs/source/notebooks/mmm/mmm_lift_test.ipynb‎
Lines changed: 1607 additions & 581 deletions
diff --git a/‎docs/source/notebooks/mmm/mmm_upper_funnel_causal_approach.ipynb‎
Lines changed: 3616 additions & 0 deletions b/‎docs/source/notebooks/mmm/mmm_upper_funnel_causal_approach.ipynb‎
Lines changed: 3616 additions & 0 deletions
diff --git a/‎pymc_marketing/mmm/lift_test.py‎
Lines changed: 118 additions & 3 deletions b/‎pymc_marketing/mmm/lift_test.py‎
Lines changed: 118 additions & 3 deletions
diff --git a/‎pymc_marketing/mmm/multidimensional.py‎
Lines changed: 143 additions & 0 deletions b/‎pymc_marketing/mmm/multidimensional.py‎
Lines changed: 143 additions & 0 deletions
@@ -13,9 +13,9 @@
 #   limitations under the License.
 """Adding lift tests as observations of saturation function.
 
-This provides the inner workings of `MMM.add_lift_test_measurements` method. Use that
-method directly while working with the `MMM` class.
-
+This provides the inner workings of the `MMM.add_lift_test_measurements` and
+`MMM.add_cost_per_target_calibration` methods.
+Use those methods directly while working with the `MMM` class.
 """
 
 from collections.abc import Callable, Sequence
@@ -779,3 +779,118 @@ def add_lift_measurements_to_likelihood_from_saturation(
         get_indices=get_indices,
         variable_indexer_factory=variable_indexer_factory,
     )
+
+
+def add_cost_per_target_potentials(
+    calibration_df: pd.DataFrame,
+    *,
+    model: pm.Model | None = None,
+    cpt_variable_name: str = "cost_per_target",
+    name_prefix: str = "cpt_calibration",
+    get_indices: Callable[[pd.DataFrame, pm.Model], Indices] = exact_row_indices,
+) -> None:
+    """Add a ``pm.Potential`` penalty to calibrate cost-per-target.
+
+    For each row, we compute the mean of ``cpt_variable_name`` across the date
+    dimension for the specified (dims, channel) slice and compute a soft
+    quadratic penalty:
+
+    ``penalty = - |cpt_mean - target|^2 / (2 * sigma^2)``.
+
+    The per-row penalties are summed and registered in the model as a single
+    ``pm.Potential`` whose name is ``name_prefix``.
+
+    Parameters
+    ----------
+    calibration_df : pd.DataFrame
+        Must include columns ``channel``, ``sigma``, and a target column. By
+        default the target column is assumed to be ``cost_per_target``; if a column
+        matching ``cpt_variable_name`` is present it will be used instead. The
+        DataFrame must also include one column per model dimension found in the
+        CPT variable (excluding ``date``). ``sigma`` values must be strictly
+        positive and target values must be finite.
+    model : pm.Model, optional
+        Model containing the ``cpt_variable_name`` Deterministic with dims
+        ("date", *dims, "channel"). If None, uses the current model context.
+    cpt_variable_name : str
+        Name of the cost-per-target Deterministic variable.
+    name_prefix : str
+        Name given to the single aggregated potential added to the model.
+    get_indices : Callable[[pd.DataFrame, pm.Model], Indices]
+        Alignment function mapping rows to model coordinate indices.
+
+    Raises
+    ------
+    KeyError
+        If required columns or dimension columns are missing from
+        ``calibration_df``, or if ``cpt_variable_name`` is not in the model.
+    ValueError
+        If the CPT variable has no ``date`` dimension, if any target value is
+        not finite, or if any ``sigma`` is not strictly positive.
+
+    Examples
+    --------
+    .. code-block:: python
+
+        calibration_df = pd.DataFrame(
+            {
+                "channel": ["C1", "C2"],
+                "geo": ["US", "US"],  # add dims as needed
+                "cost_per_target": [30.0, 45.0],
+                "sigma": [2.0, 3.0],
+            }
+        )
+
+        add_cost_per_target_potentials(
+            calibration_df=calibration_df,
+            model=mmm.model,
+            cpt_variable_name="cost_per_target",
+            name_prefix="cpt_calibration",
+        )
+    """
+    current_model: pm.Model = pm.modelcontext(model)
+
+    # Prefer a column named after the CPT variable; fall back to the default name.
+    target_column = (
+        cpt_variable_name
+        if cpt_variable_name in calibration_df.columns
+        else "cost_per_target"
+    )
+
+    required_cols = {"channel", target_column, "sigma"}
+    missing = required_cols - set(calibration_df.columns)
+    if missing:
+        raise KeyError(f"Missing required columns in calibration_df: {sorted(missing)}")
+
+    if cpt_variable_name not in current_model.named_vars:
+        raise KeyError(
+            f"Variable {cpt_variable_name!r} not found in model; create it before calibration."
+        )
+
+    # Determine dims from the CPT variable in the model
+    cpt_dims = current_model.named_vars_to_dims[cpt_variable_name]
+    if "date" not in cpt_dims:
+        # Fail with an actionable message instead of the bare ``tuple.index`` error.
+        raise ValueError(
+            f"Variable {cpt_variable_name!r} must have a 'date' dimension to average over; "
+            f"found dims {tuple(cpt_dims)}."
+        )
+    date_axis = cpt_dims.index("date")
+    non_date_dims = [d for d in cpt_dims if d != "date"]
+
+    # Ensure calibration_df contains all needed dimension columns
+    missing_dims = [d for d in non_date_dims if d not in calibration_df.columns]
+    if missing_dims:
+        raise KeyError(
+            f"Calibration data missing dimension columns: {missing_dims}. Required dims: {non_date_dims}"
+        )
+
+    # Build indices for selection in model coordinates (date excluded: we average over it)
+    indices = get_indices(calibration_df[non_date_dims], current_model)
+
+    targets: npt.NDArray[np.float64] = calibration_df[target_column].to_numpy(
+        dtype=float
+    )
+    sigmas: npt.NDArray[np.float64] = calibration_df["sigma"].to_numpy(dtype=float)
+
+    # A NaN/inf target or a zero/NaN sigma would turn the potential into NaN/inf
+    # and only surface later as an opaque sampling failure, so reject them here
+    # before the model is modified.
+    if not np.all(np.isfinite(targets)):
+        raise ValueError(
+            f"Column {target_column!r} in calibration_df must contain only finite values."
+        )
+    if not np.all(sigmas > 0):
+        raise ValueError(
+            "Column 'sigma' in calibration_df must contain only strictly positive values."
+        )
+
+    with current_model:
+        # Compute mean over the date dimension once
+        cpt_full = current_model[cpt_variable_name]
+        cpt_mean = pt.mean(cpt_full, axis=date_axis)
+
+        # Build advanced indexing arrays for remaining dims (including channel),
+        # preserving the order present in cpt_dims (excluding date)
+        indexers = [
+            pt.as_tensor_variable(indices[dim])  # type: ignore[index]
+            for dim in non_date_dims
+        ]
+
+        # Gather the cpt mean for each calibration row as a vector
+        gathered_cpt = cpt_mean[tuple(indexers)]
+
+        # Vectorized quadratic penalties and single aggregated Potential.
+        # The deviation is squared, so taking its absolute value first is unnecessary.
+        deviation = gathered_cpt - targets
+        penalties = -(deviation**2) / (2 * (sigmas**2))
+        pm.Potential(name_prefix, pt.sum(penalties))
@@ -187,6 +187,7 @@
 from pymc_marketing.mmm.fourier import YearlyFourier
 from pymc_marketing.mmm.hsgp import HSGPBase
 from pymc_marketing.mmm.lift_test import (
+    add_cost_per_target_potentials,
     add_lift_measurements_to_likelihood_from_saturation,
     scale_lift_measurements,
 )
@@ -1613,6 +1614,22 @@ def _set_xarray_data(
             else:
                 data["target_data"] = target_values
 
+        # Handle optional spend data used for CPT calibration if available
+        if (
+            hasattr(self, "_calibration_spend_xarray")
+            and "channel_data_spend" in model.named_vars
+        ):
+            spend_values = self._calibration_spend_xarray._channel
+            # Align to new coords
+            reindex_coords = {"date": coords["date"], "channel": coords["channel"]}
+            for dim in self.dims:
+                reindex_coords[dim] = coords[dim]
+            spend_values = spend_values.reindex(reindex_coords, fill_value=0)
+            # Ensure no NaNs are passed into pm.Data updates
+            spend_values = spend_values.fillna(0)
+            original_dtype = model.named_vars["channel_data_spend"].type.dtype
+            data["channel_data_spend"] = spend_values.astype(original_dtype)
+
         self.new_updated_data = data
         self.new_updated_coords = coords
         self.new_updated_model = model
@@ -1950,6 +1967,132 @@ def add_lift_test_measurements(
             name=name,
         )
 
+    def add_cost_per_target_calibration(
+        self,
+        data: pd.DataFrame,
+        calibration_data: pd.DataFrame,
+        cpt_variable_name: str = "cost_per_target",
+        name_prefix: str = "cpt_calibration",
+    ) -> None:
+        """Calibrate cost-per-target using constraints via ``pm.Potential``.
+
+        This adds a deterministic ``cpt_variable_name`` computed as
+        ``channel_data_spend / channel_contribution_original_scale`` (with the
+        denominator clipped below at ``1e-12``) and penalizes deviations from
+        the targets in ``calibration_data`` using a quadratic penalty per row:
+
+        ``penalty = - |cpt_mean - target|^2 / (2 * sigma^2)``.
+
+        The row penalties are summed into a single ``pm.Potential`` named
+        ``name_prefix``. The spend data is also registered in the model as the
+        ``pm.Data`` variable ``channel_data_spend`` and cached on the instance
+        as ``_calibration_spend_xarray``.
+
+        Parameters
+        ----------
+        data : pd.DataFrame
+            Feature-like DataFrame with columns matching training ``X`` but with
+            channel values representing spend (original units). Must include the
+            same ``date`` and any model ``dims`` columns. Spend that is missing
+            for a model coordinate, or NaN, is treated as 0.
+        calibration_data : pd.DataFrame
+            DataFrame with rows specifying calibration targets. Must include:
+              - ``channel``: channel name in ``self.channel_columns``
+              - ``cost_per_target``: desired CPT value (a column named after
+                ``cpt_variable_name`` is used instead when present)
+              - ``sigma``: accepted deviation; larger => weaker penalty
+            and one column per dimension in ``self.dims``.
+        cpt_variable_name : str
+            Name for the cost-per-target Deterministic in the model.
+        name_prefix : str
+            Name of the aggregated potential added to the model.
+
+        Raises
+        ------
+        RuntimeError
+            If the model has not been built yet.
+        KeyError
+            If ``calibration_data`` lacks a required column.
+        ValueError
+            If ``channel_data_spend`` or ``cpt_variable_name`` already exists in
+            the model (for example when calibration was already added).
+
+        Examples
+        --------
+        Build a model and calibrate CPT for selected (dims, channel):
+
+        .. code-block:: python
+
+            # spend data in original scale with the same structure as X
+            spend_df = X.copy()
+            # e.g., if X contains impressions, replace with monetary spend
+            # spend_df[channels] = ...
+
+            calibration_df = pd.DataFrame(
+                {
+                    "channel": ["C1", "C2"],
+                    "geo": ["US", "US"],  # dims columns as needed
+                    "cost_per_target": [30.0, 45.0],
+                    "sigma": [2.0, 3.0],
+                }
+            )
+
+            mmm.add_cost_per_target_calibration(
+                data=spend_df,
+                calibration_data=calibration_df,
+                cpt_variable_name="cost_per_target",
+                name_prefix="cpt_calibration",
+            )
+        """
+        if not hasattr(self, "model"):
+            raise RuntimeError("Model must be built before adding calibration.")
+
+        # Validate everything up front: the steps below register variables in
+        # the model and overwrite cached spend, so a failure after that point
+        # would leave the model half-modified and a retry would hit
+        # duplicate-name errors.
+        if "channel" not in calibration_data.columns:
+            raise KeyError("'channel' column missing in calibration_data")
+        for dim in self.dims:
+            if dim not in calibration_data.columns:
+                raise KeyError(
+                    f"The {dim} column is required in calibration_data to map to model dims."
+                )
+
+        # Same target-column rule as ``add_cost_per_target_potentials``.
+        target_column = (
+            cpt_variable_name
+            if cpt_variable_name in calibration_data.columns
+            else "cost_per_target"
+        )
+        missing = {target_column, "sigma"} - set(calibration_data.columns)
+        if missing:
+            raise KeyError(
+                f"Missing required columns in calibration_data: {sorted(missing)}"
+            )
+
+        for var_name in ("channel_data_spend", cpt_variable_name):
+            if var_name in self.model.named_vars:
+                raise ValueError(
+                    f"Variable {var_name!r} already exists in the model; "
+                    "cost-per-target calibration cannot register it again."
+                )
+
+        # Prepare spend data as xarray (original units)
+        spend_ds = self._create_xarray_from_pandas(
+            data=data,
+            date_column=self.date_column,
+            dims=self.dims,
+            metric_list=self.channel_columns,
+            metric_coordinate_name="channel",
+        ).transpose("date", *self.dims, "channel")
+        # Cache for predictive alignment
+        self._calibration_spend_xarray = spend_ds
+
+        with self.model:
+            # Ensure original-scale contribution exists
+            if "channel_contribution_original_scale" not in self.model.named_vars:
+                self.add_original_scale_contribution_variable(
+                    [
+                        "channel_contribution",
+                    ]
+                )
+
+            # Create pm.Data for spend aligned to current model coords
+            spend_values = spend_ds._channel
+            # Reindex to model coords to ensure ordering matches
+            reindex_coords = {"date": self.model.coords["date"]}
+            for dim in self.dims:
+                reindex_coords[dim] = self.model.coords[dim]
+            reindex_coords["channel"] = self.model.coords["channel"]
+            spend_values = spend_values.reindex(reindex_coords, fill_value=0)
+            # Replace any existing NaNs in spend with zeros to satisfy pm.Data
+            spend_values = spend_values.fillna(0)
+
+            pm.Data(
+                name="channel_data_spend",
+                value=spend_values.values,
+                dims=("date", *self.dims, "channel"),
+            )
+
+            # Build cost_per_target deterministic safely (avoid division by ~0)
+            denom = pt.clip(
+                self.model["channel_contribution_original_scale"], 1e-12, np.inf
+            )
+            pm.Deterministic(
+                name=cpt_variable_name,
+                var=self.model["channel_data_spend"] / denom,
+                dims=("date", *self.dims, "channel"),
+            )
+
+        # Add the aggregated Potential built from all rows in calibration_data
+        add_cost_per_target_potentials(
+            calibration_df=calibration_data,
+            model=self.model,
+            cpt_variable_name=cpt_variable_name,
+            name_prefix=name_prefix,
+        )
+
     def create_fit_data(
         self,
         X: pd.DataFrame | xr.Dataset | xr.DataArray,