pymc-labs
diff --git a/pymc_marketing/mmm/budget_optimizer.py b/pymc_marketing/mmm/budget_optimizer.py
Lines changed: 159 additions & 7 deletions
diff --git a/pymc_marketing/mmm/multidimensional.py b/pymc_marketing/mmm/multidimensional.py
Lines changed: 30 additions & 8 deletions
@@ -127,6 +127,10 @@ class BudgetOptimizer(BaseModel):
         Custom constraints for the optimizer.
     default_constraints : bool, optional
         Whether to add a default sum constraint on the total budget. Default is True.
+    budget_distribution_over_period : xarray.DataArray, optional
+        Distribution factors for budget allocation over time. Should have dims ("date", *budget_dims)
+        where date dimension has length num_periods. Values along date dimension should sum to 1 for
+        each combination of other dimensions. If None, budget is distributed evenly across periods.
     """
 
     num_periods: int = Field(
@@ -169,6 +173,15 @@ class BudgetOptimizer(BaseModel):
         description="Whether to add a default sum constraint on the total budget.",
     )
 
+    budget_distribution_over_period: DataArray | None = Field(
+        default=None,
+        description=(
+            "Distribution factors for budget allocation over time. Should have dims ('date', *budget_dims) "
+            "where date dimension has length num_periods. Values along date dimension should sum to 1 for "
+            "each combination of other dimensions. If None, budget is distributed evenly across periods."
+        ),
+    )
+
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
     DEFAULT_MINIMIZE_KWARGS: ClassVar[dict] = {
@@ -230,16 +243,26 @@ def __init__(self, **data):
         bool_mask = np.asarray(self.budgets_to_optimize).astype(bool)
         self._budgets = budgets_zeros[bool_mask].set(self._budgets_flat)
 
-        # 5. Replace channel_data with budgets in the PyMC model
+        # 5. Validate and process budget_distribution_over_period
+        self._budget_distribution_over_period_tensor = (
+            self._validate_and_process_budget_distribution(
+                budget_distribution_over_period=self.budget_distribution_over_period,
+                num_periods=self.num_periods,
+                budget_dims=self._budget_dims,
+                budgets_to_optimize=self.budgets_to_optimize,
+            )
+        )
+
+        # 6. Replace channel_data with budgets in the PyMC model
         self._pymc_model = self._replace_channel_data_by_optimization_variable(
             pymc_model
         )
 
-        # 6. Compile objective & gradient
+        # 7. Compile objective & gradient
         self._compiled_functions = {}
         self._compile_objective_and_grad()
 
-        # 7. Build constraints
+        # 8. Build constraints
         self._constraints = {}
         self.set_constraints(
             default=self.default_constraints, constraints=self.custom_constraints
@@ -272,6 +295,126 @@ def set_constraints(self, constraints, default=None) -> None:
             constraints=self._constraints, optimizer=self
         )
 
+    def _validate_and_process_budget_distribution(
+        self,
+        budget_distribution_over_period: DataArray | None,
+        num_periods: int,
+        budget_dims: list[str],
+        budgets_to_optimize: DataArray,
+    ) -> pt.TensorVariable | None:
+        """Validate the per-period budget distribution and convert it to a constant tensor.
+
+        Parameters
+        ----------
+        budget_distribution_over_period : DataArray | None
+            Distribution factors for budget allocation over time. Its dims must be
+            exactly ``"date"`` plus every entry of ``budget_dims`` (in any order).
+        num_periods : int
+            Number of time periods to allocate budget for; the required length of
+            the ``"date"`` dimension.
+        budget_dims : list[str]
+            List of budget dimensions (excluding 'date').
+        budgets_to_optimize : DataArray
+            Mask defining which budgets to optimize. Any dtype that casts to bool
+            is accepted (e.g. a 0/1 integer mask).
+
+        Returns
+        -------
+        pt.TensorVariable | None
+            ``None`` if no distribution was provided. Otherwise a constant tensor of
+            shape ``(num_periods, number of optimized budgets)`` holding only the
+            factors of the budgets selected by the mask.
+
+        Raises
+        ------
+        ValueError
+            If the dims do not match ``("date", *budget_dims)``, if the ``"date"``
+            dimension does not have length ``num_periods``, or if the factors do
+            not sum to 1 along ``"date"``.
+        """
+        if budget_distribution_over_period is None:
+            return None
+
+        # Validate dimensions. Only the *set* of dims is compared here; the order
+        # is normalized to ("date", *budget_dims) by the transpose further down.
+        expected_dims = ("date", *budget_dims)
+        if set(budget_distribution_over_period.dims) != set(expected_dims):
+            raise ValueError(
+                f"budget_distribution_over_period must have dims {expected_dims}, "
+                f"but got {budget_distribution_over_period.dims}"
+            )
+
+        # Validate date dimension length, read directly from the dimension sizes
+        n_dates = budget_distribution_over_period.sizes["date"]
+        if n_dates != num_periods:
+            raise ValueError(
+                f"budget_distribution_over_period date dimension must have length {num_periods}, "
+                f"but got {n_dates}"
+            )
+
+        # Validate that factors sum to 1 along date dimension
+        sums = budget_distribution_over_period.sum(dim="date")
+        if not np.allclose(sums.values, 1.0, rtol=1e-5):
+            raise ValueError(
+                "budget_distribution_over_period must sum to 1 along the date dimension "
+                "for each combination of other dimensions"
+            )
+
+        # Pre-process: Apply the mask to get only factors for optimized budgets
+        # This avoids shape mismatches during gradient computation
+        time_factors_full = budget_distribution_over_period.transpose(
+            *expected_dims
+        ).values
+
+        # Reshape to (num_periods, flat_budget_dims) and apply mask
+        time_factors_flat = time_factors_full.reshape((num_periods, -1))
+        # Cast to bool, as the other mask users in this class do: a 0/1 integer
+        # mask would otherwise be treated as column *indices* by NumPy.
+        # NOTE(review): the mask is flattened in its own dim order, which is
+        # assumed to match ``budget_dims`` - confirm against callers.
+        bool_mask = np.asarray(budgets_to_optimize).astype(bool).flatten()
+        time_factors_masked = time_factors_flat[:, bool_mask]
+
+        # Store only the masked tensor
+        return pt.constant(time_factors_masked, name="budget_distribution_over_period")
+
+    def _apply_budget_distribution_over_period(
+        self,
+        budgets: pt.TensorVariable,
+        num_periods: int,
+        date_dim_idx: int,
+    ) -> pt.TensorVariable:
+        """Spread the optimized budgets over time using the stored distribution factors.
+
+        Parameters
+        ----------
+        budgets : pt.TensorVariable
+            The scaled budget tensor with shape matching budget dimensions.
+        num_periods : int
+            Number of time periods to distribute budget across.
+        date_dim_idx : int
+            Index position where the date dimension should be inserted.
+
+        Returns
+        -------
+        pt.TensorVariable
+            Budget tensor with a new date axis of length ``num_periods`` inserted at
+            ``date_dim_idx``. Entries selected by ``self.budgets_to_optimize`` hold
+            ``budget * factor * num_periods``; all other entries are zero.
+        """
+        optimize_mask = np.asarray(self.budgets_to_optimize).astype(bool)
+
+        # Pick out the optimized entries as a flat vector and give it a leading
+        # length-1 axis so it broadcasts against the pre-masked factor tensor,
+        # which has one row per period and one column per optimized budget.
+        optimized_row = pt.expand_dims(budgets[optimize_mask], 0)
+        weighted_flat = optimized_row * self._budget_distribution_over_period_tensor
+
+        # For each period, scatter that period's weighted values back into a
+        # zero tensor with the full budget shape.
+        per_period_budgets = [
+            pt.zeros_like(budgets)[optimize_mask].set(weighted_flat[t])
+            for t in range(num_periods)
+        ]
+
+        # Stack along the date axis. Scaling by num_periods means uniform factors
+        # (1 / num_periods each) leave every period at the original budget value.
+        stacked = pt.stack(per_period_budgets, axis=date_dim_idx)
+        stacked = stacked * num_periods
+
+        return stacked
+
     def _replace_channel_data_by_optimization_variable(self, model: Model) -> Model:
         """Replace `channel_data` in the model graph with our newly created `_budgets` variable."""
         num_periods = self.num_periods
@@ -287,10 +430,19 @@ def _replace_channel_data_by_optimization_variable(self, model: Model) -> Model:
         # Repeat budgets over num_periods
         repeated_budgets_shape = list(tuple(budgets.shape))
         repeated_budgets_shape.insert(date_dim_idx, num_periods)
-        repeated_budgets = pt.broadcast_to(
-            pt.expand_dims(budgets, date_dim_idx),
-            shape=repeated_budgets_shape,
-        )
+
+        if self._budget_distribution_over_period_tensor is not None:
+            # Apply time distribution factors
+            repeated_budgets = self._apply_budget_distribution_over_period(
+                budgets, num_periods, date_dim_idx
+            )
+        else:
+            # Default behavior: distribute evenly across periods
+            repeated_budgets = pt.broadcast_to(
+                pt.expand_dims(budgets, date_dim_idx),
+                shape=repeated_budgets_shape,
+            )
+
         repeated_budgets.name = "repeated_budgets"
 
         # Pad the repeated budgets with zeros to account for carry-over effects
 
@@ -1772,6 +1772,7 @@ def optimize_budget(
         constraints: Sequence[dict[str, Any]] = (),
         default_constraints: bool = True,
         budgets_to_optimize: xr.DataArray | None = None,
+        budget_distribution_over_period: xr.DataArray | None = None,
         callback: bool = False,
         **minimize_kwargs,
     ) -> (
@@ -1796,6 +1797,10 @@ def optimize_budget(
             Whether to add default constraints.
         budgets_to_optimize : xr.DataArray | None
             Mask defining which budgets to optimize.
+        budget_distribution_over_period : xr.DataArray | None
+            Distribution factors for budget allocation over time. Should have dims ("date", *budget_dims)
+            where date dimension has length num_periods. Values along date dimension should sum to 1 for
+            each combination of other dimensions. If None, budget is distributed evenly across periods.
         callback : bool
             Whether to return callback information tracking optimization progress.
         **minimize_kwargs
@@ -1816,6 +1821,7 @@ def optimize_budget(
             custom_constraints=constraints,
             default_constraints=default_constraints,
             budgets_to_optimize=budgets_to_optimize,
+            budget_distribution_over_period=budget_distribution_over_period,
             model=self,  # Pass the wrapper instance itself to the BudgetOptimizer
         )
 
@@ -1830,6 +1836,7 @@ def sample_response_distribution(
         self,
         allocation_strategy: xr.DataArray,
         noise_level: float = 0.001,
+        additional_var_names: list[str] | None = None,
     ) -> az.InferenceData:
         """Generate synthetic dataset and sample posterior predictive based on allocation.
 
@@ -1860,11 +1867,26 @@ def sample_response_distribution(
         )
 
         constant_data = allocation_strategy.to_dataset(name="allocation")
-
-        return self.sample_posterior_predictive(
-            X=data_with_noise,
-            extend_idata=False,
-            include_last_observations=True,
-            var_names=["y", "channel_contribution_original_scale"],
-            progressbar=False,
-        ).merge(constant_data)
+        _dataset = data_with_noise.set_index([self.date_column, *list(self.dims)])[
+            self.channel_columns
+        ].to_xarray()
+
+        var_names = [
+            "y",
+            "channel_contribution",
+            "total_media_contribution_original_scale",
+        ]
+        if additional_var_names is not None:
+            var_names.extend(additional_var_names)
+
+        return (
+            self.sample_posterior_predictive(
+                X=data_with_noise,
+                extend_idata=False,
+                include_last_observations=True,
+                var_names=var_names,
+                progressbar=False,
+            )
+            .merge(constant_data)
+            .merge(_dataset)
+        )