diff --git a/pymc_marketing/mmm/base.py b/pymc_marketing/mmm/base.py
index 0236ae73a..e63fe7552 100644
--- a/pymc_marketing/mmm/base.py
+++ b/pymc_marketing/mmm/base.py
@@ -1162,7 +1162,7 @@ def compute_mean_contributions_over_time(
             )
 
         if getattr(self, "yearly_seasonality", None):
-            contributions_fourier_over_time = (
+            contributions_fourier_over_time = pd.DataFrame(
                 az.extract(
                     self.fit_result,
                     var_names=["fourier_contributions"],
@@ -1172,6 +1172,8 @@
                 .to_dataframe()
                 .squeeze()
                 .unstack()
+                .sum(axis=1),
+                columns=["yearly_seasonality"],
             )
         else:
             contributions_fourier_over_time = pd.DataFrame(
@@ -1300,6 +1302,117 @@ def plot_channel_contribution_share_hdi(
     def graphviz(self, **kwargs):
         return pm.model_to_graphviz(self.model, **kwargs)
 
+    def _process_decomposition_components(self, data: pd.DataFrame) -> pd.DataFrame:
+        """
+        Process data to compute the sum of contributions by component and calculate their percentages.
+        The output dataframe will have columns for "component", "contribution", and "percentage".
+
+        Parameters
+        ----------
+        data : pd.DataFrame
+            Dataframe with the contributions by component, as returned by "compute_mean_contributions_over_time".
+
+        Returns
+        -------
+        pd.DataFrame
+            A dataframe with contributions summed by component, sorted by contribution in ascending order,
+            with an additional column showing the percentage contribution of each component.
+        """
+
+        dataframe = data.copy()
+        stack_dataframe = dataframe.stack().reset_index()
+        stack_dataframe.columns = pd.Index(["date", "component", "contribution"])
+        stack_dataframe.set_index(["date", "component"], inplace=True)
+        dataframe = stack_dataframe.groupby("component").sum()
+        dataframe.sort_values(by="contribution", ascending=True, inplace=True)
+        dataframe.reset_index(inplace=True)
+
+        total_contribution = dataframe["contribution"].sum()
+        dataframe["percentage"] = (dataframe["contribution"] / total_contribution) * 100
+
+        return dataframe
+
+    def plot_waterfall_components_decomposition(
+        self,
+        original_scale: bool = True,
+        figsize: tuple[int, int] = (14, 7),
+        **kwargs,
+    ) -> plt.Figure:
+        """
+        Create a waterfall plot showing the decomposition of the target variable into its components.
+
+        Parameters
+        ----------
+        original_scale : bool, optional
+            If True, the contributions are plotted in the original scale of the target. The default is True.
+        figsize : tuple[int, int], optional
+            The size of the figure. The default is (14, 7).
+        **kwargs
+            Additional keyword arguments to pass to the matplotlib `subplots` function.
+
+        Returns
+        -------
+        fig : matplotlib.figure.Figure
+            The matplotlib figure object.
+ """ + + dataframe = self.compute_mean_contributions_over_time( + original_scale=original_scale + ) + + dataframe = self._process_decomposition_components(data=dataframe) + total_contribution = dataframe["contribution"].sum() + + fig, ax = plt.subplots(figsize=figsize, layout="constrained", **kwargs) + + cumulative_contribution = 0 + + for index, row in dataframe.iterrows(): + color = "lightblue" if row["contribution"] >= 0 else "salmon" + + bar_start = ( + cumulative_contribution + row["contribution"] + if row["contribution"] < 0 + else cumulative_contribution + ) + ax.barh(row["component"], row["contribution"], left=bar_start, color=color) + + if row["contribution"] > 0: + cumulative_contribution += row["contribution"] + + label_pos = bar_start + (row["contribution"] / 2) + + if row["contribution"] < 0: + label_pos = bar_start - (row["contribution"] / 2) + + ax.text( + label_pos, + index, + f"{row['contribution']:,.0f}\n({row['percentage']:.1f}%)", + ha="center", + va="center", + color="black", + fontsize=10, + ) + + ax.set_title("Response Decomposition Waterfall by Components") + ax.set_xlabel("Cumulative Contribution") + ax.set_ylabel("Components") + + xticks = np.linspace(0, total_contribution, num=11) + xticklabels = [f"{(x/total_contribution)*100:.0f}%" for x in xticks] + ax.set_xticks(xticks) + ax.set_xticklabels(xticklabels) + + ax.spines["right"].set_visible(False) + ax.spines["top"].set_visible(False) + ax.spines["left"].set_visible(False) + + ax.set_yticks(np.arange(len(dataframe))) + ax.set_yticklabels(dataframe["component"]) + + return fig + class MMM(BaseMMM, ValidateTargetColumn, ValidateDateColumn, ValidateChannelColumns): pass diff --git a/tests/mmm/test_delayed_saturated_mmm.py b/tests/mmm/test_delayed_saturated_mmm.py index 643cdea66..c008bfa31 100644 --- a/tests/mmm/test_delayed_saturated_mmm.py +++ b/tests/mmm/test_delayed_saturated_mmm.py @@ -285,7 +285,6 @@ def test_fit(self, toy_X: pd.DataFrame, toy_y: pd.Series) -> None: assert mmm.model_config is not None n_channel: int = len(mmm.channel_columns) n_control: int = len(mmm.control_columns) - fourier_terms: int = 2 * mmm.yearly_seasonality mmm.fit( X=toy_X, y=toy_y, @@ -322,17 +321,23 @@ def test_fit(self, toy_X: pd.DataFrame, toy_y: pd.Series) -> None: ) assert mean_model_contributions_ts.shape == ( toy_X.shape[0], - n_channel + n_control + fourier_terms + 1, + n_channel + + n_control + + 2, # 2 for yearly seasonality (+1) and intercept (+) ) + + processed_df = mmm._process_decomposition_components( + data=mean_model_contributions_ts + ) + + assert processed_df.shape == (n_channel + n_control + 2, 3) + assert mean_model_contributions_ts.columns.tolist() == [ "channel_1", "channel_2", "control_1", "control_2", - "sin_order_1", - "cos_order_1", - "sin_order_2", - "cos_order_2", + "yearly_seasonality", "intercept", ] diff --git a/tests/mmm/test_plotting.py b/tests/mmm/test_plotting.py index 0de2fb496..619248936 100644 --- a/tests/mmm/test_plotting.py +++ b/tests/mmm/test_plotting.py @@ -92,6 +92,7 @@ class ToyMMM(BaseDelayedSaturatedMMM, MaxAbsScaleTarget): ("plot_posterior_predictive", {"original_scale": True}), ("plot_components_contributions", {}), ("plot_channel_parameter", {"param_name": "alpha"}), + ("plot_waterfall_components_decomposition", {"original_scale": True}), ("plot_direct_contribution_curves", {}), ("plot_direct_contribution_curves", {"same_axes": True}), ("plot_direct_contribution_curves", {"channels": ["channel_2"]}),