8 changes: 4 additions & 4 deletions dabest/_dabest_object.py
@@ -112,7 +112,7 @@ def __init__(
# Determine the kind of estimation plot we need to produce.
if all([isinstance(i, (str, int, float)) for i in idx]):
# flatten out idx.
all_plot_groups = pd.unique([t for t in idx]).tolist()
all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()
if len(idx) > len(all_plot_groups):
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
raise ValueError(err0)
@@ -122,7 +122,7 @@ def __init__(
self.__idx = (idx,)

elif all([isinstance(i, (tuple, list)) for i in idx]):
all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist()
all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()

actual_groups_given = sum([len(i) for i in idx])

@@ -663,9 +663,9 @@ def _get_plot_data(self, x, y, all_plot_groups):


if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
plot_data[self.__xvar].cat.remove_unused_categories()
plot_data[self.__xvar].cat.reorder_categories(
all_plot_groups, ordered=True, inplace=True
all_plot_groups, ordered=True
)
else:
plot_data[self.__xvar] = pd.Categorical(
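The two hunks above track pandas API changes: recent pandas deprecates passing a plain list to `pd.unique`, and the categorical accessor methods no longer accept `inplace=True`. A minimal sketch of both idioms, with illustrative group names that are not taken from dabest; because the accessor methods are no longer in-place, their return value has to be assigned back for the change to take effect:

```python
import pandas as pd

# pd.unique on a bare Python list is deprecated in newer pandas,
# so the list is wrapped in a Series first.
groups = ["Control", "Test 1", "Control", "Test 2"]
all_plot_groups = pd.unique(pd.Series(groups)).tolist()
print(all_plot_groups)  # ['Control', 'Test 1', 'Test 2']

# The categorical accessor methods return a new Series instead of
# mutating in place, so the result is assigned back to the column.
df = pd.DataFrame({
    "group": pd.Categorical(groups, categories=["Control", "Test 1", "Test 2", "Unused"])
})
df["group"] = df["group"].cat.remove_unused_categories()
df["group"] = df["group"].cat.reorder_categories(
    ["Test 2", "Test 1", "Control"], ordered=True
)
```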
152 changes: 152 additions & 0 deletions dabest/_effsize_objects.py
@@ -167,6 +167,8 @@ def __init__(
self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
self.__pct_low = sorted_bootstraps[pct_idx_low]
self.__pct_high = sorted_bootstraps[pct_idx_high]

self._get_bootstrap_baseline_ec()

self._perform_statistical_test()

@@ -435,6 +437,92 @@ def to_dict(self):
for a in attrs:
out[a] = getattr(self, a)
return out

def _get_bootstrap_baseline_ec(self):
from ._stats_tools import confint_2group_diff as ci2g
from ._stats_tools import effsize as es

# self.__is_paired is not used here: the baseline error curve compares the control group to itself, so the comparison is treated as unpaired.
is_paired = None

difference = es.two_group_difference(
self.__control, self.__control, is_paired, self.__effect_size
)
self.__bec_difference = difference

jackknives = ci2g.compute_meandiff_jackknife(
self.__control, self.__control, is_paired, self.__effect_size
)

acceleration_value = ci2g._calc_accel(jackknives)

bootstraps = ci2g.compute_bootstrapped_diff(
self.__control,
self.__control,
is_paired,
self.__effect_size,
self.__resamples,
self.__random_seed,
)
self.__bootstraps_baseline_ec = bootstraps

sorted_bootstraps = npsort(self.__bootstraps_baseline_ec)
# Infinities do not need to be handled for bootstraps_baseline_ec.

bias_correction = ci2g.compute_meandiff_bias_correction(
self.__bootstraps_baseline_ec, difference
)

# Compute BCa intervals.
bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
bias_correction,
acceleration_value,
self.__resamples,
self.__ci,
)

self.__bec_bca_interval_idx = (bca_idx_low, bca_idx_high)

if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):
self.__bec_bca_low = sorted_bootstraps[bca_idx_low]
self.__bec_bca_high = sorted_bootstraps[bca_idx_high]

err1 = "The $lim_type limit of the interval"
err2 = "was in the $loc 10 values."
err3 = "The result for baseline curve should be considered unstable."
err_temp = Template(" ".join([err1, err2, err3]))

if bca_idx_low <= 10:
warnings.warn(
err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1
)

if bca_idx_high >= self.__resamples - 9:
warnings.warn(
err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1
)

else:
err1 = "The $lim_type limit of the BCa interval of baseline curve cannot be computed."
err2 = "It is set to the effect size itself."
err3 = "All bootstrap values were likely all the same."
err_temp = Template(" ".join([err1, err2, err3]))

if isnan(bca_idx_low):
self.__bec_bca_low = difference
warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0)

if isnan(bca_idx_high):
self.__bec_bca_high = difference
warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0)

# Compute percentile intervals.
pct_idx_low = int((self.__alpha / 2) * self.__resamples)
pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)

self.__bec_pct_interval_idx = (pct_idx_low, pct_idx_high)
self.__bec_pct_low = sorted_bootstraps[pct_idx_low]
self.__bec_pct_high = sorted_bootstraps[pct_idx_high]

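For orientation, `_get_bootstrap_baseline_ec` above resamples the control group against itself, so the resulting bootstrap distribution reflects only sampling noise; the BCa and percentile limits are then read off the sorted bootstraps. A minimal self-contained sketch of the control-vs-control bootstrap and the percentile-index arithmetic, with illustrative data and settings (dabest's `_stats_tools` helpers and the BCa correction itself are not reproduced here):

```python
import numpy as np

rng = np.random.default_rng(12345)
control = rng.normal(loc=3.0, scale=0.4, size=20)

resamples = 5000
ci = 95
alpha = 1 - ci / 100  # 0.05 for a 95% interval

# Bootstrap the control group against itself: each resampled "effect size"
# is pure sampling noise centred on zero.
boot = np.array([
    rng.choice(control, size=control.size, replace=True).mean()
    - rng.choice(control, size=control.size, replace=True).mean()
    for _ in range(resamples)
])
sorted_boot = np.sort(boot)

# Percentile interval indices, mirroring the arithmetic in the method above:
# they bracket the central ci% of the sorted bootstraps.
pct_idx_low = int((alpha / 2) * resamples)
pct_idx_high = int((1 - alpha / 2) * resamples)
print(sorted_boot[pct_idx_low], sorted_boot[pct_idx_high])
```
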
@property
def difference(self):
@@ -671,6 +759,54 @@ def proportional_difference(self):
return self.__proportional_difference
except AttributeError:
return npnan

@property
def bec_difference(self):
return self.__bec_difference

@property
def bec_bootstraps(self):
"""
The generated baseline error bootstraps.
"""
return self.__bootstraps_baseline_ec

@property
def bec_bca_interval_idx(self):
return self.__bec_bca_interval_idx

@property
def bec_bca_low(self):
"""
The bias-corrected and accelerated confidence interval lower limit for baseline error.
"""
return self.__bec_bca_low

@property
def bec_bca_high(self):
"""
The bias-corrected and accelerated confidence interval upper limit for baseline error.
"""
return self.__bec_bca_high

@property
def bec_pct_interval_idx(self):
return self.__bec_pct_interval_idx

@property
def bec_pct_low(self):
"""
The percentile confidence interval lower limit for baseline error.
"""
return self.__bec_pct_low

@property
def bec_pct_high(self):
"""
The percentile confidence interval upper limit for baseline error.
"""
return self.__bec_pct_high


# %% ../nbs/API/effsize_objects.ipynb 10
class EffectSizeDataFrame(object):
@@ -843,6 +979,14 @@ def __pre_calc(self):
"pvalue_kruskal",
"statistic_kruskal",
"proportional_difference",
"bec_difference",
"bec_bootstraps",
"bec_bca_interval_idx",
"bec_bca_low",
"bec_bca_high",
"bec_pct_interval_idx",
"bec_pct_low",
"bec_pct_high",
]
self.__results = out_.reindex(columns=columns_in_order)
self.__results.dropna(axis="columns", how="all", inplace=True)
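With the eight `bec_*` columns appended above, the baseline error-curve statistics surface in the results table alongside the existing ones (columns that are entirely NaN are dropped by the `dropna` call). A short sketch, assuming `analysis` is a dabest object created with `dabest.load` as in the usage example further below:

```python
# mean_diff.results is a DataFrame with one row per comparison; the new
# baseline error-curve columns sit next to the existing statistics.
res = analysis.mean_diff.results
print(res[["bec_difference", "bec_bca_low", "bec_bca_high",
           "bec_pct_low", "bec_pct_high"]])
```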
@@ -1027,6 +1171,7 @@ def plot(
delta_text_kwargs=None,
delta_dot=True,
delta_dot_kwargs=None,
show_baseline_ec=False,
):
"""
Creates an estimation plot for the effect size of interest.
@@ -1208,6 +1353,13 @@
delta_dot_kwargs : dict, default None
Pass relevant keyword arguments. If None, the following keywords are passed:
{"marker": "^", "alpha": 0.5, "zorder": 2, "size": 3, "side": "right"}
show_baseline_ec : boolean, default False
Whether or not to display the baseline error curve. The baseline error curve
represents the distribution of the effect size when comparing the control
group to itself, providing a reference for the inherent variability or noise
in the data. When True, this curve is plotted alongside the main effect size
distribution, allowing for a visual comparison of the observed effect against
the baseline variability.

Returns
-------
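The `show_baseline_ec` flag documented above is the user-facing switch for this feature. A minimal usage sketch with made-up long-form data (the column and group names are illustrative):

```python
import numpy as np
import pandas as pd
import dabest

# Illustrative two-group data in long form.
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "Group": ["Control"] * 20 + ["Test"] * 20,
    "Value": np.concatenate([rng.normal(3.0, 0.4, 20),
                             rng.normal(3.5, 0.4, 20)]),
})

analysis = dabest.load(data=df, x="Group", y="Value", idx=("Control", "Test"))

# Draw the estimation plot with the baseline error curve alongside the
# effect-size distribution (the flag added in this change set).
fig = analysis.mean_diff.plot(show_baseline_ec=True)
```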
138 changes: 72 additions & 66 deletions dabest/misc_tools.py
@@ -154,8 +154,12 @@ def get_params(effectsize_df, plot_kwargs):
if err_color is None:
err_color = "black"

# Boolean for showing Baseline Curve
show_baseline_ec = plot_kwargs["show_baseline_ec"]

return (dabest_obj, plot_data, xvar, yvar, is_paired, effect_size, proportional, all_plot_groups, idx,
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color)
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color,
show_baseline_ec)

def get_kwargs(plot_kwargs, ytick_color):
"""
@@ -627,7 +631,9 @@ def extract_contrast_plotting_ticks(is_paired, show_pairs, two_col_sankey, plot_
t for t in range(0, len(plot_groups)) if t not in ticks_to_skip
]

return ticks_to_skip, ticks_to_plot, ticks_to_skip_contrast, ticks_to_start_twocol_sankey
ticks_for_baseline_ec = ticks_to_skip

return ticks_to_skip, ticks_to_plot, ticks_for_baseline_ec, ticks_to_skip_contrast, ticks_to_start_twocol_sankey

def set_xaxis_ticks_and_lims(show_delta2, show_mini_meta, rawdata_axes, contrast_axes, show_pairs, float_contrast,
ticks_to_skip, contrast_xtick_labels, plot_kwargs):
@@ -904,70 +910,70 @@ def Cumming_Plot_Aesthetic_Adjustments(plot_kwargs, show_delta2, effect_size_typ
contrast_axes.axhline(y=0, **reflines_kwargs)

if is_paired == "baseline" and show_pairs:
if two_col_sankey:
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
ticks_to_start_twocol_sankey
)
elif proportional and is_paired is not None:
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
ticks_to_skip
)
else:
rightend_ticks_raw = np.array(
[len(i) - 1 for i in temp_idx]
) + np.array(ticks_to_skip)
for ax in [rawdata_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]

if not proportional:
temp_length = [(len(i) - 1) for i in idx]
else:
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
if two_col_sankey:
rightend_ticks_contrast = np.array(
[len(i) - 2 for i in idx]
) + np.array(ticks_to_start_twocol_sankey)
elif proportional and is_paired is not None:
rightend_ticks_contrast = np.array(
[len(i) - 1 for i in idx]
) + np.array(ticks_to_skip)
else:
rightend_ticks_contrast = np.array(temp_length) + np.array(
ticks_to_skip_contrast
)
for ax in [contrast_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip_contrast):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)

ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]
if two_col_sankey:
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
ticks_to_start_twocol_sankey
)
elif proportional and is_paired is not None:
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
ticks_to_skip
)
else:
rightend_ticks_raw = np.array(
[len(i) - 1 for i in temp_idx]
) + np.array(ticks_to_skip)
for ax in [rawdata_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]

if not proportional:
temp_length = [(len(i) - 1) for i in idx]
else:
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
if two_col_sankey:
rightend_ticks_contrast = np.array(
[len(i) - 2 for i in idx]
) + np.array(ticks_to_start_twocol_sankey)
elif proportional and is_paired is not None:
rightend_ticks_contrast = np.array(
[len(i) - 1 for i in idx]
) + np.array(ticks_to_skip)
else:
rightend_ticks_contrast = np.array(temp_length) + np.array(
ticks_to_skip_contrast
)
for ax in [contrast_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip_contrast):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)

ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]
else:
# Compute the end of each x-axes line.
if two_col_sankey:
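Two helper signatures above also grow by one value: `get_params` now returns `show_baseline_ec` as well, and `extract_contrast_plotting_ticks` returns `ticks_for_baseline_ec`, so their call sites in the plotter must unpack one extra item each. A sketch of the updated unpacking using the names from the return statements above; the input arguments are placeholders that the plotting code prepares elsewhere and are not reproduced here:

```python
from dabest.misc_tools import get_params, extract_contrast_plotting_ticks

# One extra trailing value (show_baseline_ec) when unpacking get_params:
(dabest_obj, plot_data, xvar, yvar, is_paired, effect_size, proportional,
 all_plot_groups, idx, show_delta2, show_mini_meta, float_contrast,
 show_pairs, effect_size_type, group_summaries, err_color,
 show_baseline_ec) = get_params(effectsize_df, plot_kwargs)

# ticks_for_baseline_ec now sits between ticks_to_plot and
# ticks_to_skip_contrast in the return tuple:
ticks = extract_contrast_plotting_ticks(
    is_paired, show_pairs, two_col_sankey,
    # ... remaining arguments as passed by the plotting code ...
)
(ticks_to_skip, ticks_to_plot, ticks_for_baseline_ec,
 ticks_to_skip_contrast, ticks_to_start_twocol_sankey) = ticks
```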