8 changes: 4 additions & 4 deletions dabest/_dabest_object.py
@@ -112,7 +112,7 @@ def __init__(
# Determine the kind of estimation plot we need to produce.
if all([isinstance(i, (str, int, float)) for i in idx]):
# flatten out idx.
all_plot_groups = pd.unique([t for t in idx]).tolist()
all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()
if len(idx) > len(all_plot_groups):
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
raise ValueError(err0)
@@ -122,7 +122,7 @@ def __init__(
self.__idx = (idx,)

elif all([isinstance(i, (tuple, list)) for i in idx]):
all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist()
all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()

actual_groups_given = sum([len(i) for i in idx])

@@ -663,9 +663,9 @@ def _get_plot_data(self, x, y, all_plot_groups):


if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
plot_data[self.__xvar].cat.remove_unused_categories()
plot_data[self.__xvar].cat.reorder_categories(
all_plot_groups, ordered=True, inplace=True
all_plot_groups, ordered=True
)
else:
plot_data[self.__xvar] = pd.Categorical(
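The two hunks above track pandas API changes: recent pandas deprecates passing a plain list to `pd.unique`, and the categorical accessor methods no longer accept `inplace=True`. A minimal sketch of both idioms, with illustrative group names that are not taken from dabest; because the accessor methods are no longer in-place, their return value has to be assigned back for the change to take effect:

```python
import pandas as pd

# pd.unique on a bare Python list is deprecated in newer pandas,
# so the list is wrapped in a Series first.
groups = ["Control", "Test 1", "Control", "Test 2"]
all_plot_groups = pd.unique(pd.Series(groups)).tolist()
print(all_plot_groups)  # ['Control', 'Test 1', 'Test 2']

# The categorical accessor methods return a new Series instead of
# mutating in place, so the result is assigned back to the column.
df = pd.DataFrame({
    "group": pd.Categorical(groups, categories=["Control", "Test 1", "Test 2", "Unused"])
})
df["group"] = df["group"].cat.remove_unused_categories()
df["group"] = df["group"].cat.reorder_categories(
    ["Test 2", "Test 1", "Control"], ordered=True
)
```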
152 changes: 152 additions & 0 deletions dabest/_effsize_objects.py
@@ -167,6 +167,8 @@ def __init__(
self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
self.__pct_low = sorted_bootstraps[pct_idx_low]
self.__pct_high = sorted_bootstraps[pct_idx_high]

self._get_bootstrap_baseline_ec()

self._perform_statistical_test()

@@ -435,6 +437,92 @@ def to_dict(self):
for a in attrs:
out[a] = getattr(self, a)
return out

def _get_bootstrap_baseline_ec(self):
from ._stats_tools import confint_2group_diff as ci2g
from ._stats_tools import effsize as es

# self.__is_paired is not used here: the baseline error curve compares the control group to itself, so the comparison is treated as unpaired.
is_paired = None

difference = es.two_group_difference(
self.__control, self.__control, is_paired, self.__effect_size
)
self.__bec_difference = difference

jackknives = ci2g.compute_meandiff_jackknife(
self.__control, self.__control, is_paired, self.__effect_size
)

acceleration_value = ci2g._calc_accel(jackknives)

bootstraps = ci2g.compute_bootstrapped_diff(
self.__control,
self.__control,
is_paired,
self.__effect_size,
self.__resamples,
self.__random_seed,
)
self.__bootstraps_baseline_ec = bootstraps

sorted_bootstraps = npsort(self.__bootstraps_baseline_ec)
# Infinities do not need to be handled for bootstraps_baseline_ec.

bias_correction = ci2g.compute_meandiff_bias_correction(
self.__bootstraps_baseline_ec, difference
)

# Compute BCa intervals.
bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
bias_correction,
acceleration_value,
self.__resamples,
self.__ci,
)

self.__bec_bca_interval_idx = (bca_idx_low, bca_idx_high)

if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):
self.__bec_bca_low = sorted_bootstraps[bca_idx_low]
self.__bec_bca_high = sorted_bootstraps[bca_idx_high]

err1 = "The $lim_type limit of the interval"
err2 = "was in the $loc 10 values."
err3 = "The result for baseline curve should be considered unstable."
err_temp = Template(" ".join([err1, err2, err3]))

if bca_idx_low <= 10:
warnings.warn(
err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1
)

if bca_idx_high >= self.__resamples - 9:
warnings.warn(
err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1
)

else:
err1 = "The $lim_type limit of the BCa interval of baseline curve cannot be computed."
err2 = "It is set to the effect size itself."
err3 = "All bootstrap values were likely all the same."
err_temp = Template(" ".join([err1, err2, err3]))

if isnan(bca_idx_low):
self.__bec_bca_low = difference
warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0)

if isnan(bca_idx_high):
self.__bec_bca_high = difference
warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0)

# Compute percentile intervals.
pct_idx_low = int((self.__alpha / 2) * self.__resamples)
pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)

self.__bec_pct_interval_idx = (pct_idx_low, pct_idx_high)
self.__bec_pct_low = sorted_bootstraps[pct_idx_low]
self.__bec_pct_high = sorted_bootstraps[pct_idx_high]

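For orientation, `_get_bootstrap_baseline_ec` above resamples the control group against itself, so the resulting bootstrap distribution reflects only sampling noise; the BCa and percentile limits are then read off the sorted bootstraps. A minimal self-contained sketch of the control-vs-control bootstrap and the percentile-index arithmetic, with illustrative data and settings (dabest's `_stats_tools` helpers and the BCa correction itself are not reproduced here):

```python
import numpy as np

rng = np.random.default_rng(12345)
control = rng.normal(loc=3.0, scale=0.4, size=20)

resamples = 5000
ci = 95
alpha = 1 - ci / 100  # 0.05 for a 95% interval

# Bootstrap the control group against itself: each resampled "effect size"
# is pure sampling noise centred on zero.
boot = np.array([
    rng.choice(control, size=control.size, replace=True).mean()
    - rng.choice(control, size=control.size, replace=True).mean()
    for _ in range(resamples)
])
sorted_boot = np.sort(boot)

# Percentile interval indices, mirroring the arithmetic in the method above:
# they bracket the central ci% of the sorted bootstraps.
pct_idx_low = int((alpha / 2) * resamples)
pct_idx_high = int((1 - alpha / 2) * resamples)
print(sorted_boot[pct_idx_low], sorted_boot[pct_idx_high])
```
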
@property
def difference(self):
@@ -671,6 +759,54 @@ def proportional_difference(self):
return self.__proportional_difference
except AttributeError:
return npnan

@property
def bec_difference(self):
return self.__bec_difference

@property
def bec_bootstraps(self):
"""
The generated baseline error bootstraps.
"""
return self.__bootstraps_baseline_ec

@property
def bec_bca_interval_idx(self):
return self.__bec_bca_interval_idx

@property
def bec_bca_low(self):
"""
The bias-corrected and accelerated confidence interval lower limit for baseline error.
"""
return self.__bec_bca_low

@property
def bec_bca_high(self):
"""
The bias-corrected and accelerated confidence interval upper limit for baseline error.
"""
return self.__bec_bca_high

@property
def bec_pct_interval_idx(self):
return self.__bec_pct_interval_idx

@property
def bec_pct_low(self):
"""
The percentile confidence interval lower limit for baseline error.
"""
return self.__bec_pct_low

@property
def bec_pct_high(self):
"""
The percentile confidence interval upper limit for baseline error.
"""
return self.__bec_pct_high


# %% ../nbs/API/effsize_objects.ipynb 10
class EffectSizeDataFrame(object):
@@ -843,6 +979,14 @@ def __pre_calc(self):
"pvalue_kruskal",
"statistic_kruskal",
"proportional_difference",
"bec_difference",
"bec_bootstraps",
"bec_bca_interval_idx",
"bec_bca_low",
"bec_bca_high",
"bec_pct_interval_idx",
"bec_pct_low",
"bec_pct_high",
]
self.__results = out_.reindex(columns=columns_in_order)
self.__results.dropna(axis="columns", how="all", inplace=True)
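With the eight `bec_*` columns appended above, the baseline error-curve statistics surface in the results table alongside the existing ones (columns that are entirely NaN are dropped by the `dropna` call). A short sketch, assuming `analysis` is a dabest object created with `dabest.load` as in the usage example further below:

```python
# mean_diff.results is a DataFrame with one row per comparison; the new
# baseline error-curve columns sit next to the existing statistics.
res = analysis.mean_diff.results
print(res[["bec_difference", "bec_bca_low", "bec_bca_high",
           "bec_pct_low", "bec_pct_high"]])
```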
@@ -1027,6 +1171,7 @@ def plot(
delta_text_kwargs=None,
delta_dot=True,
delta_dot_kwargs=None,
show_baseline_ec=False,
):
"""
Creates an estimation plot for the effect size of interest.
@@ -1208,6 +1353,13 @@
delta_dot_kwargs : dict, default None
Pass relevant keyword arguments. If None, the following keywords are passed:
{"marker": "^", "alpha": 0.5, "zorder": 2, "size": 3, "side": "right"}
show_baseline_ec : boolean, default False
Whether or not to display the baseline error curve. The baseline error curve
represents the distribution of the effect size when comparing the control
group to itself, providing a reference for the inherent variability or noise
in the data. When True, this curve is plotted alongside the main effect size
distribution, allowing for a visual comparison of the observed effect against
the baseline variability.

Returns
-------
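The `show_baseline_ec` flag documented above is the user-facing switch for this feature. A minimal usage sketch with made-up long-form data (the column and group names are illustrative):

```python
import numpy as np
import pandas as pd
import dabest

# Illustrative two-group data in long form.
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "Group": ["Control"] * 20 + ["Test"] * 20,
    "Value": np.concatenate([rng.normal(3.0, 0.4, 20),
                             rng.normal(3.5, 0.4, 20)]),
})

analysis = dabest.load(data=df, x="Group", y="Value", idx=("Control", "Test"))

# Draw the estimation plot with the baseline error curve alongside the
# effect-size distribution (the flag added in this change set).
fig = analysis.mean_diff.plot(show_baseline_ec=True)
```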
138 changes: 72 additions & 66 deletions dabest/misc_tools.py
@@ -154,8 +154,12 @@ def get_params(effectsize_df, plot_kwargs):
if err_color is None:
err_color = "black"

# Boolean for showing Baseline Curve
show_baseline_ec = plot_kwargs["show_baseline_ec"]

return (dabest_obj, plot_data, xvar, yvar, is_paired, effect_size, proportional, all_plot_groups, idx,
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color)
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color,
show_baseline_ec)

def get_kwargs(plot_kwargs, ytick_color):
"""
@@ -627,7 +631,9 @@ def extract_contrast_plotting_ticks(is_paired, show_pairs, two_col_sankey, plot_
t for t in range(0, len(plot_groups)) if t not in ticks_to_skip
]

return ticks_to_skip, ticks_to_plot, ticks_to_skip_contrast, ticks_to_start_twocol_sankey
ticks_for_baseline_ec = ticks_to_skip

return ticks_to_skip, ticks_to_plot, ticks_for_baseline_ec, ticks_to_skip_contrast, ticks_to_start_twocol_sankey

def set_xaxis_ticks_and_lims(show_delta2, show_mini_meta, rawdata_axes, contrast_axes, show_pairs, float_contrast,
ticks_to_skip, contrast_xtick_labels, plot_kwargs):
@@ -904,70 +910,70 @@ def Cumming_Plot_Aesthetic_Adjustments(plot_kwargs, show_delta2, effect_size_typ
contrast_axes.axhline(y=0, **reflines_kwargs)

if is_paired == "baseline" and show_pairs:
if two_col_sankey:
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
ticks_to_start_twocol_sankey
)
elif proportional and is_paired is not None:
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
ticks_to_skip
)
else:
rightend_ticks_raw = np.array(
[len(i) - 1 for i in temp_idx]
) + np.array(ticks_to_skip)
for ax in [rawdata_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]

if not proportional:
temp_length = [(len(i) - 1) for i in idx]
else:
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
if two_col_sankey:
rightend_ticks_contrast = np.array(
[len(i) - 2 for i in idx]
) + np.array(ticks_to_start_twocol_sankey)
elif proportional and is_paired is not None:
rightend_ticks_contrast = np.array(
[len(i) - 1 for i in idx]
) + np.array(ticks_to_skip)
else:
rightend_ticks_contrast = np.array(temp_length) + np.array(
ticks_to_skip_contrast
)
for ax in [contrast_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip_contrast):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)

ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]
if two_col_sankey:
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
ticks_to_start_twocol_sankey
)
elif proportional and is_paired is not None:
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
ticks_to_skip
)
else:
rightend_ticks_raw = np.array(
[len(i) - 1 for i in temp_idx]
) + np.array(ticks_to_skip)
for ax in [rawdata_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]

if not proportional:
temp_length = [(len(i) - 1) for i in idx]
else:
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
if two_col_sankey:
rightend_ticks_contrast = np.array(
[len(i) - 2 for i in idx]
) + np.array(ticks_to_start_twocol_sankey)
elif proportional and is_paired is not None:
rightend_ticks_contrast = np.array(
[len(i) - 1 for i in idx]
) + np.array(ticks_to_skip)
else:
rightend_ticks_contrast = np.array(temp_length) + np.array(
ticks_to_skip_contrast
)
for ax in [contrast_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip_contrast):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)

ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]
else:
# Compute the end of each x-axes line.
if two_col_sankey:
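Two helper signatures above also grow by one value: `get_params` now returns `show_baseline_ec` as well, and `extract_contrast_plotting_ticks` returns `ticks_for_baseline_ec`, so their call sites in the plotter must unpack one extra item each. A sketch of the updated unpacking using the names from the return statements above; the input arguments are placeholders that the plotting code prepares elsewhere and are not reproduced here:

```python
from dabest.misc_tools import get_params, extract_contrast_plotting_ticks

# One extra trailing value (show_baseline_ec) when unpacking get_params:
(dabest_obj, plot_data, xvar, yvar, is_paired, effect_size, proportional,
 all_plot_groups, idx, show_delta2, show_mini_meta, float_contrast,
 show_pairs, effect_size_type, group_summaries, err_color,
 show_baseline_ec) = get_params(effectsize_df, plot_kwargs)

# ticks_for_baseline_ec now sits between ticks_to_plot and
# ticks_to_skip_contrast in the return tuple:
ticks = extract_contrast_plotting_ticks(
    is_paired, show_pairs, two_col_sankey,
    # ... remaining arguments as passed by the plotting code ...
)
(ticks_to_skip, ticks_to_plot, ticks_for_baseline_ec,
 ticks_to_skip_contrast, ticks_to_start_twocol_sankey) = ticks
```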