Merged
Changes from 3 commits
4 changes: 2 additions & 2 deletions dabest/_dabest_object.py
@@ -112,7 +112,7 @@ def __init__(
# Determine the kind of estimation plot we need to produce.
if all([isinstance(i, (str, int, float)) for i in idx]):
# flatten out idx.
all_plot_groups = pd.unique([t for t in idx]).tolist()
all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()
if len(idx) > len(all_plot_groups):
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
raise ValueError(err0)
@@ -122,7 +122,7 @@
self.__idx = (idx,)

elif all([isinstance(i, (tuple, list)) for i in idx]):
all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist()
all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()

actual_groups_given = sum([len(i) for i in idx])

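The two `pd.unique` calls above now wrap the flattened `idx` in a `pd.Series` first. Recent pandas releases deprecate passing a plain Python list to `pd.unique` (it expects a Series, Index, ndarray, or ExtensionArray), so the wrapper keeps the call warning-free while still preserving order of first appearance. A minimal sketch of the pattern, with made-up group names:

```python
import pandas as pd

# Hypothetical flattened idx, e.g. from idx=(("Control", "Test 1"), ("Control", "Test 2")).
flat_idx = ["Control", "Test 1", "Control", "Test 2"]

# Wrapping the list in a Series avoids the deprecation warning newer pandas
# versions emit for plain-list input; order of first appearance is kept.
all_plot_groups = pd.unique(pd.Series(flat_idx)).tolist()
# -> ["Control", "Test 1", "Test 2"]
```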
152 changes: 152 additions & 0 deletions dabest/_effsize_objects.py
@@ -167,6 +167,8 @@ def __init__(
self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
self.__pct_low = sorted_bootstraps[pct_idx_low]
self.__pct_high = sorted_bootstraps[pct_idx_high]

self._get_bootstrap_baseline_ec()

self._perform_statistical_test()

@@ -435,6 +437,92 @@ def to_dict(self):
for a in attrs:
out[a] = getattr(self, a)
return out

def _get_bootstrap_baseline_ec(self):
from ._stats_tools import confint_2group_diff as ci2g
from ._stats_tools import effsize as es

# The baseline error curve compares the control group with itself, so it is treated as unpaired rather than reusing self.__is_paired.
is_paired = None

difference = es.two_group_difference(
self.__control, self.__control, is_paired, self.__effect_size
)
self.__bec_difference = difference

jackknives = ci2g.compute_meandiff_jackknife(
self.__control, self.__control, is_paired, self.__effect_size
)

acceleration_value = ci2g._calc_accel(jackknives)

bootstraps = ci2g.compute_bootstrapped_diff(
self.__control,
self.__control,
is_paired,
self.__effect_size,
self.__resamples,
self.__random_seed,
)
self.__bootstraps_baseline_ec = bootstraps

sorted_bootstraps = npsort(self.__bootstraps_baseline_ec)
# We don't have to consider infinities in bootstrap_baseline_ec

bias_correction = ci2g.compute_meandiff_bias_correction(
self.__bootstraps_baseline_ec, difference
)

# Compute BCa intervals.
bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
bias_correction,
acceleration_value,
self.__resamples,
self.__ci,
)

self.__bec_bca_interval_idx = (bca_idx_low, bca_idx_high)

if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):
self.__bec_bca_low = sorted_bootstraps[bca_idx_low]
self.__bec_bca_high = sorted_bootstraps[bca_idx_high]

err1 = "The $lim_type limit of the interval"
err2 = "was in the $loc 10 values."
err3 = "The result for baseline curve should be considered unstable."
err_temp = Template(" ".join([err1, err2, err3]))

if bca_idx_low <= 10:
warnings.warn(
err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1
)

if bca_idx_high >= self.__resamples - 9:
warnings.warn(
err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1
)

else:
err1 = "The $lim_type limit of the BCa interval of baseline curve cannot be computed."
err2 = "It is set to the effect size itself."
err3 = "All bootstrap values were likely all the same."
err_temp = Template(" ".join([err1, err2, err3]))

if isnan(bca_idx_low):
self.__bec_bca_low = difference
warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0)

if isnan(bca_idx_high):
self.__bec_bca_high = difference
warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0)

# Compute percentile intervals.
pct_idx_low = int((self.__alpha / 2) * self.__resamples)
pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)

self.__bec_pct_interval_idx = (pct_idx_low, pct_idx_high)
self.__bec_pct_low = sorted_bootstraps[pct_idx_low]
self.__bec_pct_high = sorted_bootstraps[pct_idx_high]

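Conceptually, `_get_bootstrap_baseline_ec` bootstraps the chosen effect size of the control group against itself and wraps BCa and percentile intervals around it, mirroring what is already done for the real comparison. A stripped-down illustration of the same idea, using only NumPy and a plain mean difference rather than the library's `ci2g`/`es` routines, with invented data:

```python
import numpy as np

rng = np.random.default_rng(12345)
control = rng.normal(loc=3.0, scale=0.5, size=20)  # made-up control measurements

# Bootstrap the mean difference of the control group against itself:
# each iteration draws two independent resamples from the same data.
resamples = 5000
boots = np.array([
    rng.choice(control, size=control.size).mean()
    - rng.choice(control, size=control.size).mean()
    for _ in range(resamples)
])

# Percentile 95% interval of the baseline error curve, mirroring the
# pct_idx_low / pct_idx_high bookkeeping above.
alpha = 0.05
bec_pct_low, bec_pct_high = np.quantile(boots, [alpha / 2, 1 - alpha / 2])
```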
@property
def difference(self):
@@ -671,6 +759,54 @@ def proportional_difference(self):
return self.__proportional_difference
except AttributeError:
return npnan

@property
def bec_difference(self):
return self.__bec_difference

@property
def bec_bootstraps(self):
"""
The generated baseline error bootstraps.
"""
return self.__bootstraps_baseline_ec

@property
def bec_bca_interval_idx(self):
return self.__bec_bca_interval_idx

@property
def bec_bca_low(self):
"""
The bias-corrected and accelerated confidence interval lower limit for baseline error.
"""
return self.__bec_bca_low

@property
def bec_bca_high(self):
"""
The bias-corrected and accelerated confidence interval upper limit for baseline error.
"""
return self.__bec_bca_high

@property
def bec_pct_interval_idx(self):
return self.__bec_pct_interval_idx

@property
def bec_pct_low(self):
"""
The percentile confidence interval lower limit for baseline error.
"""
return self.__bec_pct_low

@property
def bec_pct_high(self):
"""
The percentile confidence interval upper limit for baseline error.
"""
return self.__bec_pct_high


# %% ../nbs/API/effsize_objects.ipynb 10
class EffectSizeDataFrame(object):
@@ -843,6 +979,14 @@ def __pre_calc(self):
"pvalue_kruskal",
"statistic_kruskal",
"proportional_difference",
"bec_difference",
"bec_bootstraps",
"bec_bca_interval_idx",
"bec_bca_low",
"bec_bca_high",
"bec_pct_interval_idx",
"bec_pct_low",
"bec_pct_high",
]
self.__results = out_.reindex(columns=columns_in_order)
self.__results.dropna(axis="columns", how="all", inplace=True)
@@ -1027,6 +1171,7 @@ def plot(
delta_text_kwargs=None,
delta_dot=True,
delta_dot_kwargs=None,
show_baseline_ec=False,
):
"""
Creates an estimation plot for the effect size of interest.
@@ -1208,6 +1353,13 @@
delta_dot_kwargs : dict, default None
Pass relevant keyword arguments. If None, the following keywords are passed:
{"marker": "^", "alpha": 0.5, "zorder": 2, "size": 3, "side": "right"}
show_baseline_ec : boolean, default False
Whether or not to display the baseline error curve. The baseline error curve
represents the distribution of the effect size when comparing the control
group to itself, providing a reference for the inherent variability or noise
in the data. When True, this curve is plotted alongside the main effect size
distribution, allowing for a visual comparison of the observed effect against
the baseline variability.

Returns
-------
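Taken together, the additions in this file surface the baseline error curve both on the plot (via `show_baseline_ec`) and in the results table (via the `bec_*` columns). A hypothetical usage sketch follows; the data frame, column names, and group labels are invented, and only `show_baseline_ec` and the `bec_*` columns come from this diff:

```python
import pandas as pd
import dabest

# Invented long-form data: one grouping column, one measurement column.
df = pd.DataFrame({
    "Group": ["Control"] * 20 + ["Test"] * 20,
    "Value": list(range(20)) + list(range(5, 25)),
})

analysis = dabest.load(df, x="Group", y="Value", idx=("Control", "Test"))

# Draw the estimation plot with the baseline error curve overlaid.
analysis.mean_diff.plot(show_baseline_ec=True)

# The results table now also carries the baseline error curve statistics.
print(analysis.mean_diff.results[["bec_difference", "bec_pct_low", "bec_pct_high"]])
```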
138 changes: 72 additions & 66 deletions dabest/misc_tools.py
@@ -154,8 +154,12 @@ def get_params(effectsize_df, plot_kwargs):
if err_color is None:
err_color = "black"

# Boolean flag for showing the baseline error curve
show_baseline_ec = plot_kwargs["show_baseline_ec"]

return (dabest_obj, plot_data, xvar, yvar, is_paired, effect_size, proportional, all_plot_groups, idx,
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color)
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color,
show_baseline_ec)

def get_kwargs(plot_kwargs, ytick_color):
"""
@@ -627,7 +631,9 @@ def extract_contrast_plotting_ticks(is_paired, show_pairs, two_col_sankey, plot_
t for t in range(0, len(plot_groups)) if t not in ticks_to_skip
]

return ticks_to_skip, ticks_to_plot, ticks_to_skip_contrast, ticks_to_start_twocol_sankey
ticks_for_baseline_ec = ticks_to_skip

return ticks_to_skip, ticks_to_plot, ticks_for_baseline_ec, ticks_to_skip_contrast, ticks_to_start_twocol_sankey

def set_xaxis_ticks_and_lims(show_delta2, show_mini_meta, rawdata_axes, contrast_axes, show_pairs, float_contrast,
ticks_to_skip, contrast_xtick_labels, plot_kwargs):
@@ -904,70 +910,70 @@ def Cumming_Plot_Aesthetic_Adjustments(plot_kwargs, show_delta2, effect_size_typ
contrast_axes.axhline(y=0, **reflines_kwargs)

if is_paired == "baseline" and show_pairs:
if two_col_sankey:
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
ticks_to_start_twocol_sankey
)
elif proportional and is_paired is not None:
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
ticks_to_skip
)
else:
rightend_ticks_raw = np.array(
[len(i) - 1 for i in temp_idx]
) + np.array(ticks_to_skip)
for ax in [rawdata_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]

if not proportional:
temp_length = [(len(i) - 1) for i in idx]
else:
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
if two_col_sankey:
rightend_ticks_contrast = np.array(
[len(i) - 2 for i in idx]
) + np.array(ticks_to_start_twocol_sankey)
elif proportional and is_paired is not None:
rightend_ticks_contrast = np.array(
[len(i) - 1 for i in idx]
) + np.array(ticks_to_skip)
else:
rightend_ticks_contrast = np.array(temp_length) + np.array(
ticks_to_skip_contrast
)
for ax in [contrast_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip_contrast):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)

ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]
if two_col_sankey:
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
ticks_to_start_twocol_sankey
)
elif proportional and is_paired is not None:
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
ticks_to_skip
)
else:
rightend_ticks_raw = np.array(
[len(i) - 1 for i in temp_idx]
) + np.array(ticks_to_skip)
for ax in [rawdata_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip):
end_tick = rightend_ticks_raw[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]

if not proportional:
temp_length = [(len(i) - 1) for i in idx]
else:
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
if two_col_sankey:
rightend_ticks_contrast = np.array(
[len(i) - 2 for i in idx]
) + np.array(ticks_to_start_twocol_sankey)
elif proportional and is_paired is not None:
rightend_ticks_contrast = np.array(
[len(i) - 1 for i in idx]
) + np.array(ticks_to_skip)
else:
rightend_ticks_contrast = np.array(temp_length) + np.array(
ticks_to_skip_contrast
)
for ax in [contrast_axes]:
sns.despine(ax=ax, bottom=True)

ylim = ax.get_ylim()
xlim = ax.get_xlim()
redraw_axes_kwargs["y"] = ylim[0]

if two_col_sankey:
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
else:
for k, start_tick in enumerate(ticks_to_skip_contrast):
end_tick = rightend_ticks_contrast[k]
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)

ax.set_ylim(ylim)
del redraw_axes_kwargs["y"]
else:
# Compute the end of each x-axes line.
if two_col_sankey: