Skip to content

Commit 7c2e9c9

Browse files
authored
Merge pull request #191 from ACCLAB/baseline_ec
Baseline ec
2 parents d3d531f + 1c660f8 commit 7c2e9c9

File tree

96 files changed

+595
-213
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+595
-213
lines changed

dabest/_dabest_object.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def __init__(
112112
# Determine the kind of estimation plot we need to produce.
113113
if all([isinstance(i, (str, int, float)) for i in idx]):
114114
# flatten out idx.
115-
all_plot_groups = pd.unique([t for t in idx]).tolist()
115+
all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()
116116
if len(idx) > len(all_plot_groups):
117117
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
118118
raise ValueError(err0)
@@ -122,7 +122,7 @@ def __init__(
122122
self.__idx = (idx,)
123123

124124
elif all([isinstance(i, (tuple, list)) for i in idx]):
125-
all_plot_groups = pd.unique([tt for t in idx for tt in t]).tolist()
125+
all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()
126126

127127
actual_groups_given = sum([len(i) for i in idx])
128128

@@ -663,9 +663,9 @@ def _get_plot_data(self, x, y, all_plot_groups):
663663

664664

665665
if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
666-
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
666+
plot_data[self.__xvar].cat.remove_unused_categories()
667667
plot_data[self.__xvar].cat.reorder_categories(
668-
all_plot_groups, ordered=True, inplace=True
668+
all_plot_groups, ordered=True
669669
)
670670
else:
671671
plot_data[self.__xvar] = pd.Categorical(

dabest/_effsize_objects.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ def __init__(
167167
self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
168168
self.__pct_low = sorted_bootstraps[pct_idx_low]
169169
self.__pct_high = sorted_bootstraps[pct_idx_high]
170+
171+
self._get_bootstrap_baseline_ec()
170172

171173
self._perform_statistical_test()
172174

@@ -435,6 +437,92 @@ def to_dict(self):
435437
for a in attrs:
436438
out[a] = getattr(self, a)
437439
return out
440+
441+
def _get_bootstrap_baseline_ec(self):
    """
    Compute the baseline error curve statistics for the control group.

    The control sample is passed as both groups to the effect-size,
    jackknife and bootstrap routines, always unpaired. The results are
    stored on the instance and exposed through the ``bec_*`` properties:
    the control-vs-control effect size, its bootstraps, and the BCa and
    percentile confidence interval limits with their indices.

    Warns
    -----
    UserWarning
        If a BCa limit falls within the extreme 10 sorted bootstrap values,
        or if a BCa limit cannot be computed (it is then set to the
        control-vs-control effect size itself).
    """
    from ._stats_tools import confint_2group_diff as ci2g
    from ._stats_tools import effsize as es

    # Cannot use self.__is_paired because it's for baseline curve
    is_paired = None

    difference = es.two_group_difference(
        self.__control, self.__control, is_paired, self.__effect_size
    )
    self.__bec_difference = difference

    jackknives = ci2g.compute_meandiff_jackknife(
        self.__control, self.__control, is_paired, self.__effect_size
    )
    acceleration_value = ci2g._calc_accel(jackknives)

    bootstraps = ci2g.compute_bootstrapped_diff(
        self.__control,
        self.__control,
        is_paired,
        self.__effect_size,
        self.__resamples,
        self.__random_seed,
    )
    self.__bootstraps_baseline_ec = bootstraps

    # We don't have to consider infinities in bootstrap_baseline_ec
    sorted_bootstraps = npsort(bootstraps)

    bias_correction = ci2g.compute_meandiff_bias_correction(
        bootstraps, difference
    )

    # Compute BCa intervals.
    bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
        bias_correction,
        acceleration_value,
        self.__resamples,
        self.__ci,
    )
    self.__bec_bca_interval_idx = (bca_idx_low, bca_idx_high)

    # Use logical `not` rather than bitwise `~`: `~` on a plain Python bool
    # yields -1/-2 (both truthy), so it is only correct for NumPy booleans.
    low_is_nan = bool(isnan(bca_idx_low))
    high_is_nan = bool(isnan(bca_idx_high))

    if not low_is_nan and not high_is_nan:
        self.__bec_bca_low = sorted_bootstraps[bca_idx_low]
        self.__bec_bca_high = sorted_bootstraps[bca_idx_high]

        err1 = "The $lim_type limit of the interval"
        err2 = "was in the $loc 10 values."
        err3 = "The result for baseline curve should be considered unstable."
        err_temp = Template(" ".join([err1, err2, err3]))

        if bca_idx_low <= 10:
            warnings.warn(
                err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1
            )

        if bca_idx_high >= self.__resamples - 9:
            warnings.warn(
                err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1
            )

    else:
        # NOTE(review): if only one of the two indices is NaN, the other
        # limit is not assigned in this branch, so its property would raise
        # AttributeError -- confirm whether compute_interval_limits can
        # return such a mixed pair.
        err1 = "The $lim_type limit of the BCa interval of baseline curve cannot be computed."
        err2 = "It is set to the effect size itself."
        err3 = "All bootstrap values were likely all the same."
        err_temp = Template(" ".join([err1, err2, err3]))

        if low_is_nan:
            self.__bec_bca_low = difference
            warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0)

        if high_is_nan:
            self.__bec_bca_high = difference
            warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0)

    # Compute percentile intervals.
    pct_idx_low = int((self.__alpha / 2) * self.__resamples)
    pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)

    self.__bec_pct_interval_idx = (pct_idx_low, pct_idx_high)
    self.__bec_pct_low = sorted_bootstraps[pct_idx_low]
    self.__bec_pct_high = sorted_bootstraps[pct_idx_high]
438526

439527
@property
440528
def difference(self):
@@ -671,6 +759,54 @@ def proportional_difference(self):
671759
return self.__proportional_difference
672760
except AttributeError:
673761
return npnan
762+
763+
@property
def bec_difference(self):
    """
    The effect size obtained when the control group is compared with itself.
    """
    baseline_difference = self.__bec_difference
    return baseline_difference
766+
767+
@property
def bec_bootstraps(self):
    """
    The bootstraps generated for the baseline error curve
    (control group resampled against itself).
    """
    baseline_bootstraps = self.__bootstraps_baseline_ec
    return baseline_bootstraps
773+
774+
@property
def bec_bca_interval_idx(self):
    """
    The (low, high) pair of BCa interval limits computed for the baseline
    error curve. When both could be computed they are used as indices into
    the sorted baseline bootstraps.
    """
    bca_indices = self.__bec_bca_interval_idx
    return bca_indices
777+
778+
@property
def bec_bca_low(self):
    """
    Lower limit of the bias-corrected and accelerated (BCa) confidence
    interval for the baseline error.
    """
    lower_limit = self.__bec_bca_low
    return lower_limit
784+
785+
@property
def bec_bca_high(self):
    """
    Upper limit of the bias-corrected and accelerated (BCa) confidence
    interval for the baseline error.
    """
    upper_limit = self.__bec_bca_high
    return upper_limit
791+
792+
@property
def bec_pct_interval_idx(self):
    """
    The (low, high) positions in the sorted baseline bootstraps that are
    used as the percentile confidence interval limits.
    """
    pct_indices = self.__bec_pct_interval_idx
    return pct_indices
795+
796+
@property
def bec_pct_low(self):
    """
    Lower limit of the percentile confidence interval for the baseline error.
    """
    lower_limit = self.__bec_pct_low
    return lower_limit
802+
803+
@property
def bec_pct_high(self):
    """
    The percentile confidence interval upper limit for baseline error.
    """
    return self.__bec_pct_high
809+
674810

675811
# %% ../nbs/API/effsize_objects.ipynb 10
676812
class EffectSizeDataFrame(object):
@@ -843,6 +979,14 @@ def __pre_calc(self):
843979
"pvalue_kruskal",
844980
"statistic_kruskal",
845981
"proportional_difference",
982+
"bec_difference",
983+
"bec_bootstraps",
984+
"bec_bca_interval_idx",
985+
"bec_bca_low",
986+
"bec_bca_high",
987+
"bec_pct_interval_idx",
988+
"bec_pct_low",
989+
"bec_pct_high",
846990
]
847991
self.__results = out_.reindex(columns=columns_in_order)
848992
self.__results.dropna(axis="columns", how="all", inplace=True)
@@ -1027,6 +1171,7 @@ def plot(
10271171
delta_text_kwargs=None,
10281172
delta_dot=True,
10291173
delta_dot_kwargs=None,
1174+
show_baseline_ec=False,
10301175
):
10311176
"""
10321177
Creates an estimation plot for the effect size of interest.
@@ -1208,6 +1353,13 @@ def plot(
12081353
delta_dot_kwargs : dict, default None
12091354
Pass relevant keyword arguments. If None, the following keywords are passed:
12101355
{"marker": "^", "alpha": 0.5, "zorder": 2, "size": 3, "side": "right"}
1356+
show_baseline_ec : boolean, default False
1357+
Whether or not to display the baseline error curve. The baseline error curve
1358+
represents the distribution of the effect size when comparing the control
1359+
group to itself, providing a reference for the inherent variability or noise
1360+
in the data. When True, this curve is plotted alongside the main effect size
1361+
distribution, allowing for a visual comparison of the observed effect against
1362+
the baseline variability.
12111363
12121364
Returns
12131365
-------

dabest/misc_tools.py

Lines changed: 72 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,12 @@ def get_params(effectsize_df, plot_kwargs):
154154
if err_color is None:
155155
err_color = "black"
156156

157+
# Boolean for showing Baseline Curve
158+
show_baseline_ec = plot_kwargs["show_baseline_ec"]
159+
157160
return (dabest_obj, plot_data, xvar, yvar, is_paired, effect_size, proportional, all_plot_groups, idx,
158-
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color)
161+
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color,
162+
show_baseline_ec)
159163

160164
def get_kwargs(plot_kwargs, ytick_color):
161165
"""
@@ -627,7 +631,9 @@ def extract_contrast_plotting_ticks(is_paired, show_pairs, two_col_sankey, plot_
627631
t for t in range(0, len(plot_groups)) if t not in ticks_to_skip
628632
]
629633

630-
return ticks_to_skip, ticks_to_plot, ticks_to_skip_contrast, ticks_to_start_twocol_sankey
634+
ticks_for_baseline_ec = ticks_to_skip
635+
636+
return ticks_to_skip, ticks_to_plot, ticks_for_baseline_ec, ticks_to_skip_contrast, ticks_to_start_twocol_sankey
631637

632638
def set_xaxis_ticks_and_lims(show_delta2, show_mini_meta, rawdata_axes, contrast_axes, show_pairs, float_contrast,
633639
ticks_to_skip, contrast_xtick_labels, plot_kwargs):
@@ -904,70 +910,70 @@ def Cumming_Plot_Aesthetic_Adjustments(plot_kwargs, show_delta2, effect_size_typ
904910
contrast_axes.axhline(y=0, **reflines_kwargs)
905911

906912
if is_paired == "baseline" and show_pairs:
907-
if two_col_sankey:
908-
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
909-
ticks_to_start_twocol_sankey
910-
)
911-
elif proportional and is_paired is not None:
912-
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
913-
ticks_to_skip
914-
)
915-
else:
916-
rightend_ticks_raw = np.array(
917-
[len(i) - 1 for i in temp_idx]
918-
) + np.array(ticks_to_skip)
919-
for ax in [rawdata_axes]:
920-
sns.despine(ax=ax, bottom=True)
921-
922-
ylim = ax.get_ylim()
923-
xlim = ax.get_xlim()
924-
redraw_axes_kwargs["y"] = ylim[0]
925-
926-
if two_col_sankey:
927-
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
928-
end_tick = rightend_ticks_raw[k]
929-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
930-
else:
931-
for k, start_tick in enumerate(ticks_to_skip):
932-
end_tick = rightend_ticks_raw[k]
933-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
934-
ax.set_ylim(ylim)
935-
del redraw_axes_kwargs["y"]
936-
937-
if not proportional:
938-
temp_length = [(len(i) - 1) for i in idx]
939-
else:
940-
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
941-
if two_col_sankey:
942-
rightend_ticks_contrast = np.array(
943-
[len(i) - 2 for i in idx]
944-
) + np.array(ticks_to_start_twocol_sankey)
945-
elif proportional and is_paired is not None:
946-
rightend_ticks_contrast = np.array(
947-
[len(i) - 1 for i in idx]
948-
) + np.array(ticks_to_skip)
949-
else:
950-
rightend_ticks_contrast = np.array(temp_length) + np.array(
951-
ticks_to_skip_contrast
952-
)
953-
for ax in [contrast_axes]:
954-
sns.despine(ax=ax, bottom=True)
955-
956-
ylim = ax.get_ylim()
957-
xlim = ax.get_xlim()
958-
redraw_axes_kwargs["y"] = ylim[0]
959-
960-
if two_col_sankey:
961-
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
962-
end_tick = rightend_ticks_contrast[k]
963-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
964-
else:
965-
for k, start_tick in enumerate(ticks_to_skip_contrast):
966-
end_tick = rightend_ticks_contrast[k]
967-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
968-
969-
ax.set_ylim(ylim)
970-
del redraw_axes_kwargs["y"]
913+
if two_col_sankey:
914+
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
915+
ticks_to_start_twocol_sankey
916+
)
917+
elif proportional and is_paired is not None:
918+
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
919+
ticks_to_skip
920+
)
921+
else:
922+
rightend_ticks_raw = np.array(
923+
[len(i) - 1 for i in temp_idx]
924+
) + np.array(ticks_to_skip)
925+
for ax in [rawdata_axes]:
926+
sns.despine(ax=ax, bottom=True)
927+
928+
ylim = ax.get_ylim()
929+
xlim = ax.get_xlim()
930+
redraw_axes_kwargs["y"] = ylim[0]
931+
932+
if two_col_sankey:
933+
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
934+
end_tick = rightend_ticks_raw[k]
935+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
936+
else:
937+
for k, start_tick in enumerate(ticks_to_skip):
938+
end_tick = rightend_ticks_raw[k]
939+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
940+
ax.set_ylim(ylim)
941+
del redraw_axes_kwargs["y"]
942+
943+
if not proportional:
944+
temp_length = [(len(i) - 1) for i in idx]
945+
else:
946+
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
947+
if two_col_sankey:
948+
rightend_ticks_contrast = np.array(
949+
[len(i) - 2 for i in idx]
950+
) + np.array(ticks_to_start_twocol_sankey)
951+
elif proportional and is_paired is not None:
952+
rightend_ticks_contrast = np.array(
953+
[len(i) - 1 for i in idx]
954+
) + np.array(ticks_to_skip)
955+
else:
956+
rightend_ticks_contrast = np.array(temp_length) + np.array(
957+
ticks_to_skip_contrast
958+
)
959+
for ax in [contrast_axes]:
960+
sns.despine(ax=ax, bottom=True)
961+
962+
ylim = ax.get_ylim()
963+
xlim = ax.get_xlim()
964+
redraw_axes_kwargs["y"] = ylim[0]
965+
966+
if two_col_sankey:
967+
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
968+
end_tick = rightend_ticks_contrast[k]
969+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
970+
else:
971+
for k, start_tick in enumerate(ticks_to_skip_contrast):
972+
end_tick = rightend_ticks_contrast[k]
973+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
974+
975+
ax.set_ylim(ylim)
976+
del redraw_axes_kwargs["y"]
971977
else:
972978
# Compute the end of each x-axes line.
973979
if two_col_sankey:

0 commit comments

Comments
 (0)