Skip to content

Commit e1b9a0b

Browse files
committed
Squashed commit of the following:
commit 7c2e9c9 Merge: d3d531f 1c660f8 Author: Jacob Luke <[email protected]> Date: Fri Dec 20 11:25:45 2024 +0800 Merge pull request #191 from ACCLAB/baseline_ec Baseline ec commit 1c660f8 Author: Jacobluke- <[email protected]> Date: Fri Oct 25 14:15:32 2024 +0800 pandas deprecation related to categories commit 5df90f8 Author: Jacobluke- <[email protected]> Date: Mon Oct 21 17:09:15 2024 +0800 Bug fix and zero-dot effect size for baseline Fixed a bug where the baseline error curves for certain controls were wrongly plotted as the previous control's curve. The implementation is a bit different from dabestr because R allows nested DataFrames whereas Python does not. commit 994003c Author: Jacobluke- <[email protected]> Date: Fri Oct 18 14:51:50 2024 +0800 Add tests for baseline curve commit 91ff9be Author: Jacobluke- <[email protected]> Date: Fri Oct 18 14:51:38 2024 +0800 Prevent deprecation warning for pandas commit 2fc8052 Author: Jacobluke- <[email protected]> Date: Fri Oct 18 14:15:02 2024 +0800 Add support for baseline_curve
1 parent c849aca commit e1b9a0b

File tree

96 files changed

+591
-209
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+591
-209
lines changed

dabest/_dabest_object.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -663,9 +663,9 @@ def _get_plot_data(self, x, y, all_plot_groups):
663663

664664

665665
if isinstance(plot_data[self.__xvar].dtype, pd.CategoricalDtype):
666-
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
666+
plot_data[self.__xvar].cat.remove_unused_categories()
667667
plot_data[self.__xvar].cat.reorder_categories(
668-
all_plot_groups, ordered=True, inplace=True
668+
all_plot_groups, ordered=True
669669
)
670670
else:
671671
plot_data[self.__xvar] = pd.Categorical(

dabest/_effsize_objects.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ def __init__(
168168
self.__pct_interval_idx = (pct_idx_low, pct_idx_high)
169169
self.__pct_low = sorted_bootstraps[pct_idx_low]
170170
self.__pct_high = sorted_bootstraps[pct_idx_high]
171+
172+
self._get_bootstrap_baseline_ec()
171173

172174
self._perform_statistical_test()
173175

@@ -441,6 +443,92 @@ def to_dict(self):
441443
for a in attrs:
442444
out[a] = getattr(self, a)
443445
return out
446+
447+
def _get_bootstrap_baseline_ec(self):
448+
from ._stats_tools import confint_2group_diff as ci2g
449+
from ._stats_tools import effsize as es
450+
451+
# Do not reuse self.__is_paired here: the baseline curve compares the control group with itself, so is_paired is passed as None.
452+
is_paired = None
453+
454+
difference = es.two_group_difference(
455+
self.__control, self.__control, is_paired, self.__effect_size
456+
)
457+
self.__bec_difference = difference
458+
459+
jackknives = ci2g.compute_meandiff_jackknife(
460+
self.__control, self.__control, is_paired, self.__effect_size
461+
)
462+
463+
acceleration_value = ci2g._calc_accel(jackknives)
464+
465+
bootstraps = ci2g.compute_bootstrapped_diff(
466+
self.__control,
467+
self.__control,
468+
is_paired,
469+
self.__effect_size,
470+
self.__resamples,
471+
self.__random_seed,
472+
)
473+
self.__bootstraps_baseline_ec = bootstraps
474+
475+
sorted_bootstraps = npsort(self.__bootstraps_baseline_ec)
476+
# We don't have to consider infinities in bootstrap_baseline_ec
477+
478+
bias_correction = ci2g.compute_meandiff_bias_correction(
479+
self.__bootstraps_baseline_ec, difference
480+
)
481+
482+
# Compute BCa intervals.
483+
bca_idx_low, bca_idx_high = ci2g.compute_interval_limits(
484+
bias_correction,
485+
acceleration_value,
486+
self.__resamples,
487+
self.__ci,
488+
)
489+
490+
self.__bec_bca_interval_idx = (bca_idx_low, bca_idx_high)
491+
492+
if ~isnan(bca_idx_low) and ~isnan(bca_idx_high):
493+
self.__bec_bca_low = sorted_bootstraps[bca_idx_low]
494+
self.__bec_bca_high = sorted_bootstraps[bca_idx_high]
495+
496+
err1 = "The $lim_type limit of the interval"
497+
err2 = "was in the $loc 10 values."
498+
err3 = "The result for baseline curve should be considered unstable."
499+
err_temp = Template(" ".join([err1, err2, err3]))
500+
501+
if bca_idx_low <= 10:
502+
warnings.warn(
503+
err_temp.substitute(lim_type="lower", loc="bottom"), stacklevel=1
504+
)
505+
506+
if bca_idx_high >= self.__resamples - 9:
507+
warnings.warn(
508+
err_temp.substitute(lim_type="upper", loc="top"), stacklevel=1
509+
)
510+
511+
else:
512+
err1 = "The $lim_type limit of the BCa interval of baseline curve cannot be computed."
513+
err2 = "It is set to the effect size itself."
514+
err3 = "All bootstrap values were likely all the same."
515+
err_temp = Template(" ".join([err1, err2, err3]))
516+
517+
if isnan(bca_idx_low):
518+
self.__bec_bca_low = difference
519+
warnings.warn(err_temp.substitute(lim_type="lower"), stacklevel=0)
520+
521+
if isnan(bca_idx_high):
522+
self.__bec_bca_high = difference
523+
warnings.warn(err_temp.substitute(lim_type="upper"), stacklevel=0)
524+
525+
# Compute percentile intervals.
526+
pct_idx_low = int((self.__alpha / 2) * self.__resamples)
527+
pct_idx_high = int((1 - (self.__alpha / 2)) * self.__resamples)
528+
529+
self.__bec_pct_interval_idx = (pct_idx_low, pct_idx_high)
530+
self.__bec_pct_low = sorted_bootstraps[pct_idx_low]
531+
self.__bec_pct_high = sorted_bootstraps[pct_idx_high]
444532

445533
@property
446534
def difference(self):
@@ -677,6 +765,54 @@ def proportional_difference(self):
677765
return self.__proportional_difference
678766
except AttributeError:
679767
return npnan
768+
769+
@property
770+
def bec_difference(self):
771+
return self.__bec_difference
772+
773+
@property
774+
def bec_bootstraps(self):
775+
"""
776+
The generated baseline error bootstraps.
777+
"""
778+
return self.__bootstraps_baseline_ec
779+
780+
@property
781+
def bec_bca_interval_idx(self):
782+
return self.__bec_bca_interval_idx
783+
784+
@property
785+
def bec_bca_low(self):
786+
"""
787+
The bias-corrected and accelerated confidence interval lower limit for baseline error.
788+
"""
789+
return self.__bec_bca_low
790+
791+
@property
792+
def bec_bca_high(self):
793+
"""
794+
The bias-corrected and accelerated confidence interval upper limit for baseline error.
795+
"""
796+
return self.__bec_bca_high
797+
798+
@property
799+
def bec_pct_interval_idx(self):
800+
return self.__bec_pct_interval_idx
801+
802+
@property
803+
def bec_pct_low(self):
804+
"""
805+
The percentile confidence interval lower limit for baseline error.
806+
"""
807+
return self.__bec_pct_low
808+
809+
@property
810+
def bec_pct_high(self):
811+
"""
812+
The percentile confidence interval upper limit for baseline error.
813+
"""
814+
return self.__bec_pct_high
815+
680816

681817
# %% ../nbs/API/effsize_objects.ipynb 10
682818
class EffectSizeDataFrame(object):
@@ -850,6 +986,14 @@ def __pre_calc(self):
850986
"pvalue_kruskal",
851987
"statistic_kruskal",
852988
"proportional_difference",
989+
"bec_difference",
990+
"bec_bootstraps",
991+
"bec_bca_interval_idx",
992+
"bec_bca_low",
993+
"bec_bca_high",
994+
"bec_pct_interval_idx",
995+
"bec_pct_low",
996+
"bec_pct_high",
853997
]
854998
self.__results = out_.reindex(columns=columns_in_order)
855999
self.__results.dropna(axis="columns", how="all", inplace=True)
@@ -1036,6 +1180,7 @@ def plot(
10361180
delta_text_kwargs=None,
10371181
delta_dot=True,
10381182
delta_dot_kwargs=None,
1183+
show_baseline_ec=False,
10391184
):
10401185
"""
10411186
Creates an estimation plot for the effect size of interest.
@@ -1217,6 +1362,13 @@ def plot(
12171362
delta_dot_kwargs : dict, default None
12181363
Pass relevant keyword arguments. If None, the following keywords are passed:
12191364
{"marker": "^", "alpha": 0.5, "zorder": 2, "size": 3, "side": "right"}
1365+
show_baseline_ec : boolean, default False
1366+
Whether or not to display the baseline error curve. The baseline error curve
1367+
represents the distribution of the effect size when comparing the control
1368+
group to itself, providing a reference for the inherent variability or noise
1369+
in the data. When True, this curve is plotted alongside the main effect size
1370+
distribution, allowing for a visual comparison of the observed effect against
1371+
the baseline variability.
12201372
12211373
Returns
12221374
-------

dabest/misc_tools.py

Lines changed: 72 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,12 @@ def get_params(effectsize_df, plot_kwargs):
154154
if err_color is None:
155155
err_color = "black"
156156

157+
# Boolean for showing Baseline Curve
158+
show_baseline_ec = plot_kwargs["show_baseline_ec"]
159+
157160
return (dabest_obj, plot_data, xvar, yvar, is_paired, effect_size, proportional, all_plot_groups, idx,
158-
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color)
161+
show_delta2, show_mini_meta, float_contrast, show_pairs, effect_size_type, group_summaries, err_color,
162+
show_baseline_ec)
159163

160164
def get_kwargs(plot_kwargs, ytick_color):
161165
"""
@@ -627,7 +631,9 @@ def extract_contrast_plotting_ticks(is_paired, show_pairs, two_col_sankey, plot_
627631
t for t in range(0, len(plot_groups)) if t not in ticks_to_skip
628632
]
629633

630-
return ticks_to_skip, ticks_to_plot, ticks_to_skip_contrast, ticks_to_start_twocol_sankey
634+
ticks_for_baseline_ec = ticks_to_skip
635+
636+
return ticks_to_skip, ticks_to_plot, ticks_for_baseline_ec, ticks_to_skip_contrast, ticks_to_start_twocol_sankey
631637

632638
def set_xaxis_ticks_and_lims(show_delta2, show_mini_meta, rawdata_axes, contrast_axes, show_pairs, float_contrast,
633639
ticks_to_skip, contrast_xtick_labels, plot_kwargs):
@@ -904,70 +910,70 @@ def Cumming_Plot_Aesthetic_Adjustments(plot_kwargs, show_delta2, effect_size_typ
904910
contrast_axes.axhline(y=0, **reflines_kwargs)
905911

906912
if is_paired == "baseline" and show_pairs:
907-
if two_col_sankey:
908-
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
909-
ticks_to_start_twocol_sankey
910-
)
911-
elif proportional and is_paired is not None:
912-
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
913-
ticks_to_skip
914-
)
915-
else:
916-
rightend_ticks_raw = np.array(
917-
[len(i) - 1 for i in temp_idx]
918-
) + np.array(ticks_to_skip)
919-
for ax in [rawdata_axes]:
920-
sns.despine(ax=ax, bottom=True)
921-
922-
ylim = ax.get_ylim()
923-
xlim = ax.get_xlim()
924-
redraw_axes_kwargs["y"] = ylim[0]
925-
926-
if two_col_sankey:
927-
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
928-
end_tick = rightend_ticks_raw[k]
929-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
930-
else:
931-
for k, start_tick in enumerate(ticks_to_skip):
932-
end_tick = rightend_ticks_raw[k]
933-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
934-
ax.set_ylim(ylim)
935-
del redraw_axes_kwargs["y"]
936-
937-
if not proportional:
938-
temp_length = [(len(i) - 1) for i in idx]
939-
else:
940-
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
941-
if two_col_sankey:
942-
rightend_ticks_contrast = np.array(
943-
[len(i) - 2 for i in idx]
944-
) + np.array(ticks_to_start_twocol_sankey)
945-
elif proportional and is_paired is not None:
946-
rightend_ticks_contrast = np.array(
947-
[len(i) - 1 for i in idx]
948-
) + np.array(ticks_to_skip)
949-
else:
950-
rightend_ticks_contrast = np.array(temp_length) + np.array(
951-
ticks_to_skip_contrast
952-
)
953-
for ax in [contrast_axes]:
954-
sns.despine(ax=ax, bottom=True)
955-
956-
ylim = ax.get_ylim()
957-
xlim = ax.get_xlim()
958-
redraw_axes_kwargs["y"] = ylim[0]
959-
960-
if two_col_sankey:
961-
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
962-
end_tick = rightend_ticks_contrast[k]
963-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
964-
else:
965-
for k, start_tick in enumerate(ticks_to_skip_contrast):
966-
end_tick = rightend_ticks_contrast[k]
967-
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
968-
969-
ax.set_ylim(ylim)
970-
del redraw_axes_kwargs["y"]
913+
if two_col_sankey:
914+
rightend_ticks_raw = np.array([len(i) - 2 for i in idx]) + np.array(
915+
ticks_to_start_twocol_sankey
916+
)
917+
elif proportional and is_paired is not None:
918+
rightend_ticks_raw = np.array([len(i) - 1 for i in idx]) + np.array(
919+
ticks_to_skip
920+
)
921+
else:
922+
rightend_ticks_raw = np.array(
923+
[len(i) - 1 for i in temp_idx]
924+
) + np.array(ticks_to_skip)
925+
for ax in [rawdata_axes]:
926+
sns.despine(ax=ax, bottom=True)
927+
928+
ylim = ax.get_ylim()
929+
xlim = ax.get_xlim()
930+
redraw_axes_kwargs["y"] = ylim[0]
931+
932+
if two_col_sankey:
933+
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
934+
end_tick = rightend_ticks_raw[k]
935+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
936+
else:
937+
for k, start_tick in enumerate(ticks_to_skip):
938+
end_tick = rightend_ticks_raw[k]
939+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
940+
ax.set_ylim(ylim)
941+
del redraw_axes_kwargs["y"]
942+
943+
if not proportional:
944+
temp_length = [(len(i) - 1) for i in idx]
945+
else:
946+
temp_length = [(len(i) - 1) * 2 - 1 for i in idx]
947+
if two_col_sankey:
948+
rightend_ticks_contrast = np.array(
949+
[len(i) - 2 for i in idx]
950+
) + np.array(ticks_to_start_twocol_sankey)
951+
elif proportional and is_paired is not None:
952+
rightend_ticks_contrast = np.array(
953+
[len(i) - 1 for i in idx]
954+
) + np.array(ticks_to_skip)
955+
else:
956+
rightend_ticks_contrast = np.array(temp_length) + np.array(
957+
ticks_to_skip_contrast
958+
)
959+
for ax in [contrast_axes]:
960+
sns.despine(ax=ax, bottom=True)
961+
962+
ylim = ax.get_ylim()
963+
xlim = ax.get_xlim()
964+
redraw_axes_kwargs["y"] = ylim[0]
965+
966+
if two_col_sankey:
967+
for k, start_tick in enumerate(ticks_to_start_twocol_sankey):
968+
end_tick = rightend_ticks_contrast[k]
969+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
970+
else:
971+
for k, start_tick in enumerate(ticks_to_skip_contrast):
972+
end_tick = rightend_ticks_contrast[k]
973+
ax.hlines(xmin=start_tick, xmax=end_tick, **redraw_axes_kwargs)
974+
975+
ax.set_ylim(ylim)
976+
del redraw_axes_kwargs["y"]
971977
else:
972978
# Compute the end of each x-axes line.
973979
if two_col_sankey:

0 commit comments

Comments
 (0)