Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion dabest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from ._api import load, prop_dataset
from ._stats_tools import effsize as effsize
from ._stats_tools import confint_2group_diff as ci_2g
from ._effsize_objects import TwoGroupsEffectSize, PermutationTest
from ._dabest_object import Dabest

__version__ = "2024.03.29"

import os
if os.environ.get('SKIP_NUMBA_COMPILE') != '1':
from ._stats_tools.precompile import precompile_all, _NUMBA_COMPILED
if not _NUMBA_COMPILED:
precompile_all()

__version__ = "2024.03.30"
11 changes: 8 additions & 3 deletions dabest/_bootstrap_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ def __init__(
reps: int = 5000, # Number of bootstrap iterations to perform.
):
# Turn to pandas series.
x1 = pd.Series(x1).dropna()
# x1 = pd.Series(x1).dropna()
x1 = x1[~np.isnan(x1)]

diff = False

# Initialise stat_function
Expand All @@ -89,7 +91,9 @@ def __init__(
if x2 is None:
raise ValueError("Please specify x2.")

x2 = pd.Series(x2).dropna()
# x2 = pd.Series(x2).dropna()
x2 = x2[~np.isnan(x2)]

if len(x1) != len(x2):
raise ValueError("x1 and x2 are not the same length.")

Expand Down Expand Up @@ -134,7 +138,8 @@ def __init__(

elif x2 is not None and paired is None:
diff = True
x2 = pd.Series(x2).dropna()
# x2 = pd.Series(x2).dropna()
x2 = x2[~np.isnan(x2)]
# Generate statarrays for both arrays.
ref_statarray = sns.algorithms.bootstrap(x1, **sns_bootstrap_kwargs)
exp_statarray = sns.algorithms.bootstrap(x2, **sns_bootstrap_kwargs)
Expand Down
4 changes: 2 additions & 2 deletions dabest/_dabest_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def __init__(
# Determine the kind of estimation plot we need to produce.
if all([isinstance(i, (str, int, float)) for i in idx]):
# flatten out idx.
all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()
all_plot_groups = pd.Series([t for t in idx]).unique().tolist()
if len(idx) > len(all_plot_groups):
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
raise ValueError(err0)
Expand All @@ -122,7 +122,7 @@ def __init__(
self.__idx = (idx,)

elif all([isinstance(i, (tuple, list)) for i in idx]):
all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()
all_plot_groups = pd.Series([tt for t in idx for tt in t]).unique().tolist()

actual_groups_given = sum([len(i) for i in idx])

Expand Down
7 changes: 4 additions & 3 deletions dabest/_delta_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,13 +388,14 @@ def __init__(self, effectsizedataframe, permutation_count,
# compute the variances of each control group and each test group
control_var=[]
test_var=[]
grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}
for j, current_tuple in enumerate(idx):
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]
control_var.append(np.var(control, ddof=1))

tname = current_tuple[1]
test = dat[dat[xvar] == tname][yvar].copy()
test = grouped_data[tname]
test_var.append(np.var(test, ddof=1))
self.__control_var = np.array(control_var)
self.__test_var = np.array(test_var)
Expand All @@ -414,7 +415,7 @@ def __init__(self, effectsizedataframe, permutation_count,
self.__bootstraps)

# Compute the weighted average mean difference based on the raw data
self.__difference = es.weighted_delta(self.__effsizedf["difference"],
self.__difference = es.weighted_delta(np.array(self.__effsizedf["difference"]),
self.__group_var)

sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)
Expand Down
39 changes: 24 additions & 15 deletions dabest/_effsize_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pandas as pd
import lqrt
from scipy.stats import norm
import numpy as np
from numpy import array, isnan, isinf, repeat, random, isin, abs, var
from numpy import sort as npsort
from numpy import nan as npnan
Expand Down Expand Up @@ -357,12 +358,17 @@ def _perform_statistical_test(self):
# References:
# https://en.wikipedia.org/wiki/McNemar%27s_test

df_temp = pd.DataFrame({"control": self.__control, "test": self.__test})
x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)])
x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)])
x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)])
x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)])
table = [[x1, x2], [x3, x4]]
# df_temp = pd.DataFrame({"control": self.__control, "test": self.__test})
# x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)])
# x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)])
# x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)])
# x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)])
# table = [[x1, x2], [x3, x4]]
x1 = np.sum((self.__control == 0) & (self.__test == 0))
x2 = np.sum((self.__control == 0) & (self.__test == 1))
x3 = np.sum((self.__control == 1) & (self.__test == 0))
x4 = np.sum((self.__control == 1) & (self.__test == 1))
table = np.array([[x1, x2], [x3, x4]])
_mcnemar = mcnemar(table, exact=True, correction=True)
self.__pvalue_mcnemar = _mcnemar.pvalue
self.__statistic_mcnemar = _mcnemar.statistic
Expand Down Expand Up @@ -861,18 +867,19 @@ def __pre_calc(self):
out = []
reprs = []

grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}
if self.__delta2:
mixed_data = []
for j, current_tuple in enumerate(idx):
if self.__is_paired != "sequential":
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]

for ix, tname in enumerate(current_tuple[1:]):
if self.__is_paired == "sequential":
cname = current_tuple[ix]
control = dat[dat[xvar] == cname][yvar].copy()
test = dat[dat[xvar] == tname][yvar].copy()
control = grouped_data[cname]
test = grouped_data[tname]
mixed_data.append(control)
mixed_data.append(test)
bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(
Expand All @@ -888,13 +895,13 @@ def __pre_calc(self):
for j, current_tuple in enumerate(idx):
if self.__is_paired != "sequential":
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]

for ix, tname in enumerate(current_tuple[1:]):
if self.__is_paired == "sequential":
cname = current_tuple[ix]
control = dat[dat[xvar] == cname][yvar].copy()
test = dat[dat[xvar] == tname][yvar].copy()
control = grouped_data[cname]
test = grouped_data[tname]

result = TwoGroupsEffectSize(
control,
Expand Down Expand Up @@ -1055,16 +1062,18 @@ def __calc_lqrt(self):

out = []

grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}

for j, current_tuple in enumerate(db_obj.idx):
if self.__is_paired != "sequential":
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]

for ix, tname in enumerate(current_tuple[1:]):
if self.__is_paired == "sequential":
cname = current_tuple[ix]
control = dat[dat[xvar] == cname][yvar].copy()
test = dat[dat[xvar] == tname][yvar].copy()
control = grouped_data[cname]
test = grouped_data[tname]

if self.__is_paired:
# Refactored here in v0.3.0 for performance issues.
Expand Down
14 changes: 12 additions & 2 deletions dabest/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff._create_two_group_jackknife_indexes': ( 'API/confint_2group_diff.html#_create_two_group_jackknife_indexes',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.bootstrap_indices': ( 'API/confint_2group_diff.html#bootstrap_indices',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.calculate_group_var': ( 'API/confint_2group_diff.html#calculate_group_var',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.calculate_weighted_delta': ( 'API/confint_2group_diff.html#calculate_weighted_delta',
Expand All @@ -42,11 +44,17 @@
'dabest._stats_tools.confint_2group_diff.create_jackknife_indexes': ( 'API/confint_2group_diff.html#create_jackknife_indexes',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.create_repeated_indexes': ( 'API/confint_2group_diff.html#create_repeated_indexes',
'dabest/_stats_tools/confint_2group_diff.py')},
'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.delta2_bootstrap_loop': ( 'API/confint_2group_diff.html#delta2_bootstrap_loop',
'dabest/_stats_tools/confint_2group_diff.py')},
'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._cliffs_delta_core': ( 'API/effsize.html#_cliffs_delta_core',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize._compute_standardizers': ( 'API/effsize.html#_compute_standardizers',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize._mann_whitney_u': ( 'API/effsize.html#_mann_whitney_u',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize.cliffs_delta': ( 'API/effsize.html#cliffs_delta',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize.cohens_d': ( 'API/effsize.html#cohens_d',
Expand All @@ -61,6 +69,8 @@
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize.weighted_delta': ( 'API/effsize.html#weighted_delta',
'dabest/_stats_tools/effsize.py')},
'dabest._stats_tools.precompile': { 'dabest._stats_tools.precompile.precompile_all': ( 'API/precompile.html#precompile_all',
'dabest/_stats_tools/precompile.py')},
'dabest.forest_plot': { 'dabest.forest_plot.extract_plot_data': ( 'API/forest_plot.html#extract_plot_data',
'dabest/forest_plot.py'),
'dabest.forest_plot.forest_plot': ('API/forest_plot.html#forest_plot', 'dabest/forest_plot.py'),
Expand Down
Loading
Loading