Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion dabest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
from ._api import load, prop_dataset
from ._stats_tools import effsize as effsize
from ._stats_tools import confint_2group_diff as ci_2g
from ._effsize_objects import TwoGroupsEffectSize, PermutationTest
from ._dabest_object import Dabest

__version__ = "2024.03.29"

import os
if os.environ.get('SKIP_NUMBA_COMPILE') != '1':
from ._stats_tools.precompile import precompile_all, _NUMBA_COMPILED
if not _NUMBA_COMPILED:
precompile_all()

__version__ = "2024.03.30"
11 changes: 8 additions & 3 deletions dabest/_bootstrap_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ def __init__(
reps: int = 5000, # Number of bootstrap iterations to perform.
):
# Turn to pandas series.
x1 = pd.Series(x1).dropna()
# x1 = pd.Series(x1).dropna()
x1 = x1[~np.isnan(x1)]

diff = False

# Initialise stat_function
Expand All @@ -89,7 +91,9 @@ def __init__(
if x2 is None:
raise ValueError("Please specify x2.")

x2 = pd.Series(x2).dropna()
# x2 = pd.Series(x2).dropna()
x2 = x2[~np.isnan(x2)]

if len(x1) != len(x2):
raise ValueError("x1 and x2 are not the same length.")

Expand Down Expand Up @@ -134,7 +138,8 @@ def __init__(

elif x2 is not None and paired is None:
diff = True
x2 = pd.Series(x2).dropna()
# x2 = pd.Series(x2).dropna()
x2 = x2[~np.isnan(x2)]
# Generate statarrays for both arrays.
ref_statarray = sns.algorithms.bootstrap(x1, **sns_bootstrap_kwargs)
exp_statarray = sns.algorithms.bootstrap(x2, **sns_bootstrap_kwargs)
Expand Down
4 changes: 2 additions & 2 deletions dabest/_dabest_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def __init__(
# Determine the kind of estimation plot we need to produce.
if all([isinstance(i, (str, int, float)) for i in idx]):
# flatten out idx.
all_plot_groups = pd.unique(pd.Series([t for t in idx])).tolist()
all_plot_groups = pd.Series([t for t in idx]).unique().tolist()
if len(idx) > len(all_plot_groups):
err0 = "`idx` contains duplicated groups. Please remove any duplicates and try again."
raise ValueError(err0)
Expand All @@ -122,7 +122,7 @@ def __init__(
self.__idx = (idx,)

elif all([isinstance(i, (tuple, list)) for i in idx]):
all_plot_groups = pd.unique(pd.Series([tt for t in idx for tt in t])).tolist()
all_plot_groups = pd.Series([tt for t in idx for tt in t]).unique().tolist()

actual_groups_given = sum([len(i) for i in idx])

Expand Down
7 changes: 4 additions & 3 deletions dabest/_delta_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,13 +388,14 @@ def __init__(self, effectsizedataframe, permutation_count,
# compute the variances of each control group and each test group
control_var=[]
test_var=[]
grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}
for j, current_tuple in enumerate(idx):
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]
control_var.append(np.var(control, ddof=1))

tname = current_tuple[1]
test = dat[dat[xvar] == tname][yvar].copy()
test = grouped_data[tname]
test_var.append(np.var(test, ddof=1))
self.__control_var = np.array(control_var)
self.__test_var = np.array(test_var)
Expand All @@ -414,7 +415,7 @@ def __init__(self, effectsizedataframe, permutation_count,
self.__bootstraps)

# Compute the weighted average mean difference based on the raw data
self.__difference = es.weighted_delta(self.__effsizedf["difference"],
self.__difference = es.weighted_delta(np.array(self.__effsizedf["difference"]),
self.__group_var)

sorted_weighted_deltas = npsort(self.__bootstraps_weighted_delta)
Expand Down
39 changes: 24 additions & 15 deletions dabest/_effsize_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pandas as pd
import lqrt
from scipy.stats import norm
import numpy as np
from numpy import array, isnan, isinf, repeat, random, isin, abs, var
from numpy import sort as npsort
from numpy import nan as npnan
Expand Down Expand Up @@ -357,12 +358,17 @@ def _perform_statistical_test(self):
# References:
# https://en.wikipedia.org/wiki/McNemar%27s_test

df_temp = pd.DataFrame({"control": self.__control, "test": self.__test})
x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)])
x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)])
x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)])
x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)])
table = [[x1, x2], [x3, x4]]
# df_temp = pd.DataFrame({"control": self.__control, "test": self.__test})
# x1 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 0)])
# x2 = len(df_temp[(df_temp["control"] == 0) & (df_temp["test"] == 1)])
# x3 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 0)])
# x4 = len(df_temp[(df_temp["control"] == 1) & (df_temp["test"] == 1)])
# table = [[x1, x2], [x3, x4]]
x1 = np.sum((self.__control == 0) & (self.__test == 0))
x2 = np.sum((self.__control == 0) & (self.__test == 1))
x3 = np.sum((self.__control == 1) & (self.__test == 0))
x4 = np.sum((self.__control == 1) & (self.__test == 1))
table = np.array([[x1, x2], [x3, x4]])
_mcnemar = mcnemar(table, exact=True, correction=True)
self.__pvalue_mcnemar = _mcnemar.pvalue
self.__statistic_mcnemar = _mcnemar.statistic
Expand Down Expand Up @@ -861,18 +867,19 @@ def __pre_calc(self):
out = []
reprs = []

grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}
if self.__delta2:
mixed_data = []
for j, current_tuple in enumerate(idx):
if self.__is_paired != "sequential":
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]

for ix, tname in enumerate(current_tuple[1:]):
if self.__is_paired == "sequential":
cname = current_tuple[ix]
control = dat[dat[xvar] == cname][yvar].copy()
test = dat[dat[xvar] == tname][yvar].copy()
control = grouped_data[cname]
test = grouped_data[tname]
mixed_data.append(control)
mixed_data.append(test)
bootstraps_delta_delta = ci2g.compute_delta2_bootstrapped_diff(
Expand All @@ -888,13 +895,13 @@ def __pre_calc(self):
for j, current_tuple in enumerate(idx):
if self.__is_paired != "sequential":
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]

for ix, tname in enumerate(current_tuple[1:]):
if self.__is_paired == "sequential":
cname = current_tuple[ix]
control = dat[dat[xvar] == cname][yvar].copy()
test = dat[dat[xvar] == tname][yvar].copy()
control = grouped_data[cname]
test = grouped_data[tname]

result = TwoGroupsEffectSize(
control,
Expand Down Expand Up @@ -1055,16 +1062,18 @@ def __calc_lqrt(self):

out = []

grouped_data = {name: group[yvar].copy() for name, group in dat.groupby(xvar, observed=False)}

for j, current_tuple in enumerate(db_obj.idx):
if self.__is_paired != "sequential":
cname = current_tuple[0]
control = dat[dat[xvar] == cname][yvar].copy()
control = grouped_data[cname]

for ix, tname in enumerate(current_tuple[1:]):
if self.__is_paired == "sequential":
cname = current_tuple[ix]
control = dat[dat[xvar] == cname][yvar].copy()
test = dat[dat[xvar] == tname][yvar].copy()
control = grouped_data[cname]
test = grouped_data[tname]

if self.__is_paired:
# Refactored here in v0.3.0 for performance issues.
Expand Down
14 changes: 12 additions & 2 deletions dabest/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff._create_two_group_jackknife_indexes': ( 'API/confint_2group_diff.html#_create_two_group_jackknife_indexes',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.bootstrap_indices': ( 'API/confint_2group_diff.html#bootstrap_indices',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.calculate_group_var': ( 'API/confint_2group_diff.html#calculate_group_var',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.calculate_weighted_delta': ( 'API/confint_2group_diff.html#calculate_weighted_delta',
Expand All @@ -42,11 +44,17 @@
'dabest._stats_tools.confint_2group_diff.create_jackknife_indexes': ( 'API/confint_2group_diff.html#create_jackknife_indexes',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.create_repeated_indexes': ( 'API/confint_2group_diff.html#create_repeated_indexes',
'dabest/_stats_tools/confint_2group_diff.py')},
'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor',
'dabest/_stats_tools/confint_2group_diff.py'),
'dabest._stats_tools.confint_2group_diff.delta2_bootstrap_loop': ( 'API/confint_2group_diff.html#delta2_bootstrap_loop',
'dabest/_stats_tools/confint_2group_diff.py')},
'dabest._stats_tools.effsize': { 'dabest._stats_tools.effsize._cliffs_delta_core': ( 'API/effsize.html#_cliffs_delta_core',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize._compute_hedges_correction_factor': ( 'API/effsize.html#_compute_hedges_correction_factor',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize._compute_standardizers': ( 'API/effsize.html#_compute_standardizers',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize._mann_whitney_u': ( 'API/effsize.html#_mann_whitney_u',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize.cliffs_delta': ( 'API/effsize.html#cliffs_delta',
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize.cohens_d': ( 'API/effsize.html#cohens_d',
Expand All @@ -61,6 +69,8 @@
'dabest/_stats_tools/effsize.py'),
'dabest._stats_tools.effsize.weighted_delta': ( 'API/effsize.html#weighted_delta',
'dabest/_stats_tools/effsize.py')},
'dabest._stats_tools.precompile': { 'dabest._stats_tools.precompile.precompile_all': ( 'API/precompile.html#precompile_all',
'dabest/_stats_tools/precompile.py')},
'dabest.forest_plot': { 'dabest.forest_plot.extract_plot_data': ( 'API/forest_plot.html#extract_plot_data',
'dabest/forest_plot.py'),
'dabest.forest_plot.forest_plot': ('API/forest_plot.html#forest_plot', 'dabest/forest_plot.py'),
Expand Down
Loading
Loading