Skip to content

Commit 6ff4592

Browse files
Jacobluke-JAnns98
authored and committed
Sync changes to nbdev notebooks
1 parent 8c7a33f commit 6ff4592

File tree

6 files changed

+98
-63
lines changed

6 files changed

+98
-63
lines changed

nbs/API/confint_2group_diff.ipynb

Lines changed: 48 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -213,24 +213,26 @@
213213
"\n",
214214
" return out\n",
215215
"\n",
216-
"@njit(cache=True) # parallelization must be turned off for random number generation\n",
217-
"def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired):\n",
216+
"\n",
217+
"@njit(cache=True)\n",
218+
"def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired, proportional=False):\n",
219+
" \"\"\"\n",
220+
" Compute bootstrapped differences for delta-delta, handling both regular and proportional data\n",
221+
" \"\"\"\n",
218222
" np.random.seed(rng_seed)\n",
219-
" out_delta_g = np.empty(resamples)\n",
220223
" deltadelta = np.empty(resamples)\n",
224+
" out_delta_g = np.empty(resamples)\n",
221225
" \n",
222226
" n1, n2, n3, n4 = len(x1), len(x2), len(x3), len(x4)\n",
223-
" if is_paired:\n",
224-
" if n1 != n2 or n3 != n4:\n",
225-
" raise ValueError(\"Each control group must have the same length as its corresponding test group in paired analysis.\")\n",
226-
" \n",
227+
" if is_paired and (n1 != n2 or n3 != n4):\n",
228+
" raise ValueError(\"Each control group must have the same length as its corresponding test group in paired analysis.\")\n",
227229
"\n",
228230
" # Bootstrapping\n",
229231
" for i in range(resamples):\n",
230232
" # Paired or unpaired resampling\n",
231233
" if is_paired:\n",
232-
" indices_1 = np.random.choice(len(x1),len(x1))\n",
233-
" indices_2 = np.random.choice(len(x3),len(x3))\n",
234+
" indices_1 = np.random.choice(len(x1), len(x1))\n",
235+
" indices_2 = np.random.choice(len(x3), len(x3))\n",
234236
" x1_sample, x2_sample = x1[indices_1], x2[indices_1]\n",
235237
" x3_sample, x4_sample = x3[indices_2], x4[indices_2]\n",
236238
" else:\n",
@@ -241,13 +243,14 @@
241243
" x1_sample, x2_sample = x1[indices_1], x2[indices_2]\n",
242244
" x3_sample, x4_sample = x3[indices_3], x4[indices_4]\n",
243245
"\n",
244-
" # Calculating deltas\n",
246+
" # Calculate deltas\n",
245247
" delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n",
246248
" delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n",
247249
" delta_delta = delta_2 - delta_1\n",
248-
"\n",
250+
" \n",
249251
" deltadelta[i] = delta_delta\n",
250-
" out_delta_g[i] = delta_delta / pooled_sd\n",
252+
"\n",
253+
" out_delta_g[i] = delta_delta if proportional else delta_delta/pooled_sd\n",
251254
"\n",
252255
" return out_delta_g, deltadelta\n",
253256
"\n",
@@ -258,39 +261,42 @@
258261
" x3: np.ndarray, # Control group 2\n",
259262
" x4: np.ndarray, # Test group 2\n",
260263
" is_paired: str = None,\n",
261-
" resamples: int = 5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.\n",
262-
" random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n",
263-
") -> (\n",
264-
" tuple\n",
265-
"): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta\n",
264+
" resamples: int = 5000,\n",
265+
" random_seed: int = 12345,\n",
266+
" proportional: bool = False\n",
267+
") -> tuple:\n",
266268
" \"\"\"\n",
267-
" Bootstraps the effect size deltas' g.\n",
268-
"\n",
269+
" Bootstraps the effect size deltas' g or proportional delta-delta\n",
269270
" \"\"\"\n",
270-
"\n",
271271
" x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4])\n",
272-
"\n",
273-
" # Calculating pooled sample standard deviation\n",
274-
" stds = [np.std(x) for x in [x1, x2, x3, x4]]\n",
275-
" ns = [len(x) for x in [x1, x2, x3, x4]]\n",
276-
"\n",
277-
" sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))\n",
278-
" sd_denominator = sum(n - 1 for n in ns)\n",
279-
"\n",
280-
" # Avoid division by zero\n",
281-
" if sd_denominator == 0:\n",
282-
" raise ValueError(\"Insufficient data to compute pooled standard deviation.\")\n",
283-
"\n",
284-
" pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n",
285-
"\n",
286-
" # Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later)\n",
287-
" if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:\n",
288-
" raise ValueError(\"Pooled sample standard deviation is NaN or zero.\")\n",
289-
"\n",
290-
" out_delta_g, deltadelta = delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired)\n",
291-
"\n",
292-
" # Empirical delta_g calculation\n",
293-
" delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd\n",
272+
" \n",
273+
" if proportional:\n",
274+
" # For proportional data, pass 1.0 as dummy pooled_sd (won't be used)\n",
275+
" out_delta_g, deltadelta = delta2_bootstrap_loop(\n",
276+
" x1, x2, x3, x4, resamples, 1.0, random_seed, is_paired, proportional=True\n",
277+
" )\n",
278+
" # For proportional data, delta_g is the empirical delta-delta\n",
279+
" delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1)))\n",
280+
" else:\n",
281+
" # Calculate pooled sample standard deviation for non-proportional data\n",
282+
" stds = [np.std(x) for x in [x1, x2, x3, x4]]\n",
283+
" ns = [len(x) for x in [x1, x2, x3, x4]]\n",
284+
" \n",
285+
" sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))\n",
286+
" sd_denominator = sum(n - 1 for n in ns)\n",
287+
" \n",
288+
" if sd_denominator == 0:\n",
289+
" raise ValueError(\"Insufficient data to compute pooled standard deviation.\")\n",
290+
" \n",
291+
" pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n",
292+
" \n",
293+
" if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:\n",
294+
" raise ValueError(\"Pooled sample standard deviation is NaN or zero.\")\n",
295+
" \n",
296+
" out_delta_g, deltadelta = delta2_bootstrap_loop(\n",
297+
" x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired, proportional=False\n",
298+
" )\n",
299+
" delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd\n",
294300
"\n",
295301
" return out_delta_g, delta_g, deltadelta\n",
296302
"\n",

nbs/API/dabest_object.ipynb

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@
6464
"source": [
6565
"#| export\n",
6666
"# Import standard data science libraries\n",
67+
"import warnings\n",
6768
"from numpy import array, repeat, random, issubdtype, number\n",
6869
"import numpy as np\n",
6970
"import pandas as pd\n",
@@ -138,7 +139,6 @@
138139
"\n",
139140
" # Check if there is NaN under any of the paired settings\n",
140141
" if self.__is_paired and self.__output_data.isnull().values.any():\n",
141-
" import warnings\n",
142142
" warn1 = f\"NaN values detected under paired setting and removed,\"\n",
143143
" warn2 = f\" please check your data.\"\n",
144144
" warnings.warn(warn1 + warn2)\n",
@@ -576,10 +576,10 @@
576576
" if x is None:\n",
577577
" error_msg = \"If `delta2` is True. `x` parameter cannot be None. String or list expected\"\n",
578578
" raise ValueError(error_msg)\n",
579-
" \n",
579+
" \n",
580580
" if self.__proportional:\n",
581-
" err0 = \"`proportional` and `delta2` cannot be True at the same time.\"\n",
582-
" raise ValueError(err0)\n",
581+
" mes1 = \"Only mean_diff is supported for proportional data when `delta2` is True\"\n",
582+
" warnings.warn(message=mes1, category=UserWarning)\n",
583583
"\n",
584584
" # idx should not be specified\n",
585585
" if idx:\n",
@@ -657,8 +657,6 @@
657657
" \"\"\"\n",
658658
" # Check if there is NaN under any of the paired settings\n",
659659
" if self.__is_paired is not None and self.__output_data.isnull().values.any():\n",
660-
" print(\"Nan\")\n",
661-
" import warnings\n",
662660
" warn1 = f\"NaN values detected under paired setting and removed,\"\n",
663661
" warn2 = f\" please check your data.\"\n",
664662
" warnings.warn(warn1 + warn2)\n",
@@ -710,7 +708,6 @@
710708
"\n",
711709
" # Check if there is NaN under any of the paired settings\n",
712710
" if self.__is_paired is not None and self.__output_data.isnull().values.any():\n",
713-
" import warnings\n",
714711
" warn1 = f\"NaN values detected under paired setting and removed,\"\n",
715712
" warn2 = f\" please check your data.\"\n",
716713
" warnings.warn(warn1 + warn2)\n",

nbs/API/effsize_objects.ipynb

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -317,7 +317,8 @@
317317
" raise ValueError(err1)\n",
318318
"\n",
319319
" if self.__proportional and self.__effect_size not in [\"mean_diff\", \"cohens_h\"]:\n",
320-
" err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\"\n",
320+
" err1 = \"`proportional` is True; therefore effect size other than mean_diff and cohens_h is not defined.\" + \\\n",
321+
" \"If you are calculating deltas' g, it's the same as delta-delta when `proportional` is True\"\n",
321322
" raise ValueError(err1)\n",
322323
"\n",
323324
" if self.__proportional and (\n",
@@ -1043,6 +1044,7 @@
10431044
" self.__is_paired,\n",
10441045
" self.__resamples,\n",
10451046
" self.__random_seed,\n",
1047+
" self.__proportional,\n",
10461048
" )\n",
10471049
"\n",
10481050
" for j, current_tuple in enumerate(idx):\n",

nbs/API/misc_tools.ipynb

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -643,12 +643,12 @@
643643
" if color_by_subgroups:\n",
644644
" plot_palette_raw = dict()\n",
645645
" plot_palette_contrast = dict()\n",
646-
" # plot_palette_bar set to None because currently there is no empty_circle toggle for proportion plots\n",
647-
" plot_palette_bar = None\n",
646+
" plot_palette_bar = dict()\n",
648647
" for i in range(len(idx)):\n",
649648
" for names_i in idx[i]:\n",
650649
" plot_palette_raw[names_i] = swarm_colors[i]\n",
651650
" plot_palette_contrast[names_i] = contrast_colors[i]\n",
651+
" plot_palette_bar[names_i] = bar_color[i]\n",
652652
" else:\n",
653653
" plot_palette_raw = dict(zip(categories, swarm_colors))\n",
654654
" plot_palette_contrast = dict(zip(categories, contrast_colors))\n",
@@ -665,11 +665,12 @@
665665
" if color_by_subgroups:\n",
666666
" plot_palette_raw = dict()\n",
667667
" plot_palette_contrast = dict()\n",
668-
" plot_palette_bar = None # plot_palette_bar set to None because currently there is no empty_circle toggle for proportion plots\n",
668+
" plot_palette_bar = dict()\n",
669669
" for i in range(len(idx)):\n",
670670
" for names_i in idx[i]:\n",
671671
" plot_palette_raw[names_i] = swarm_colors[i]\n",
672672
" plot_palette_contrast[names_i] = contrast_colors[i]\n",
673+
" plot_palette_bar[names_i] = bar_color[i]\n",
673674
" else:\n",
674675
" plot_palette_raw = dict(zip(names, swarm_colors))\n",
675676
" plot_palette_contrast = dict(zip(names, contrast_colors))\n",
@@ -1071,6 +1072,7 @@
10711072
" ticks_with_counts.append(f\"{t}\\n(N={value})\")\n",
10721073
"\n",
10731074
" fontsize_rawxlabel = plot_kwargs.get(\"fontsize_rawxlabel\")\n",
1075+
" set_major_loc_method(plt.FixedLocator(get_ticks()))\n",
10741076
" set_label(ticks_with_counts, fontsize=fontsize_rawxlabel)\n",
10751077
"\n",
10761078
" # Ensure ticks are at the correct locations\n",

nbs/API/plot_tools.ipynb

Lines changed: 37 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -781,14 +781,17 @@
781781
" right_idx = []\n",
782782
" # Design for Sankey Flow Diagram\n",
783783
" sankey_idx = (\n",
784-
" [\n",
785-
" (control, test)\n",
786-
" for i in idx\n",
787-
" for control, test in zip(i[:], (i[1:] + (i[0],)))\n",
788-
" ]\n",
789-
" if flow\n",
790-
" else temp_idx\n",
791-
" )\n",
784+
" [\n",
785+
" (control, test)\n",
786+
" for i in idx\n",
787+
" for control, test in zip(\n",
788+
" i[:],\n",
789+
" (tuple(i[1:]) + (i[0],)) if isinstance(i, tuple) else (list(i[1:]) + [i[0]])\n",
790+
" )\n",
791+
" ]\n",
792+
" if flow\n",
793+
" else temp_idx\n",
794+
")\n",
792795
" for i in sankey_idx:\n",
793796
" left_idx.append(i[0])\n",
794797
" right_idx.append(i[1])\n",
@@ -2115,6 +2118,7 @@
21152118
" plot_data: pd.DataFrame, \n",
21162119
" bar_color: str, \n",
21172120
" plot_palette_bar: dict, \n",
2121+
" color_col: str,\n",
21182122
" plot_kwargs: dict, \n",
21192123
" barplot_kwargs: dict, \n",
21202124
" horizontal: bool\n",
@@ -2138,6 +2142,8 @@
21382142
" Color of the bar.\n",
21392143
" plot_palette_bar : dict\n",
21402144
" Dictionary of colors used in the bar plot.\n",
2145+
" color_col : str\n",
2146+
" Column name of the color column.\n",
21412147
" plot_kwargs : dict\n",
21422148
" Keyword arguments for the plot.\n",
21432149
" barplot_kwargs : dict\n",
@@ -2152,7 +2158,26 @@
21522158
" else:\n",
21532159
" x_var, y_var, orient = all_plot_groups, np.ones(len(all_plot_groups)), \"v\"\n",
21542160
"\n",
2155-
" bar1_df = pd.DataFrame({xvar: x_var, \"proportion\": y_var})\n",
2161+
" # Create bar1_df with basic columns\n",
2162+
" bar1_df = pd.DataFrame({\n",
2163+
" xvar: x_var, \n",
2164+
" \"proportion\": y_var\n",
2165+
" })\n",
2166+
"\n",
2167+
" # Handle colors\n",
2168+
" if color_col:\n",
2169+
" # Get first color value for each group\n",
2170+
" color_mapping = plot_data.groupby(xvar, observed=False)[color_col].first()\n",
2171+
" bar1_df[color_col] = [color_mapping.get(group) for group in all_plot_groups]\n",
2172+
" \n",
2173+
" # Map colors, defaulting to bar_color if no match\n",
2174+
" edge_colors = [\n",
2175+
" plot_palette_bar.get(hue_val, bar_color) \n",
2176+
" for hue_val in bar1_df[color_col]\n",
2177+
" ]\n",
2178+
" else:\n",
2179+
" edge_colors = bar_color\n",
2180+
"\n",
21562181
"\n",
21572182
" bar1 = sns.barplot(\n",
21582183
" data=bar1_df,\n",
@@ -2162,7 +2187,7 @@
21622187
" order=all_plot_groups,\n",
21632188
" linewidth=2,\n",
21642189
" facecolor=(1, 1, 1, 0),\n",
2165-
" edgecolor=bar_color,\n",
2190+
" edgecolor=edge_colors,\n",
21662191
" zorder=1,\n",
21672192
" orient=orient,\n",
21682193
" )\n",
@@ -2173,6 +2198,8 @@
21732198
" ax=rawdata_axes,\n",
21742199
" order=all_plot_groups,\n",
21752200
" palette=plot_palette_bar,\n",
2201+
" hue=color_col,\n",
2202+
" dodge=False,\n",
21762203
" zorder=1,\n",
21772204
" orient=orient,\n",
21782205
" **barplot_kwargs\n",

nbs/API/plotter.ipynb

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -334,6 +334,7 @@
334334
" plot_data = plot_data, \n",
335335
" bar_color = bar_color, \n",
336336
" plot_palette_bar = plot_palette_bar, \n",
337+
" color_col = color_col,\n",
337338
" plot_kwargs = plot_kwargs, \n",
338339
" barplot_kwargs = barplot_kwargs,\n",
339340
" horizontal = horizontal,\n",

0 commit comments

Comments
 (0)