|
213 | 213 | "\n", |
214 | 214 | " return out\n", |
215 | 215 | "\n", |
216 | | - "@njit(cache=True) # parallelization must be turned off for random number generation\n", |
217 | | - "def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired):\n", |
| 216 | + "\n", |
| 217 | + "@njit(cache=True)\n", |
| 218 | + "def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_paired, proportional=False):\n", |
| 219 | + " \"\"\"\n", |
| 220 | + " Compute bootstrapped differences for delta-delta, handling both regular and proportional data\n", |
| 221 | + " \"\"\"\n", |
218 | 222 | " np.random.seed(rng_seed)\n", |
219 | | - " out_delta_g = np.empty(resamples)\n", |
220 | 223 | " deltadelta = np.empty(resamples)\n", |
| 224 | + " out_delta_g = np.empty(resamples)\n", |
221 | 225 | " \n", |
222 | 226 | " n1, n2, n3, n4 = len(x1), len(x2), len(x3), len(x4)\n", |
223 | | - " if is_paired:\n", |
224 | | - " if n1 != n2 or n3 != n4:\n", |
225 | | - " raise ValueError(\"Each control group must have the same length as its corresponding test group in paired analysis.\")\n", |
226 | | - " \n", |
| 227 | + " if is_paired and (n1 != n2 or n3 != n4):\n", |
| 228 | + " raise ValueError(\"Each control group must have the same length as its corresponding test group in paired analysis.\")\n", |
227 | 229 | "\n", |
228 | 230 | " # Bootstrapping\n", |
229 | 231 | " for i in range(resamples):\n", |
230 | 232 | " # Paired or unpaired resampling\n", |
231 | 233 | " if is_paired:\n", |
232 | | - " indices_1 = np.random.choice(len(x1),len(x1))\n", |
233 | | - " indices_2 = np.random.choice(len(x3),len(x3))\n", |
| 234 | + " indices_1 = np.random.choice(len(x1), len(x1))\n", |
| 235 | + " indices_2 = np.random.choice(len(x3), len(x3))\n", |
234 | 236 | " x1_sample, x2_sample = x1[indices_1], x2[indices_1]\n", |
235 | 237 | " x3_sample, x4_sample = x3[indices_2], x4[indices_2]\n", |
236 | 238 | " else:\n", |
|
241 | 243 | " x1_sample, x2_sample = x1[indices_1], x2[indices_2]\n", |
242 | 244 | " x3_sample, x4_sample = x3[indices_3], x4[indices_4]\n", |
243 | 245 | "\n", |
244 | | - " # Calculating deltas\n", |
| 246 | + " # Calculate deltas\n", |
245 | 247 | " delta_1 = np.mean(x2_sample) - np.mean(x1_sample)\n", |
246 | 248 | " delta_2 = np.mean(x4_sample) - np.mean(x3_sample)\n", |
247 | 249 | " delta_delta = delta_2 - delta_1\n", |
248 | | - "\n", |
| 250 | + " \n", |
249 | 251 | " deltadelta[i] = delta_delta\n", |
250 | | - " out_delta_g[i] = delta_delta / pooled_sd\n", |
| 252 | + "\n", |
| 253 | + " out_delta_g[i] = delta_delta if proportional else delta_delta/pooled_sd\n", |
251 | 254 | "\n", |
252 | 255 | " return out_delta_g, deltadelta\n", |
253 | 256 | "\n", |
|
258 | 261 | " x3: np.ndarray, # Control group 2\n", |
259 | 262 | " x4: np.ndarray, # Test group 2\n", |
260 | 263 | " is_paired: str = None,\n", |
261 | | - " resamples: int = 5000, # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.\n", |
262 | | - " random_seed: int = 12345, # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.\n", |
263 | | - ") -> (\n", |
264 | | - " tuple\n", |
265 | | - "): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta\n", |
| 264 | + " resamples: int = 5000,\n", |
| 265 | + " random_seed: int = 12345,\n", |
| 266 | + " proportional: bool = False\n", |
| 267 | + ") -> tuple:\n", |
266 | 268 | " \"\"\"\n", |
267 | | - " Bootstraps the effect size deltas' g.\n", |
268 | | - "\n", |
| 269 | + " Bootstraps the effect size deltas' g or proportional delta-delta\n", |
269 | 270 | " \"\"\"\n", |
270 | | - "\n", |
271 | 271 | " x1, x2, x3, x4 = map(np.asarray, [x1, x2, x3, x4])\n", |
272 | | - "\n", |
273 | | - " # Calculating pooled sample standard deviation\n", |
274 | | - " stds = [np.std(x) for x in [x1, x2, x3, x4]]\n", |
275 | | - " ns = [len(x) for x in [x1, x2, x3, x4]]\n", |
276 | | - "\n", |
277 | | - " sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))\n", |
278 | | - " sd_denominator = sum(n - 1 for n in ns)\n", |
279 | | - "\n", |
280 | | - " # Avoid division by zero\n", |
281 | | - " if sd_denominator == 0:\n", |
282 | | - " raise ValueError(\"Insufficient data to compute pooled standard deviation.\")\n", |
283 | | - "\n", |
284 | | - " pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n", |
285 | | - "\n", |
286 | | - " # Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later)\n", |
287 | | - " if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:\n", |
288 | | - " raise ValueError(\"Pooled sample standard deviation is NaN or zero.\")\n", |
289 | | - "\n", |
290 | | - " out_delta_g, deltadelta = delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired)\n", |
291 | | - "\n", |
292 | | - " # Empirical delta_g calculation\n", |
293 | | - " delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd\n", |
| 272 | + " \n", |
| 273 | + " if proportional:\n", |
| 274 | + " # For proportional data, pass 1.0 as dummy pooled_sd (won't be used)\n", |
| 275 | + " out_delta_g, deltadelta = delta2_bootstrap_loop(\n", |
| 276 | + " x1, x2, x3, x4, resamples, 1.0, random_seed, is_paired, proportional=True\n", |
| 277 | + " )\n", |
| 278 | + " # For proportional data, delta_g is the empirical delta-delta\n", |
| 279 | + " delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1)))\n", |
| 280 | + " else:\n", |
| 281 | + " # Calculate pooled sample standard deviation for non-proportional data\n", |
| 282 | + " stds = [np.std(x) for x in [x1, x2, x3, x4]]\n", |
| 283 | + " ns = [len(x) for x in [x1, x2, x3, x4]]\n", |
| 284 | + " \n", |
| 285 | + " sd_numerator = sum((n - 1) * s**2 for n, s in zip(ns, stds))\n", |
| 286 | + " sd_denominator = sum(n - 1 for n in ns)\n", |
| 287 | + " \n", |
| 288 | + " if sd_denominator == 0:\n", |
| 289 | + " raise ValueError(\"Insufficient data to compute pooled standard deviation.\")\n", |
| 290 | + " \n", |
| 291 | + " pooled_sample_sd = np.sqrt(sd_numerator / sd_denominator)\n", |
| 292 | + " \n", |
| 293 | + " if np.isnan(pooled_sample_sd) or pooled_sample_sd == 0:\n", |
| 294 | + " raise ValueError(\"Pooled sample standard deviation is NaN or zero.\")\n", |
| 295 | + " \n", |
| 296 | + " out_delta_g, deltadelta = delta2_bootstrap_loop(\n", |
| 297 | + " x1, x2, x3, x4, resamples, pooled_sample_sd, random_seed, is_paired, proportional=False\n", |
| 298 | + " )\n", |
| 299 | + " delta_g = ((np.mean(x4) - np.mean(x3)) - (np.mean(x2) - np.mean(x1))) / pooled_sample_sd\n", |
294 | 300 | "\n", |
295 | 301 | " return out_delta_g, delta_g, deltadelta\n", |
296 | 302 | "\n", |
|
0 commit comments