@@ -159,24 +159,26 @@ def compute_bootstrapped_diff(
159159
160160 return out
161161
162- @njit (cache = True ) # parallelization must be turned off for random number generation
163- def delta2_bootstrap_loop (x1 , x2 , x3 , x4 , resamples , pooled_sd , rng_seed , is_paired ):
162+
163+ @njit (cache = True )
164+ def delta2_bootstrap_loop (x1 , x2 , x3 , x4 , resamples , pooled_sd , rng_seed , is_paired , proportional = False ):
165+ """
166+ Compute bootstrapped differences for delta-delta, handling both regular and proportional data
167+ """
164168 np .random .seed (rng_seed )
165- out_delta_g = np .empty (resamples )
166169 deltadelta = np .empty (resamples )
170+ out_delta_g = np .empty (resamples )
167171
168172 n1 , n2 , n3 , n4 = len (x1 ), len (x2 ), len (x3 ), len (x4 )
169- if is_paired :
170- if n1 != n2 or n3 != n4 :
171- raise ValueError ("Each control group must have the same length as its corresponding test group in paired analysis." )
172-
173+ if is_paired and (n1 != n2 or n3 != n4 ):
174+ raise ValueError ("Each control group must have the same length as its corresponding test group in paired analysis." )
173175
174176 # Bootstrapping
175177 for i in range (resamples ):
176178 # Paired or unpaired resampling
177179 if is_paired :
178- indices_1 = np .random .choice (len (x1 ),len (x1 ))
179- indices_2 = np .random .choice (len (x3 ),len (x3 ))
180+ indices_1 = np .random .choice (len (x1 ), len (x1 ))
181+ indices_2 = np .random .choice (len (x3 ), len (x3 ))
180182 x1_sample , x2_sample = x1 [indices_1 ], x2 [indices_1 ]
181183 x3_sample , x4_sample = x3 [indices_2 ], x4 [indices_2 ]
182184 else :
@@ -187,13 +189,14 @@ def delta2_bootstrap_loop(x1, x2, x3, x4, resamples, pooled_sd, rng_seed, is_pai
187189 x1_sample , x2_sample = x1 [indices_1 ], x2 [indices_2 ]
188190 x3_sample , x4_sample = x3 [indices_3 ], x4 [indices_4 ]
189191
190- # Calculating deltas
192+ # Calculate deltas
191193 delta_1 = np .mean (x2_sample ) - np .mean (x1_sample )
192194 delta_2 = np .mean (x4_sample ) - np .mean (x3_sample )
193195 delta_delta = delta_2 - delta_1
194-
196+
195197 deltadelta [i ] = delta_delta
196- out_delta_g [i ] = delta_delta / pooled_sd
198+
199+ out_delta_g [i ] = delta_delta if proportional else delta_delta / pooled_sd
197200
198201 return out_delta_g , deltadelta
199202
@@ -204,39 +207,42 @@ def compute_delta2_bootstrapped_diff(
204207 x3 : np .ndarray , # Control group 2
205208 x4 : np .ndarray , # Test group 2
206209 is_paired : str = None ,
207- resamples : int = 5000 , # The number of bootstrap resamples to be taken for the calculation of the confidence interval limits.
208- random_seed : int = 12345 , # `random_seed` is used to seed the random number generator during bootstrap resampling. This ensures that the confidence intervals reported are replicable.
209- ) -> (
210- tuple
211- ): # bootstraped result and empirical result of deltas' g, and the bootstraped result of delta-delta
210+ resamples : int = 5000 ,
211+ random_seed : int = 12345 ,
212+ proportional : bool = False
213+ ) -> tuple :
212214 """
213- Bootstraps the effect size deltas' g.
214-
215+ Bootstraps the effect size deltas' g or proportional delta-delta
215216 """
216-
217217 x1 , x2 , x3 , x4 = map (np .asarray , [x1 , x2 , x3 , x4 ])
218-
219- # Calculating pooled sample standard deviation
220- stds = [np .std (x ) for x in [x1 , x2 , x3 , x4 ]]
221- ns = [len (x ) for x in [x1 , x2 , x3 , x4 ]]
222-
223- sd_numerator = sum ((n - 1 ) * s ** 2 for n , s in zip (ns , stds ))
224- sd_denominator = sum (n - 1 for n in ns )
225-
226- # Avoid division by zero
227- if sd_denominator == 0 :
228- raise ValueError ("Insufficient data to compute pooled standard deviation." )
229-
230- pooled_sample_sd = np .sqrt (sd_numerator / sd_denominator )
231-
232- # Ensure pooled_sample_sd is not NaN or zero (to avoid division by zero later)
233- if np .isnan (pooled_sample_sd ) or pooled_sample_sd == 0 :
234- raise ValueError ("Pooled sample standard deviation is NaN or zero." )
235-
236- out_delta_g , deltadelta = delta2_bootstrap_loop (x1 , x2 , x3 , x4 , resamples , pooled_sample_sd , random_seed , is_paired )
237-
238- # Empirical delta_g calculation
239- delta_g = ((np .mean (x4 ) - np .mean (x3 )) - (np .mean (x2 ) - np .mean (x1 ))) / pooled_sample_sd
218+
219+ if proportional :
220+ # For proportional data, pass 1.0 as dummy pooled_sd (won't be used)
221+ out_delta_g , deltadelta = delta2_bootstrap_loop (
222+ x1 , x2 , x3 , x4 , resamples , 1.0 , random_seed , is_paired , proportional = True
223+ )
224+ # For proportional data, delta_g is the empirical delta-delta
225+ delta_g = ((np .mean (x4 ) - np .mean (x3 )) - (np .mean (x2 ) - np .mean (x1 )))
226+ else :
227+ # Calculate pooled sample standard deviation for non-proportional data
228+ stds = [np .std (x ) for x in [x1 , x2 , x3 , x4 ]]
229+ ns = [len (x ) for x in [x1 , x2 , x3 , x4 ]]
230+
231+ sd_numerator = sum ((n - 1 ) * s ** 2 for n , s in zip (ns , stds ))
232+ sd_denominator = sum (n - 1 for n in ns )
233+
234+ if sd_denominator == 0 :
235+ raise ValueError ("Insufficient data to compute pooled standard deviation." )
236+
237+ pooled_sample_sd = np .sqrt (sd_numerator / sd_denominator )
238+
239+ if np .isnan (pooled_sample_sd ) or pooled_sample_sd == 0 :
240+ raise ValueError ("Pooled sample standard deviation is NaN or zero." )
241+
242+ out_delta_g , deltadelta = delta2_bootstrap_loop (
243+ x1 , x2 , x3 , x4 , resamples , pooled_sample_sd , random_seed , is_paired , proportional = False
244+ )
245+ delta_g = ((np .mean (x4 ) - np .mean (x3 )) - (np .mean (x2 ) - np .mean (x1 ))) / pooled_sample_sd
240246
241247 return out_delta_g , delta_g , deltadelta
242248
0 commit comments