1+ use nalgebra_sparse:: CsrMatrix ;
2+ use num_traits:: { Float , NumCast , FromPrimitive } ;
3+ use std:: fmt:: Debug ;
4+ use single_utilities:: traits:: FloatOpsTS ;
5+
6+ /// Calculate log2 fold change between two groups
7+ pub fn calculate_log2_fold_change < T > (
8+ matrix : & CsrMatrix < T > ,
9+ row : usize ,
10+ group1_indices : & [ usize ] ,
11+ group2_indices : & [ usize ] ,
12+ pseudo_count : f64 ,
13+ ) -> anyhow:: Result < f64 >
14+ where
15+ T : FloatOpsTS ,
16+ {
17+ if group1_indices. is_empty ( ) || group2_indices. is_empty ( ) {
18+ return Err ( anyhow:: anyhow!( "Group indices cannot be empty" ) ) ;
19+ }
20+
21+ // Calculate mean for group 1
22+ let mut sum1 = 0.0 ;
23+ for & col in group1_indices {
24+ if let Some ( entry) = matrix. get_entry ( row, col) {
25+ let value = entry. into_value ( ) ;
26+ sum1 += value. to_f64 ( ) . unwrap ( ) ;
27+ }
28+ // Missing entries are treated as zero
29+ }
30+ let mean1 = sum1 / group1_indices. len ( ) as f64 + pseudo_count;
31+
32+ // Calculate mean for group 2
33+ let mut sum2 = 0.0 ;
34+ for & col in group2_indices {
35+ if let Some ( entry) = matrix. get_entry ( row, col) {
36+ let value = entry. into_value ( ) ;
37+ sum2 += value. to_f64 ( ) . unwrap ( ) ;
38+ }
39+ // Missing entries are treated as zero
40+ }
41+ let mean2 = sum2 / group2_indices. len ( ) as f64 + pseudo_count;
42+
43+ // Calculate log2 fold change
44+ let log2_fc = ( mean2 / mean1) . log2 ( ) ;
45+
46+ Ok ( log2_fc)
47+ }
48+
49+ /// Calculate Cohen's d effect size for a row
50+ pub fn calculate_cohens_d < T > (
51+ matrix : & CsrMatrix < T > ,
52+ row : usize ,
53+ group1_indices : & [ usize ] ,
54+ group2_indices : & [ usize ] ,
55+ ) -> anyhow:: Result < f64 >
56+ where
57+ T : FloatOpsTS ,
58+ {
59+ if group1_indices. len ( ) < 2 || group2_indices. len ( ) < 2 {
60+ return Err ( anyhow:: anyhow!( "Each group must have at least 2 samples for Cohen's d" ) ) ;
61+ }
62+
63+ // Extract values for group 1
64+ let mut group1_values = Vec :: with_capacity ( group1_indices. len ( ) ) ;
65+ for & col in group1_indices {
66+ if let Some ( entry) = matrix. get_entry ( row, col) {
67+ let value = entry. into_value ( ) ;
68+ group1_values. push ( value. to_f64 ( ) . unwrap ( ) ) ;
69+ } else {
70+ group1_values. push ( 0.0 ) ; // Use zero for missing entries
71+ }
72+ }
73+
74+ // Extract values for group 2
75+ let mut group2_values = Vec :: with_capacity ( group2_indices. len ( ) ) ;
76+ for & col in group2_indices {
77+ if let Some ( entry) = matrix. get_entry ( row, col) {
78+ let value = entry. into_value ( ) ;
79+ group2_values. push ( value. to_f64 ( ) . unwrap ( ) ) ;
80+ } else {
81+ group2_values. push ( 0.0 ) ; // Use zero for missing entries
82+ }
83+ }
84+
85+ // Calculate means
86+ let mean1 = group1_values. iter ( ) . sum :: < f64 > ( ) / group1_values. len ( ) as f64 ;
87+ let mean2 = group2_values. iter ( ) . sum :: < f64 > ( ) / group2_values. len ( ) as f64 ;
88+
89+ // Calculate variances
90+ let var1 = group1_values. iter ( )
91+ . map ( |& x| ( x - mean1) . powi ( 2 ) )
92+ . sum :: < f64 > ( ) / ( group1_values. len ( ) - 1 ) as f64 ;
93+
94+ let var2 = group2_values. iter ( )
95+ . map ( |& x| ( x - mean2) . powi ( 2 ) )
96+ . sum :: < f64 > ( ) / ( group2_values. len ( ) - 1 ) as f64 ;
97+
98+ // Calculate pooled standard deviation
99+ let n1 = group1_values. len ( ) as f64 ;
100+ let n2 = group2_values. len ( ) as f64 ;
101+ let pooled_sd = ( ( n1 - 1.0 ) * var1 + ( n2 - 1.0 ) * var2) . sqrt ( ) / ( ( n1 + n2 - 2.0 ) . sqrt ( ) ) ;
102+
103+ // Calculate Cohen's d
104+ let d = ( mean2 - mean1) / pooled_sd;
105+
106+ Ok ( d)
107+ }
108+
109+ /// Calculate Hedge's g (bias-corrected effect size)
110+ pub fn calculate_hedges_g < T > (
111+ matrix : & CsrMatrix < T > ,
112+ row : usize ,
113+ group1_indices : & [ usize ] ,
114+ group2_indices : & [ usize ] ,
115+ ) -> anyhow:: Result < f64 >
116+ where
117+ T : FloatOpsTS ,
118+ {
119+ // First calculate Cohen's d
120+ let d = calculate_cohens_d ( matrix, row, group1_indices, group2_indices) ?;
121+
122+ // Apply correction factor
123+ let n1 = group1_indices. len ( ) as f64 ;
124+ let n2 = group2_indices. len ( ) as f64 ;
125+ let n = n1 + n2;
126+
127+ // Correction factor J
128+ let j = 1.0 - 3.0 / ( 4.0 * ( n - 2.0 ) - 1.0 ) ;
129+
130+ // Calculate Hedge's g
131+ let g = j * d;
132+
133+ Ok ( g)
134+ }
135+
136+ #[ cfg( test) ]
137+ mod tests {
138+ use super :: * ;
139+ use approx:: assert_abs_diff_eq;
140+ use nalgebra_sparse:: { CooMatrix , CsrMatrix } ;
141+
142+ fn create_test_matrix ( ) -> CsrMatrix < f64 > {
143+ // Create a simple test matrix for differential expression analysis:
144+ // Two groups (columns 0,1,2 vs 3,4,5) with different expression patterns
145+ // Row 0: Clear difference between groups
146+ // Row 1: No difference between groups
147+ // Row 2: Moderate difference
148+ // Row 3: Extreme difference
149+ // Row 4: All zeros in group 1
150+ let rows = vec ! [
151+ 0 , 0 , 0 , 0 , 0 , 0 , // Row 0: all positions filled
152+ 1 , 1 , 1 , 1 , 1 , 1 , // Row 1: all positions filled
153+ 2 , 2 , 2 , 2 , 2 , 2 , // Row 2: all positions filled
154+ 3 , 3 , 3 , 3 , 3 , 3 , // Row 3: all positions filled
155+ 4 , 4 , 4 // Row 4: sparse (no entries for group 1)
156+ ] ;
157+ let cols = vec ! [
158+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 0
159+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 1
160+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 2
161+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 3
162+ 3 , 4 , 5 // Row 4 (only group 2 values)
163+ ] ;
164+ let vals = vec ! [
165+ 2.0 , 2.2 , 1.8 , 8.0 , 7.5 , 8.5 , // Row 0: ~2 vs ~8 = big difference
166+ 5.0 , 5.1 , 4.9 , 5.0 , 5.1 , 4.9 , // Row 1: ~5 vs ~5 = no difference
167+ 3.0 , 3.3 , 2.7 , 5.0 , 4.7 , 5.3 , // Row 2: ~3 vs ~5 = moderate
168+ 0.1 , 0.2 , 0.1 , 20.0 , 19.0 , 21.0 , // Row 3: ~0.1 vs ~20 = extreme
169+ 10.0 , 8.0 , 12.0 // Row 4: 0 vs ~10 = missing data test
170+ ] ;
171+
172+ let coo = CooMatrix :: try_from_triplets (
173+ 5 , // 5 rows
174+ 6 , // 6 columns
175+ rows,
176+ cols,
177+ vals,
178+ )
179+ . unwrap ( ) ;
180+
181+ CsrMatrix :: from ( & coo)
182+ }
183+
184+ #[ test]
185+ fn test_log2_fold_change ( ) {
186+ let matrix = create_test_matrix ( ) ;
187+ let group1 = vec ! [ 0 , 1 , 2 ] ; // First group indices
188+ let group2 = vec ! [ 3 , 4 , 5 ] ; // Second group indices
189+ let pseudo_count = 0.01 ; // Small pseudo count
190+
191+ // Row 0: Clear difference (~2 vs ~8)
192+ let fc0 = calculate_log2_fold_change ( & matrix, 0 , & group1, & group2, pseudo_count) . unwrap ( ) ;
193+ assert_abs_diff_eq ! ( fc0, 2.0 , epsilon = 0.1 ) ; // log2(8/2) ≈ 2
194+
195+ // Row 1: No difference (~5 vs ~5)
196+ let fc1 = calculate_log2_fold_change ( & matrix, 1 , & group1, & group2, pseudo_count) . unwrap ( ) ;
197+ assert_abs_diff_eq ! ( fc1, 0.0 , epsilon = 0.01 ) ; // log2(5/5) = 0
198+
199+ // Row 2: Moderate difference (~3 vs ~5)
200+ let fc2 = calculate_log2_fold_change ( & matrix, 2 , & group1, & group2, pseudo_count) . unwrap ( ) ;
201+ assert_abs_diff_eq ! ( fc2, 0.737 , epsilon = 0.01 ) ; // log2(5/3) ≈ 0.737
202+
203+ // Row 3: Extreme difference (~0.1 vs ~20)
204+ let fc3 = calculate_log2_fold_change ( & matrix, 3 , & group1, & group2, pseudo_count) . unwrap ( ) ;
205+ assert_abs_diff_eq ! ( fc3, 7.13 , epsilon = 0.1 ) ; // log2(20/0.1) ≈ 7.13
206+
207+ // Row 4: All zeros in group 1 (tests handling of missing data)
208+ let fc4 = calculate_log2_fold_change ( & matrix, 4 , & group1, & group2, pseudo_count) . unwrap ( ) ;
209+ // Group 1 is all 0s + pseudo_count, Group 2 is ~10 + pseudo_count
210+ assert ! ( fc4 > 9.0 ) ; // log2((10+0.01)/(0+0.01)) ≈ 9.97
211+ }
212+
213+ #[ test]
214+ fn test_empty_groups ( ) {
215+ let matrix = create_test_matrix ( ) ;
216+
217+ // Test with empty groups
218+ let result = calculate_log2_fold_change ( & matrix, 0 , & [ ] , & [ 3 , 4 , 5 ] , 0.01 ) ;
219+ assert ! ( result. is_err( ) ) ;
220+
221+ let result = calculate_log2_fold_change ( & matrix, 0 , & [ 0 , 1 , 2 ] , & [ ] , 0.01 ) ;
222+ assert ! ( result. is_err( ) ) ;
223+
224+ // Test with small groups for Cohen's d
225+ let result = calculate_cohens_d ( & matrix, 0 , & [ 0 ] , & [ 3 , 4 , 5 ] ) ;
226+ assert ! ( result. is_err( ) ) ;
227+
228+ let result = calculate_cohens_d ( & matrix, 0 , & [ 0 , 1 , 2 ] , & [ 3 ] ) ;
229+ assert ! ( result. is_err( ) ) ;
230+ }
231+
232+ #[ test]
233+ fn test_cohens_d ( ) {
234+ let matrix = create_test_matrix ( ) ;
235+ let group1 = vec ! [ 0 , 1 , 2 ] ; // First group indices
236+ let group2 = vec ! [ 3 , 4 , 5 ] ; // Second group indices
237+
238+ // Row 0: Clear difference (~2 vs ~8)
239+ let d0 = calculate_cohens_d ( & matrix, 0 , & group1, & group2) . unwrap ( ) ;
240+ assert_abs_diff_eq ! ( d0, 15.76 , epsilon = 0.1 ) ; // Large effect
241+
242+ // Row 1: No difference (~5 vs ~5)
243+ let d1 = calculate_cohens_d ( & matrix, 1 , & group1, & group2) . unwrap ( ) ;
244+ assert_abs_diff_eq ! ( d1, 0.0 , epsilon = 0.01 ) ; // No effect
245+
246+ // Row 2: Moderate difference (~3 vs ~5)
247+ let d2 = calculate_cohens_d ( & matrix, 2 , & group1, & group2) . unwrap ( ) ;
248+ assert_abs_diff_eq ! ( d2, 6.67 , epsilon = 0.1 ) ; // Large effect
249+
250+ // Row 3: Extreme difference (~0.1 vs ~20)
251+ let d3 = calculate_cohens_d ( & matrix, 3 , & group1, & group2) . unwrap ( ) ;
252+ // The exact value may vary depending on implementation details,
253+ // but should definitely show a very large effect
254+ assert ! ( d3 > 20.0 ) ; // Very large effect
255+ }
256+
257+ #[ test]
258+ fn test_hedges_g ( ) {
259+ let matrix = create_test_matrix ( ) ;
260+ let group1 = vec ! [ 0 , 1 , 2 ] ; // First group indices
261+ let group2 = vec ! [ 3 , 4 , 5 ] ; // Second group indices
262+
263+ // Row 0: Compare with Cohen's d
264+ let d0 = calculate_cohens_d ( & matrix, 0 , & group1, & group2) . unwrap ( ) ;
265+ let g0 = calculate_hedges_g ( & matrix, 0 , & group1, & group2) . unwrap ( ) ;
266+
267+ // Hedge's g should be slightly smaller than Cohen's d due to correction
268+ assert ! ( g0 < d0) ;
269+ // But for large samples they shouldn't be too different
270+ assert_abs_diff_eq ! ( g0 / d0, 0.75 , epsilon = 0.25 ) ;
271+
272+ // Row 1: No difference case
273+ let g1 = calculate_hedges_g ( & matrix, 1 , & group1, & group2) . unwrap ( ) ;
274+ assert_abs_diff_eq ! ( g1, 0.0 , epsilon = 0.01 ) ;
275+
276+ // Test correction factor works
277+ // For larger groups, correction should be smaller
278+ let large_group1 = vec ! [ 0 , 1 , 2 , 6 , 7 , 8 , 9 , 10 ] ;
279+ let large_group2 = vec ! [ 3 , 4 , 5 , 11 , 12 , 13 , 14 , 15 ] ;
280+
281+ // This will fail if sample sizes are out of range, but the test is to show
282+ // that larger sample sizes bring Hedge's g closer to Cohen's d
283+ if matrix. ncols ( ) >= 16 {
284+ let d_large = calculate_cohens_d ( & matrix, 0 , & large_group1, & large_group2) . unwrap_or ( d0) ;
285+ let g_large = calculate_hedges_g ( & matrix, 0 , & large_group1, & large_group2) . unwrap_or ( g0) ;
286+
287+ // With more samples, g should be closer to d
288+ assert ! ( g_large / d_large > g0 / d0) ;
289+ }
290+ }
291+
292+ #[ test]
293+ fn test_zero_variance_cases ( ) {
294+ // Create a matrix with zero variance in one or both groups
295+ let rows = vec ! [ 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 ] ;
296+ let cols = vec ! [ 0 , 1 , 2 , 3 , 4 , 5 , 0 , 1 , 2 , 3 , 4 , 5 ] ;
297+ let vals = vec ! [
298+ 5.0 , 5.0 , 5.0 , 10.0 , 10.0 , 10.0 , // Row 0: all values identical within groups
299+ 1.0 , 2.0 , 3.0 , 5.0 , 5.0 , 5.0 // Row 1: variance in group 1, none in group 2
300+ ] ;
301+
302+ let coo = CooMatrix :: try_from_triplets ( 2 , 6 , rows, cols, vals) . unwrap ( ) ;
303+ let matrix = CsrMatrix :: from ( & coo) ;
304+
305+ let group1 = vec ! [ 0 , 1 , 2 ] ;
306+ let group2 = vec ! [ 3 , 4 , 5 ] ;
307+
308+ // Test log2fc - should work fine with zero variance
309+ let fc0 = calculate_log2_fold_change ( & matrix, 0 , & group1, & group2, 0.01 ) . unwrap ( ) ;
310+ assert_abs_diff_eq ! ( fc0, 1.0 , epsilon = 0.01 ) ; // log2(10/5) = 1
311+
312+ // Cohen's d with zero variance in both groups
313+ // In theory this should be infinity, but in practice we might get very large values
314+ // or potential numerical issues
315+ let d0 = calculate_cohens_d ( & matrix, 0 , & group1, & group2) ;
316+ match d0 {
317+ Ok ( value) => assert ! ( value. abs( ) > 100.0 || value. is_infinite( ) ) , // Should be very large or infinity
318+ Err ( _) => { } // It's also acceptable if the function determines this is an error case
319+ }
320+
321+ // Cohen's d with zero variance in one group
322+ let d1 = calculate_cohens_d ( & matrix, 1 , & group1, & group2) ;
323+ if let Ok ( value) = d1 {
324+ assert ! ( value. abs( ) > 1.0 ) ; // Should show a substantial effect
325+ }
326+ }
327+
328+ #[ test]
329+ fn test_negative_values ( ) {
330+ // Test with negative values to ensure functions handle them correctly
331+ let rows = vec ! [ 0 , 0 , 0 , 0 , 0 , 0 ] ;
332+ let cols = vec ! [ 0 , 1 , 2 , 3 , 4 , 5 ] ;
333+ let vals = vec ! [ -2.0 , -2.2 , -1.8 , -8.0 , -7.5 , -8.5 ] ; // Negative values
334+
335+ let coo = CooMatrix :: try_from_triplets ( 1 , 6 , rows, cols, vals) . unwrap ( ) ;
336+ let matrix = CsrMatrix :: from ( & coo) ;
337+
338+ let group1 = vec ! [ 0 , 1 , 2 ] ;
339+ let group2 = vec ! [ 3 , 4 , 5 ] ;
340+
341+ // Log2 fold change with negative values
342+ // For negative values, the fold change would be the ratio of absolute means
343+ // with a sign to indicate direction
344+ let fc = calculate_log2_fold_change ( & matrix, 0 , & group1, & group2, 0.0 ) ;
345+ // Expected: log2(|-8|/|-2|) ≈ 2, but sign needs handling in function
346+ assert ! ( fc. is_ok( ) ) ; // Just check it doesn't crash
347+
348+ // Cohen's d should work with negative values
349+ let d = calculate_cohens_d ( & matrix, 0 , & group1, & group2) ;
350+ assert ! ( d. is_ok( ) ) ;
351+ if let Ok ( value) = d {
352+ // Cohen's d should have the same magnitude as with positive values but negative sign
353+ assert ! ( value < 0.0 ) ;
354+ assert_abs_diff_eq ! ( value. abs( ) , 15.76 , epsilon = 0.1 ) ;
355+ }
356+ }
357+ }
0 commit comments