Skip to content

Commit 4e7f494

Browse files
committed
add Holm and Hochberg FWER correction
1 parent 9620a9c commit 4e7f494

File tree

7 files changed

+3850
-1
lines changed

7 files changed

+3850
-1
lines changed

src/FSharp.Stats/Testing/MultipleTesting.fs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,83 @@
11
namespace FSharp.Stats.Testing
22

3+
open System
34
open FSharp.Stats
45

56
/// This module contains functions to adjust for multiple testing errors in statistical tests.
67
module MultipleTesting =
78

89

10+
/// Holm–Bonferroni (step-down) FWER adjustment (NaN-safe)
11+
/// NaN p-values are ignored in the computation and preserved at their original indices.
12+
let inline holmFWER (p : float[]) : float[] =
13+
let m = p.Length
14+
let indexed = p |> Array.mapi (fun i v -> i, v)
15+
16+
// keep only finite p-values for the adjustment
17+
let valid = indexed |> Array.filter (fun (_, v) -> not (Double.IsNaN v))
18+
let nValid = valid.Length
19+
20+
// start with an all-NaN result and fill only valid positions
21+
let result = Array.create m Double.NaN
22+
23+
if nValid = 0 then result else
24+
let sorted = valid |> Array.sortBy snd
25+
26+
// (nValid - i)·p_(i) for i = 0…nValid-1
27+
let raw =
28+
sorted
29+
|> Array.mapi (fun i (_, pv) -> float (nValid - i) * pv)
30+
31+
// running max from the left, capped at 1.0
32+
let adjAsc =
33+
raw
34+
|> Array.scan (fun runningMax r -> max runningMax r) 0.0
35+
|> Array.tail
36+
|> Array.map (min 1.0)
37+
38+
// write adjusted values back into their original indices
39+
Array.zip (sorted |> Array.map fst) adjAsc
40+
|> Array.iter (fun (i, adj) -> result.[i] <- adj)
41+
42+
result
43+
44+
/// Hochberg (step-up) FWER adjustment (NaN-safe)
45+
/// NaN p-values are ignored in the computation and preserved at their original indices.
46+
let inline hochbergFWER (p : float[]) : float[] =
47+
let m = p.Length
48+
let indexed = p |> Array.mapi (fun i v -> i, v)
49+
50+
// keep only finite p-values for the adjustment
51+
let valid = indexed |> Array.filter (fun (_, v) -> not (Double.IsNaN v))
52+
let nValid = valid.Length
53+
54+
// start with an all-NaN result and fill only valid positions
55+
let result = Array.create m Double.NaN
56+
57+
if nValid = 0 then result else
58+
let sorted = valid |> Array.sortBy snd
59+
60+
// (nValid - i)·p_(i), same raw values pattern as Holm
61+
let raw =
62+
sorted
63+
|> Array.mapi (fun i (_, pv) -> float (nValid - i) * pv)
64+
65+
// running min from the right, capped at 1.0
66+
let adjDesc =
67+
raw
68+
|> Array.rev
69+
|> Array.scan (fun runningMin r -> min runningMin r) 1.0
70+
|> Array.tail
71+
|> Array.rev
72+
|> Array.map (min 1.0)
73+
74+
// write adjusted values back into their original indices
75+
Array.zip (sorted |> Array.map fst) adjDesc
76+
|> Array.iter (fun (i, adj) -> result.[i] <- adj)
77+
78+
result
79+
80+
981
/// Benjamini-Hochberg Correction (BH)
1082
/// 'projection' should return a tuple of any identifier and the pValues as float, when applied to 'rawP'
1183
/// This function applies the Benjamini-Hochberg multiple testing correcture and returns all False Discovery Rates to which the given p-values are still

tests/FSharp.Stats.Tests/FSharp.Stats.Tests.fsproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@
6262
<EmbeddedResource Include="data/SparseMatrixFormat1NoInfo.txt" CopyToOutputDirectory="PreserveNewest" />
6363
<EmbeddedResource Include="data/SparseMatrixFormat1WithInfo.txt" CopyToOutputDirectory="PreserveNewest" />
6464
<EmbeddedResource Include="data/TableFormat.txt" CopyToOutputDirectory="PreserveNewest" />
65+
<EmbeddedResource Include="data/fwer_hochberg_results.csv" CopyToOutputDirectory="PreserveNewest" />
66+
<EmbeddedResource Include="data/fwer_holm_results.csv" CopyToOutputDirectory="PreserveNewest" />
67+
<EmbeddedResource Include="data/holmHochberg_Input_nan.csv" CopyToOutputDirectory="PreserveNewest" />
6568
</ItemGroup>
6669
<ItemGroup />
6770
<ItemGroup>

tests/FSharp.Stats.Tests/TestExtensions.fs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@
6666
| [|a;b|] -> a, float b
6767
| _ -> failwith "invalid csv format"
6868
)
69-
7069

7170
let comparisonMetricsEqualRounded (digits : int) (actual: ComparisonMetrics) (expected: ComparisonMetrics) message =
7271
let actual =

tests/FSharp.Stats.Tests/Testing.fs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,88 @@ let pearsonTests =
403403
]
404404

405405

406+
[<Tests>]
407+
let holmTests =
408+
409+
410+
let largeSetnan =
411+
Frame.ReadCsv(location = @"data/holmHochberg_Input_nan.csv",hasHeaders = true,separators = ",").GetColumn<float>("pValues")
412+
|> Series.valuesAll
413+
|> Array.ofSeq
414+
|> Array.map (fun x -> if x.IsSome then x.Value else nan )
415+
416+
let largeSet =
417+
largeSetnan |> Array.filter (fun x -> not (nan.Equals x))
418+
419+
let largeSet_Expectednan =
420+
Frame.ReadCsv(location = @"data/fwer_holm_results.csv",hasHeaders = true,separators = ",").GetColumn<float>("pValues")
421+
|> Series.valuesAll
422+
|> Array.ofSeq
423+
|> Array.map (fun x -> if x.IsSome then x.Value else nan )
424+
425+
let largeSet_Expected =
426+
largeSet_Expectednan
427+
|> Array.filter (fun x -> not (nan.Equals x))
428+
429+
testList "Testing.MultipleTesting.holmFWER" [
430+
431+
testCase "testHolmLarge" (fun () ->
432+
Expect.sequenceEqual
433+
(largeSet |> MultipleTesting.holmFWER |> Seq.map (fun x -> Math.Round(x,9)))
434+
(largeSet_Expected |> Seq.map (fun x -> Math.Round(x,9)))
435+
"adjusted pValues should be equal to the reference implementation."
436+
)
437+
438+
testCase "testHolmLargeNaN" (fun () ->
439+
TestExtensions.sequenceEqualRoundedNaN 9
440+
(largeSetnan |> MultipleTesting.holmFWER |> Seq.ofArray)
441+
(largeSet_Expectednan |> Seq.ofArray)
442+
"adjusted pValues should be equal to the reference implementation."
443+
)
444+
445+
]
446+
447+
[<Tests>]
448+
let hochbergTests =
449+
450+
451+
let largeSetnan =
452+
Frame.ReadCsv(location = @"data/holmHochberg_Input_nan.csv",hasHeaders = true,separators = ",").GetColumn<float>("pValues")
453+
|> Series.valuesAll
454+
|> Array.ofSeq
455+
|> Array.map (fun x -> if x.IsSome then x.Value else nan )
456+
457+
let largeSet =
458+
largeSetnan |> Array.filter (fun x -> not (nan.Equals x))
459+
460+
let largeSet_Expectednan =
461+
Frame.ReadCsv(location = @"data/fwer_hochberg_results.csv",hasHeaders = true,separators = ",").GetColumn<float>("pValues")
462+
|> Series.valuesAll
463+
|> Array.ofSeq
464+
|> Array.map (fun x -> if x.IsSome then x.Value else nan )
465+
466+
let largeSet_Expected =
467+
largeSet_Expectednan
468+
|> Array.filter (fun x -> not (nan.Equals x))
469+
470+
testList "Testing.MultipleTesting.hochbergFWER" [
471+
472+
testCase "testHochbergLarge" (fun () ->
473+
Expect.sequenceEqual
474+
(largeSet |> MultipleTesting.hochbergFWER |> Seq.map (fun x -> Math.Round(x,9)))
475+
(largeSet_Expected |> Seq.map (fun x -> Math.Round(x,9)))
476+
"adjusted pValues should be equal to the reference implementation."
477+
)
478+
479+
testCase "testHochbergLargeNaN" (fun () ->
480+
TestExtensions.sequenceEqualRoundedNaN 9
481+
(largeSetnan |> MultipleTesting.hochbergFWER |> Seq.ofArray)
482+
(largeSet_Expectednan |> Seq.ofArray)
483+
"adjusted pValues should be equal to the reference implementation."
484+
)
485+
486+
]
487+
406488
[<Tests>]
407489
let benjaminiHochbergTests =
408490

0 commit comments

Comments
 (0)