Skip to content

Commit 01f36b3

Browse files
committed
Ticket #124 - Fixed an issue with the one-sided Mann-Whitney test. The z value was always negative, so it was an incorrect indicator of the direction of the sided test. The U1 and U2 values are now used to determine which p-value to calculate.
1 parent 36dad5f commit 01f36b3

File tree

1 file changed

+64
-4
lines changed

1 file changed

+64
-4
lines changed

EnrichmentMapPlugin/src/main/java/org/baderlab/csplugins/mannwhit/MannWhitneyUTestSided.java

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,62 @@ public double mannWhitneyU(final double[] x, final double[] y)
164164
return FastMath.max(U1, U2);
165165
}
166166

167+
168+
/**
169+
* Computes the Mann-Whitney
170+
* U1 statistic</a> comparing mean for two independent samples possibly of
171+
* different length.
172+
* <p>
173+
* Let X<sub>i</sub> denote the i'th individual of the first sample and
174+
* Y<sub>j</sub> the j'th individual in the second sample. Note that the
175+
* samples would often have different length.
176+
* </p>
177+
* <p>
178+
* <strong>Preconditions</strong>:
179+
* <ul>
180+
* <li>All observations in the two samples are independent.</li>
181+
* <li>The observations are at least ordinal (continuous are also ordinal).</li>
182+
* </ul>
183+
* </p>
184+
*
185+
* @param x the first sample
186+
* @param y the second sample
187+
* @return Mann-Whitney U statistic (maximum of U<sup>x</sup> and U<sup>y</sup>)
188+
* @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
189+
* @throws NoDataException if {@code x} or {@code y} are zero-length.
190+
*/
191+
public double mannWhitneyU1(final double[] x, final double[] y)
192+
throws NullArgumentException, NoDataException {
193+
194+
ensureDataConformance(x, y);
195+
196+
final double[] z = concatenateSamples(x, y);
197+
final double[] ranks = naturalRanking.rank(z);
198+
199+
double sumRankX = 0;
200+
201+
/*
202+
* The ranks for x is in the first x.length entries in ranks because x
203+
* is in the first x.length entries in z
204+
*/
205+
for (int i = 0; i < x.length; ++i) {
206+
sumRankX += ranks[i];
207+
}
208+
209+
/*
210+
* U1 = R1 - (n1 * (n1 + 1)) / 2 where R1 is sum of ranks for sample 1,
211+
* e.g. x, n1 is the number of observations in sample 1.
212+
*/
213+
final double U1 = sumRankX - (x.length * (x.length + 1)) / 2;
214+
215+
216+
return U1;
217+
}
218+
167219
/**
168220
* @param Umin smallest Mann-Whitney U value
221+
* @param U1 Mann-Whitney U value of the first sample
222+
* @param U2 Mann-Whitney U value of the second sample
169223
* @param n1 number of subjects in first sample
170224
* @param n2 number of subjects in second sample
171225
* @return asymptotic p-value for the requested {@code side} (two-sided or one-sided)
@@ -175,6 +229,8 @@ public double mannWhitneyU(final double[] x, final double[] y)
175229
* iterations is exceeded
176230
*/
177231
private double calculateAsymptoticPValue(final double Umin,
232+
final double U1,
233+
final double U2,
178234
final int n1,
179235
final int n2,
180236
final Type side)
@@ -195,19 +251,19 @@ private double calculateAsymptoticPValue(final double Umin,
195251
final NormalDistribution standardNormal = new NormalDistribution(0, 1);
196252

197253
double p = 2 * standardNormal.cumulativeProbability(z);
198-
254+
199255
if(side == Type.TWO_SIDED) {
200256
return p;
201257
}
202258

203259
if(side == Type.LESS) {
204-
if(z > 0) {
260+
if(U1 < U2) {
205261
return 0.5 * p;
206262
} else {
207263
return 1.0 - (0.5 * p);
208264
}
209265
} else {
210-
if(z < 0) {
266+
if(U1 > U2) {
211267
return 0.5 * p;
212268
} else {
213269
return 1.0 - (0.5 * p);
@@ -259,8 +315,12 @@ public double mannWhitneyUTest(final double[] x, final double[] y, Type side)
259315
* It can be shown that U1 + U2 = n1 * n2
260316
*/
261317
final double Umin = x.length * y.length - Umax;
318+
319+
//we require the U1 and U2 values in order to determine which p-value to calculate for the sided tests
320+
final double U1 = mannWhitneyU1(x, y);
321+
final double U2 = x.length * y.length - U1;
262322

263-
return calculateAsymptoticPValue(Umin, x.length, y.length, side);
323+
return calculateAsymptoticPValue(Umin, U1, U2, x.length, y.length, side);
264324
}
265325

266326
}

0 commit comments

Comments
 (0)