@@ -164,8 +164,62 @@ public double mannWhitneyU(final double[] x, final double[] y)
164
164
return FastMath .max (U1 , U2 );
165
165
}
166
166
167
+
168
+ /**
169
+ * Computes the Mann-Whitney
170
+ * U1 statistic</a> comparing mean for two independent samples possibly of
171
+ * different length.
172
+ * <p>
173
+ * Let X<sub>i</sub> denote the i'th individual of the first sample and
174
+ * Y<sub>j</sub> the j'th individual in the second sample. Note that the
175
+ * samples would often have different length.
176
+ * </p>
177
+ * <p>
178
+ * <strong>Preconditions</strong>:
179
+ * <ul>
180
+ * <li>All observations in the two samples are independent.</li>
181
+ * <li>The observations are at least ordinal (continuous are also ordinal).</li>
182
+ * </ul>
183
+ * </p>
184
+ *
185
+ * @param x the first sample
186
+ * @param y the second sample
187
+ * @return Mann-Whitney U statistic (maximum of U<sup>x</sup> and U<sup>y</sup>)
188
+ * @throws NullArgumentException if {@code x} or {@code y} are {@code null}.
189
+ * @throws NoDataException if {@code x} or {@code y} are zero-length.
190
+ */
191
+ public double mannWhitneyU1 (final double [] x , final double [] y )
192
+ throws NullArgumentException , NoDataException {
193
+
194
+ ensureDataConformance (x , y );
195
+
196
+ final double [] z = concatenateSamples (x , y );
197
+ final double [] ranks = naturalRanking .rank (z );
198
+
199
+ double sumRankX = 0 ;
200
+
201
+ /*
202
+ * The ranks for x is in the first x.length entries in ranks because x
203
+ * is in the first x.length entries in z
204
+ */
205
+ for (int i = 0 ; i < x .length ; ++i ) {
206
+ sumRankX += ranks [i ];
207
+ }
208
+
209
+ /*
210
+ * U1 = R1 - (n1 * (n1 + 1)) / 2 where R1 is sum of ranks for sample 1,
211
+ * e.g. x, n1 is the number of observations in sample 1.
212
+ */
213
+ final double U1 = sumRankX - (x .length * (x .length + 1 )) / 2 ;
214
+
215
+
216
+ return U1 ;
217
+ }
218
+
167
219
/**
168
220
* @param Umin smallest Mann-Whitney U value
221
+ * @param U1 Mann-Whitney U statistic of the first sample
222
+ * @param U2 Mann-Whitney U statistic of the second sample (n1 * n2 - U1)
169
223
* @param n1 number of subjects in first sample
170
224
* @param n2 number of subjects in second sample
171
225
* @return two-sided asymptotic p-value
@@ -175,6 +229,8 @@ public double mannWhitneyU(final double[] x, final double[] y)
175
229
* iterations is exceeded
176
230
*/
177
231
private double calculateAsymptoticPValue (final double Umin ,
232
+ final double U1 ,
233
+ final double U2 ,
178
234
final int n1 ,
179
235
final int n2 ,
180
236
final Type side )
@@ -195,19 +251,19 @@ private double calculateAsymptoticPValue(final double Umin,
195
251
final NormalDistribution standardNormal = new NormalDistribution (0 , 1 );
196
252
197
253
double p = 2 * standardNormal .cumulativeProbability (z );
198
-
254
+
199
255
if (side == Type .TWO_SIDED ) {
200
256
return p ;
201
257
}
202
258
203
259
if (side == Type .LESS ) {
204
- if (z > 0 ) {
260
+ if (U1 < U2 ) {
205
261
return 0.5 * p ;
206
262
} else {
207
263
return 1.0 - (0.5 * p );
208
264
}
209
265
} else {
210
- if (z < 0 ) {
266
+ if (U1 > U2 ) {
211
267
return 0.5 * p ;
212
268
} else {
213
269
return 1.0 - (0.5 * p );
@@ -259,8 +315,12 @@ public double mannWhitneyUTest(final double[] x, final double[] y, Type side)
259
315
* It can be shown that U1 + U2 = n1 * n2
260
316
*/
261
317
final double Umin = x .length * y .length - Umax ;
318
+
319
+ //we require the U1 and U2 values in order to determine which p-value to calculate for the sided tests
320
+ final double U1 = mannWhitneyU1 (x , y );
321
+ final double U2 = x .length * y .length - U1 ;
262
322
263
- return calculateAsymptoticPValue (Umin , x .length , y .length , side );
323
+ return calculateAsymptoticPValue (Umin , U1 , U2 , x .length , y .length , side );
264
324
}
265
325
266
326
}
0 commit comments