Skip to content

Commit 870712d

Browse files
authored
Make corspearman return NaN when there are NaNs in the input (#659)
Make `corspearman` return `NaN` when there are `NaN`s in the input, instead of silently sorting them at the end. This is consistent with what `corkendall` and `cor` do.
1 parent 5bf2b8f commit 870712d

File tree

2 files changed

+150
-8
lines changed

2 files changed

+150
-8
lines changed

src/rankcorr.jl

Lines changed: 96 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,103 @@ Compute Spearman's rank correlation coefficient. If `x` and `y` are vectors, the
1717
output is a float, otherwise it's a matrix corresponding to the pairwise correlations
1818
of the columns of `x` and `y`.
1919
"""
20-
corspearman(x::RealVector, y::RealVector) = cor(tiedrank(x), tiedrank(y))
20+
function corspearman(x::RealVector, y::RealVector)
21+
n = length(x)
22+
n == length(y) || throw(DimensionMismatch("vectors must have same length"))
23+
(any(isnan, x) || any(isnan, y)) && return NaN
24+
return cor(tiedrank(x), tiedrank(y))
25+
end
26+
27+
function corspearman(X::RealMatrix, y::RealVector)
28+
size(X, 1) == length(y) ||
29+
throw(DimensionMismatch("X and y have inconsistent dimensions"))
30+
n = size(X, 2)
31+
C = Matrix{Float64}(I, n, 1)
32+
any(isnan, y) && return fill!(C, NaN)
33+
yrank = tiedrank(y)
34+
for j = 1:n
35+
Xj = view(X, :, j)
36+
if any(isnan, Xj)
37+
C[j,1] = NaN
38+
else
39+
Xjrank = tiedrank(Xj)
40+
C[j,1] = cor(Xjrank, yrank)
41+
end
42+
end
43+
return C
44+
end
45+
46+
function corspearman(x::RealVector, Y::RealMatrix)
47+
size(Y, 1) == length(x) ||
48+
throw(DimensionMismatch("x and Y have inconsistent dimensions"))
49+
n = size(Y, 2)
50+
C = Matrix{Float64}(I, 1, n)
51+
any(isnan, x) && return fill!(C, NaN)
52+
xrank = tiedrank(x)
53+
for j = 1:n
54+
Yj = view(Y, :, j)
55+
if any(isnan, Yj)
56+
C[1,j] = NaN
57+
else
58+
Yjrank = tiedrank(Yj)
59+
C[1,j] = cor(xrank, Yjrank)
60+
end
61+
end
62+
return C
63+
end
2164

22-
corspearman(X::RealMatrix, Y::RealMatrix) =
23-
cor(mapslices(tiedrank, X, dims=1), mapslices(tiedrank, Y, dims=1))
24-
corspearman(X::RealMatrix, y::RealVector) = cor(mapslices(tiedrank, X, dims=1), tiedrank(y))
25-
corspearman(x::RealVector, Y::RealMatrix) = cor(tiedrank(x), mapslices(tiedrank, Y, dims=1))
65+
function corspearman(X::RealMatrix)
66+
n = size(X, 2)
67+
C = Matrix{Float64}(I, n, n)
68+
anynan = Vector{Bool}(undef, n)
69+
for j = 1:n
70+
Xj = view(X, :, j)
71+
anynan[j] = any(isnan, Xj)
72+
if anynan[j]
73+
C[:,j] .= NaN
74+
C[j,:] .= NaN
75+
C[j,j] = 1
76+
continue
77+
end
78+
Xjrank = tiedrank(Xj)
79+
for i = 1:(j-1)
80+
Xi = view(X, :, i)
81+
if anynan[i]
82+
C[i,j] = C[j,i] = NaN
83+
else
84+
Xirank = tiedrank(Xi)
85+
C[i,j] = C[j,i] = cor(Xjrank, Xirank)
86+
end
87+
end
88+
end
89+
return C
90+
end
2691

27-
corspearman(X::RealMatrix) = (Z = mapslices(tiedrank, X, dims=1); cor(Z, Z))
92+
function corspearman(X::RealMatrix, Y::RealMatrix)
93+
size(X, 1) == size(Y, 1) ||
94+
throw(ArgumentError("number of columns in each array must match"))
95+
nr = size(X, 2)
96+
nc = size(Y, 2)
97+
C = Matrix{Float64}(undef, nr, nc)
98+
for j = 1:nr
99+
Xj = view(X, :, j)
100+
if any(isnan, Xj)
101+
C[j,:] .= NaN
102+
continue
103+
end
104+
Xjrank = tiedrank(Xj)
105+
for i = 1:nc
106+
Yi = view(Y, :, i)
107+
if any(isnan, Yi)
108+
C[j,i] = NaN
109+
else
110+
Yirank = tiedrank(Yi)
111+
C[j,i] = cor(Xjrank, Yirank)
112+
end
113+
end
114+
end
115+
return C
116+
end
28117

29118

30119
#######################################
@@ -44,7 +133,7 @@ function corkendall!(x::RealVector, y::RealVector, permx::AbstractVector{<:Integ
44133
# Initial sorting
45134
permute!(x, permx)
46135
permute!(y, permx)
47-
136+
48137
# Use widen to avoid overflows on both 32bit and 64bit
49138
npairs = div(widen(n) * (n - 1), 2)
50139
ntiesx = ndoubleties = nswaps = widen(0)

test/rankcorr.jl

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ using StatsBase
22
using Test
33

44
X = Float64[1 0; 2 1; 3 0; 4 1; 5 10]
5+
Y = Float64[5 5 6; 3 4 1; 4 0 4; 2 6 1; 5 7 10]
56

67
x1 = X[:,1]
78
x2 = X[:,2]
8-
y = [5, 3, 4, 2, 5]
9+
y = Y[:,1]
910

1011
# corspearman
1112

@@ -23,6 +24,9 @@ c22 = corspearman(x2, x2)
2324
@test corspearman(X, X) [c11 c12; c12 c22]
2425
@test corspearman(X) [c11 c12; c12 c22]
2526

27+
@test corspearman(X, Y) ==
28+
[corspearman(X[:,i], Y[:,j]) for i in axes(X, 2), j in axes(Y, 2)]
29+
2630
# corkendall
2731

2832
# Check error, handling of NaN, Inf etc
@@ -106,3 +110,52 @@ w = repeat(z, n)
106110

107111
StatsBase.midpoint(1,10) == 5
108112
StatsBase.midpoint(1,widen(10)) == 5
113+
114+
115+
# NaN handling
116+
117+
Xnan = copy(X)
118+
Xnan[1,1] = NaN
119+
Ynan = copy(Y)
120+
Ynan[2,1] = NaN
121+
122+
for f in (corspearman, corkendall)
123+
@test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4]))
124+
@test all(isnan, f([1.0, NaN], [1 2; 3 4]))
125+
@test all(isnan, f([1 2; 3 4], [1.0, NaN]))
126+
@test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1])
127+
@test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4]))
128+
@test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4]))
129+
130+
@test isequal(f(Xnan, Ynan),
131+
[f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)])
132+
@test isequal(f(Xnan),
133+
[i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j])
134+
for i in axes(Xnan, 2), j in axes(Xnan, 2)])
135+
for k in 1:2
136+
@test isequal(f(Xnan[:,k], Ynan),
137+
[f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)])
138+
# TODO: fix corkendall (PR#659)
139+
if f === corspearman
140+
@test isequal(f(Xnan, Ynan[:,k]),
141+
[f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1])
142+
else
143+
@test isequal(f(Xnan, Ynan[:,k]),
144+
[f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2)])
145+
end
146+
end
147+
end
148+
149+
150+
# Wrong dimensions
151+
152+
@test_throws DimensionMismatch corspearman([1], [1, 2])
153+
@test_throws DimensionMismatch corspearman([1], [1 2; 3 4])
154+
@test_throws DimensionMismatch corspearman([1 2; 3 4], [1])
155+
@test_throws ArgumentError corspearman([1 2; 3 4: 4 6], [1 2; 3 4])
156+
157+
# TODO: fix corkendall to match corspearman (PR#659)
158+
@test_throws ErrorException corkendall([1], [1, 2])
159+
@test_throws ErrorException corkendall([1], [1 2; 3 4])
160+
@test_throws ErrorException corkendall([1 2; 3 4], [1])
161+
@test_throws ArgumentError corkendall([1 2; 3 4: 4 6], [1 2; 3 4])

0 commit comments

Comments
 (0)