From c85116a1e2bb42fdd81bfa5123317352823ba24b Mon Sep 17 00:00:00 2001
From: PGS62 <philip.swannell@gmail.com>
Date: Sat, 30 Mar 2024 10:52:18 +0000
Subject: [PATCH 01/24] Rewrite pairwise, corkendall and corspearman

---
 .vscode/settings.json     |   1 +
 src/pairwise.jl           | 327 +++++++++++------
 src/rankcorr.jl           | 744 ++++++++++++++++++++++++++++++--------
 test/pairwise.jl          | 285 +++++++++------
 test/rankcorr.jl          | 519 ++++++++++++++++++--------
 test/rankcorrspeedtest.jl |  98 +++++
 6 files changed, 1449 insertions(+), 525 deletions(-)
 create mode 100644 .vscode/settings.json
 create mode 100644 test/rankcorrspeedtest.jl

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..9e26dfeeb
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/src/pairwise.jl b/src/pairwise.jl
index de1d47544..d2775fe7e 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -1,25 +1,53 @@
-function _pairwise!(::Val{:none}, f, dest::AbstractMatrix, x, y, symmetric::Bool)
-    @inbounds for (i, xi) in enumerate(x), (j, yj) in enumerate(y)
-        symmetric && i > j && continue
-
-        # For performance, diagonal is special-cased
-        if f === cor && eltype(dest) !== Union{} && i == j && xi === yj
-            # TODO: float() will not be needed after JuliaLang/Statistics.jl#61
-            dest[i, j] = float(cor(xi))
-        else
-            dest[i, j] = f(xi, yj)
-        end
+function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
+    symmetric::Bool) where {V}
+
+    nr, nc = size(dest)
+
+    # Swap x and y for more efficient threaded loop.
+    if nr < nc
+        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
+        _pairwise!(Val(:none), f, dest′, y, x, symmetric)
+        dest .= transpose(dest′)
+        return dest
     end
-    if symmetric
-        m, n = size(dest)
-        @inbounds for j in 1:n, i in (j+1):m
-            dest[i, j] = dest[j, i]
+
+    #cor and friends are special-cased.
+    iscor = (f in (corkendall, corspearman, cor))
+    (iscor || f == cov) && (symmetric = x === y)
+    #cov(x) is faster than cov(x, x)
+    (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))
+
+    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+        for j in subset
+            for i = 1:(symmetric ? j : nc)
+                # For performance, diagonal is special-cased
+                if iscor && i==j && y[i] === x[j] && V !== Union{}
+                    dest[j, i] = V === Missing ? missing : 1.0
+                else
+                    dest[j, i] = f(x[j], y[i])
+                end
+                symmetric && (dest[i, j] = dest[j, i])
+            end
         end
     end
     return dest
 end
 
-function check_vectors(x, y, skipmissing::Symbol)
+#Input validation for both pairwise and pairwise!
+function check_vectors(x, y, skipmissing::Symbol, symmetric::Bool)
+
+    if symmetric && x !== y
+        throw(ArgumentError("symmetric=true only makes sense passing " *
+                            "a single set of variables (x === y)"))
+    end
+
+    if !(skipmissing in (:none, :pairwise, :listwise))
+        throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise"))
+    end
+
+    #When skipmissing is :none, elements of x/y can have unequal length.
+    skipmissing == :none && return
+
     m = length(x)
     n = length(y)
     if !(all(xi -> xi isa AbstractVector, x) && all(yi -> yi isa AbstractVector, y))
@@ -46,76 +74,59 @@ function check_vectors(x, y, skipmissing::Symbol)
     end
 end
 
-function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix, x, y, symmetric::Bool)
-    check_vectors(x, y, :pairwise)
-    @inbounds for (j, yj) in enumerate(y)
-        ynminds = .!ismissing.(yj)
-        @inbounds for (i, xi) in enumerate(x)
-            symmetric && i > j && continue
+function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetric::Bool) where {V}
 
-            if xi === yj
-                ynm = view(yj, ynminds)
-                # For performance, diagonal is special-cased
-                if f === cor && eltype(dest) !== Union{} && i == j
-                    # TODO: float() will not be needed after JuliaLang/Statistics.jl#61
-                    dest[i, j] = float(cor(xi))
+    nr, nc = size(dest)
+    m = length(x) == 0 ? 0 : length(first(x))
+
+    # Swap x and y for more efficient threaded loop.
+    if nr < nc
+        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
+        _pairwise!(Val(:pairwise), f, dest′, y, x, symmetric)
+        dest .= transpose(dest′)
+        return dest
+    end
+
+    #cor and friends are special-cased.
+    iscor = (f in (corkendall, corspearman, cor))
+    (iscor || f == cov) && (symmetric = x === y)
+    #cov(x) is faster than cov(x, x)
+    (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))
+
+    nmtx = promoted_nmtype(x)[]
+    nmty = promoted_nmtype(y)[]
+
+    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+        for j in subset
+            scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
+            scratch_fy = task_local_vector(:scratch_fy, nmty, m)
+            for i = 1:(symmetric ? j : nc)
+                if iscor && i == j && y[i] === x[j] && V !== Union{} && V!== Missing
+                    dest[j, i] = 1.0
                 else
-                    dest[i, j] = f(ynm, ynm)
+                    dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx, scratch_fy)...)
                 end
-            else
-                nminds = .!ismissing.(xi) .& ynminds
-                xnm = view(xi, nminds)
-                ynm = view(yj, nminds)
-                dest[i, j] = f(xnm, ynm)
+                symmetric && (dest[i, j] = dest[j, i])
             end
         end
     end
-    if symmetric
-        m, n = size(dest)
-        @inbounds for j in 1:n, i in (j+1):m
-            dest[i, j] = dest[j, i]
-        end
-    end
     return dest
 end
 
 function _pairwise!(::Val{:listwise}, f, dest::AbstractMatrix, x, y, symmetric::Bool)
-    check_vectors(x, y, :listwise)
-    nminds = .!ismissing.(first(x))
-    @inbounds for xi in Iterators.drop(x, 1)
-        nminds .&= .!ismissing.(xi)
-    end
-    if x !== y
-        @inbounds for yj in y
-            nminds .&= .!ismissing.(yj)
-        end
-    end
-
-    # Computing integer indices once for all vectors is faster
-    nminds′ = findall(nminds)
-    # TODO: check whether wrapping views in a custom array type which asserts
-    # that entries cannot be `missing` (similar to `skipmissing`)
-    # could offer better performance
-    return _pairwise!(Val(:none), f, dest,
-                      [view(xi, nminds′) for xi in x],
-                      [view(yi, nminds′) for yi in y],
-                      symmetric)
+    return _pairwise!(Val(:none), f, dest, handle_listwise(x, y)..., symmetric)
 end
 
-function _pairwise!(f, dest::AbstractMatrix, x, y;
-                    symmetric::Bool=false, skipmissing::Symbol=:none)
-    if !(skipmissing in (:none, :pairwise, :listwise))
-        throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise"))
-    end
+function _pairwise!(f, dest::AbstractMatrix, x, y; symmetric::Bool=false,
+    skipmissing::Symbol=:none)
 
-    x′ = x isa Union{AbstractArray, Tuple, NamedTuple} ? x : collect(x)
-    y′ = y isa Union{AbstractArray, Tuple, NamedTuple} ? y : collect(y)
+    x′ = x isa Union{AbstractArray,Tuple,NamedTuple} ? x : collect(x)
+    y′ = y isa Union{AbstractArray,Tuple,NamedTuple} ? y : collect(y)
     m = length(x′)
     n = length(y′)
 
     size(dest) != (m, n) &&
         throw(DimensionMismatch("dest has dimensions $(size(dest)) but expected ($m, $n)"))
-
     Base.has_offset_axes(dest) && throw("dest indices must start at 1")
 
     return _pairwise!(Val(skipmissing), f, dest, x′, y′, symmetric)
@@ -134,7 +145,7 @@ end
 
 # Identical to `Base.promote_typejoin` except that it uses `promote_type`
 # instead of `typejoin` to combine members of `Union` types
-function promote_type_union(::Type{T}) where T
+function promote_type_union(::Type{T}) where {T}
     if T === Union{}
         return Union{}
     elseif T isa UnionAll
@@ -149,14 +160,14 @@ function promote_type_union(::Type{T}) where T
 end
 
 function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmissing}
-    x′ = x isa Union{AbstractArray, Tuple, NamedTuple} ? x : collect(x)
-    y′ = y isa Union{AbstractArray, Tuple, NamedTuple} ? y : collect(y)
+    x′ = x isa Union{AbstractArray,Tuple,NamedTuple} ? x : collect(x)
+    y′ = y isa Union{AbstractArray,Tuple,NamedTuple} ? y : collect(y)
     m = length(x′)
     n = length(y′)
 
-    T = Core.Compiler.return_type(f, Tuple{eltype(x′), eltype(y′)})
-    Tsm = Core.Compiler.return_type((x, y) -> f(disallowmissing(x), disallowmissing(y)),
-                                     Tuple{eltype(x′), eltype(y′)})
+    T = Core.Compiler.return_type(f, Tuple{eltype(x′),eltype(y′)})
+    Tsm = Core.Compiler.return_type((x, y) -> f(handle_pairwise(x, y)...),
+        Tuple{eltype(x′),eltype(y′)})
 
     if skipmissing === :none
         dest = Matrix{T}(undef, m, n)
@@ -178,7 +189,7 @@ function _pairwise(::Val{skipmissing}, f, x, y, symmetric::Bool) where {skipmiss
         # but using `promote_type` rather than `promote_typejoin`)
         U = mapreduce(typeof, promote_type, dest)
         # V is inferred (contrary to U), but it only gives an upper bound for U
-        V = promote_type_union(Union{T, Tsm})
+        V = promote_type_union(Union{T,Tsm})
         return convert(Matrix{U}, dest)::Matrix{<:V}
     end
 end
@@ -193,15 +204,15 @@ entries in `x` and columns to entries in `y`, and `dest` must therefore
 be of size `length(x) × length(y)`.
 If `y` is omitted then `x` is crossed with itself.
 
-As a special case, if `f` is `cor`, diagonal cells for which entries
-from `x` and `y` are identical (according to `===`) are set to one even
-in the presence `missing`, `NaN` or `Inf` entries.
+As a special case, if `f` is `cor`, `corspearman` or `corkendall`, diagonal cells for
+which entries from `x` and `y` are identical (according to `===`) are set to one even in the
+presence `missing`, `NaN` or `Inf` entries.
 
 # Keyword arguments
 - `symmetric::Bool=false`: If `true`, `f` is only called to compute
   for the lower triangle of the matrix, and these values are copied
-  to fill the upper triangle. Only allowed when `y` is omitted.
-  Defaults to `true` when `f` is `cor` or `cov`.
+  to fill the upper triangle. Only allowed when `y` is omitted and ignored (taken as `true`)
+  if `f` is `cov`, `cor`, `corkendall` or `corspearman`.
 - `skipmissing::Symbol=:none`: If `:none` (the default), missing values
   in inputs are passed to `f` without any modification.
   Use `:pairwise` to skip entries with a `missing` value in either
@@ -245,11 +256,7 @@ julia> dest
 """
 function pairwise!(f, dest::AbstractMatrix, x, y=x;
                    symmetric::Bool=false, skipmissing::Symbol=:none)
-    if symmetric && x !== y
-        throw(ArgumentError("symmetric=true only makes sense passing " *
-                            "a single set of variables (x === y)"))
-    end
-
+    check_vectors(x, y, skipmissing, symmetric)
     return _pairwise!(f, dest, x, y, symmetric=symmetric, skipmissing=skipmissing)
 end
 
@@ -262,15 +269,15 @@ of entries in iterators `x` and `y`. Rows correspond to
 entries in `x` and columns to entries in `y`. If `y` is omitted then a
 square matrix crossing `x` with itself is returned.
 
-As a special case, if `f` is `cor`, diagonal cells for which entries
-from `x` and `y` are identical (according to `===`) are set to one even
-in the presence `missing`, `NaN` or `Inf` entries.
+As a special case, if `f` is `cor`, `corspearman` or `corkendall`, diagonal cells for
+which entries from `x` and `y` are identical (according to `===`) are set to one even in the
+presence `missing`, `NaN` or `Inf` entries.
 
 # Keyword arguments
 - `symmetric::Bool=false`: If `true`, `f` is only called to compute
   for the lower triangle of the matrix, and these values are copied
-  to fill the upper triangle. Only allowed when `y` is omitted.
-  Defaults to `true` when `f` is `cor` or `cov`.
+  to fill the upper triangle. Only allowed when `y` is omitted and ignored (taken as `true`)
+  if `f` is `cov`, `cor`, `corkendall` or `corspearman`.
 - `skipmissing::Symbol=:none`: If `:none` (the default), missing values
   in inputs are passed to `f` without any modification.
   Use `:pairwise` to skip entries with a `missing` value in either
@@ -307,34 +314,130 @@ julia> pairwise(cor, eachcol(y), skipmissing=:pairwise)
 ```
 """
 function pairwise(f, x, y=x; symmetric::Bool=false, skipmissing::Symbol=:none)
-    if symmetric && x !== y
-        throw(ArgumentError("symmetric=true only makes sense passing " *
-                            "a single set of variables (x === y)"))
-    end
-
+    check_vectors(x, y, skipmissing, symmetric)
     return _pairwise(Val(skipmissing), f, x, y, symmetric)
 end
 
-# cov(x) is faster than cov(x, x)
-_cov(x, y) = x === y ? cov(x) : cov(x, y)
+# Auxiliary functions for pairwise
+
+promoted_type(x) = mapreduce(eltype, promote_type, x, init=Union{})
+promoted_nmtype(x) = mapreduce(nonmissingtype ∘ eltype, promote_type, x, init=Union{})
+
+"""
+    handle_listwise(x, y)
+
+Remove missings in a listwise manner. Assumes `x` and `y` are vectors of iterables which
+have been validated via `check_vectors`.
+
+## Example
+```julia-repl
+julia> a = [6,7,8,9,10,missing];b = [4,5,6,7,missing,8];c = [2,3,4,missing,5,6];d = [1,2,3,4,5,6];
+
+julia> StatsBase.handle_listwise([a,b],[c,d])
+([[6, 7, 8], [4, 5, 6]], [[2, 3, 4], [1, 2, 3]])
+```
+"""
+function handle_listwise(x, y)
+    if !(missing isa promoted_type(x) || missing isa promoted_type(y))
+        return x, y
+    end
+
+    nminds = .!ismissing.(first(x))
+    @inbounds for xi in Iterators.drop(x, 1)
+        nminds .&= .!ismissing.(xi)
+    end
+    if x !== y
+        @inbounds for yj in y
+            nminds .&= .!ismissing.(yj)
+        end
+    end
+
+    # Computing integer indices once for all vectors is faster
+    nminds′ = findall(nminds)
+    # TODO: check whether wrapping views in a custom array type which asserts
+    # that entries cannot be `missing` (similar to `skipmissing`)
+    # could offer better performance
 
-pairwise!(::typeof(cov), dest::AbstractMatrix, x, y;
-          symmetric::Bool=false, skipmissing::Symbol=:none) =
-    pairwise!(_cov, dest, x, y, symmetric=symmetric, skipmissing=skipmissing)
+    x′ = [disallowmissing(view(xi, nminds′)) for xi in x]
+    if x === y
+        return x′, x′
+    else
+        y′ = [disallowmissing(view(yi, nminds′)) for yi in y]
+        return x′, y′
+    end
+end
 
-pairwise(::typeof(cov), x, y; symmetric::Bool=false, skipmissing::Symbol=:none) =
-    pairwise(_cov, x, y, symmetric=symmetric, skipmissing=skipmissing)
+"""
+    handle_pairwise(x::AbstractVector, y::AbstractVector;
+    scratch_fx::AbstractVector=similar(x, nonmissingtype(eltype(x))),
+    scratch_fy::AbstractVector=similar(y, nonmissingtype(eltype(y))))
 
-pairwise!(::typeof(cov), dest::AbstractMatrix, x;
-         symmetric::Bool=true, skipmissing::Symbol=:none) =
-    pairwise!(_cov, dest, x, x, symmetric=symmetric, skipmissing=skipmissing)
+Return a pair `(a,b)`, filtered copies of `(x,y)`, in which elements `x[i]` and
+`y[i]` are excluded if  `ismissing(x[i])||ismissing(y[i])`.
+"""
+function handle_pairwise(x::AbstractVector, y::AbstractVector;
+    scratch_fx::AbstractVector=similar(x, nonmissingtype(eltype(x))),
+    scratch_fy::AbstractVector=similar(y, nonmissingtype(eltype(y))))
+
+    axes(x) == axes(y) || throw(DimensionMismatch("x and y have inconsistent dimensions"))
+    lb = first(axes(x, 1))
+    j = lb - 1
+    @inbounds for i in eachindex(x)
+        if !(ismissing(x[i]) || ismissing(y[i]))
+            j += 1
+            scratch_fx[j] = x[i]
+            scratch_fy[j] = y[i]
+        end
+    end
+
+    return view(scratch_fx, lb:j), view(scratch_fy, lb:j)
+end
 
-pairwise(::typeof(cov), x; symmetric::Bool=true, skipmissing::Symbol=:none) =
-    pairwise(_cov, x, x, symmetric=symmetric, skipmissing=skipmissing)
+#=Condition a) makes equal_sum_subsets useful for load-balancing the multi-threaded section
+of _pairwise! in the non-symmetric case, and condition b) for the symmetric case.=#
+"""
+    equal_sum_subsets(n::Int, num_subsets::Int)::Vector{Vector{Int}}
+
+Divide the integers 1:n into a number of subsets such that a) each subset has
+(approximately) the same number of elements; and b) the sum of the elements in each subset
+is nearly equal. If `n` is a multiple of `2 * num_subsets` both conditions hold exactly.
+
+## Example
+```julia-repl
+julia> StatsBase.equal_sum_subsets(30,5)
+5-element Vector{Vector{Int64}}:
+ [30, 21, 20, 11, 10, 1]
+ [29, 22, 19, 12, 9, 2]
+ [28, 23, 18, 13, 8, 3]
+ [27, 24, 17, 14, 7, 4]
+ [26, 25, 16, 15, 6, 5]
+```
+"""
+function equal_sum_subsets(n::Int, num_subsets::Int)::Vector{Vector{Int}}
+    subsets = [Int[] for _ in 1:min(n, num_subsets)]
+    writeto, scanup = 1, true
+    for i = n:-1:1
+        push!(subsets[writeto], i)
+        if scanup && writeto == num_subsets
+            scanup = false
+        elseif (!scanup) && writeto == 1
+            scanup = true
+        else
+            writeto += scanup ? 1 : -1
+        end
+    end
+    return subsets
+end
 
-pairwise!(::typeof(cor), dest::AbstractMatrix, x;
-          symmetric::Bool=true, skipmissing::Symbol=:none) =
-    pairwise!(cor, dest, x, x, symmetric=symmetric, skipmissing=skipmissing)
+"""
+    task_local_vector(key::Symbol, similarto::AbstractArray{V},
+    length::Int)::Vector{V} where {V}
 
-pairwise(::typeof(cor), x; symmetric::Bool=true, skipmissing::Symbol=:none) =
-    pairwise(cor, x, x, symmetric=symmetric, skipmissing=skipmissing)
+Retrieve from task local storage a vector of length `length` and matching the element
+type of `similarto`, with initialisation on first call during a task.
+"""
+function task_local_vector(key::Symbol, similarto::AbstractArray{V},
+    length::Int)::Vector{V} where {V}
+    haskey(task_local_storage(), key) || task_local_storage(key, similar(similarto, length))
+    return task_local_storage(key)
+end
\ No newline at end of file
diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index 95c5cb03e..1d5b3871a 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -11,110 +11,385 @@
 #######################################
 
 """
-    corspearman(x, y=x)
+    corspearman(x, y=x; skipmissing::Symbol=:none)
 
 Compute Spearman's rank correlation coefficient. If `x` and `y` are vectors, the
 output is a float, otherwise it's a matrix corresponding to the pairwise correlations
 of the columns of `x` and `y`.
+
+Uses multiple threads when either `x` or `y` is a matrix and `skipmissing` is `:pairwise`.
+
+# Keyword argument
+
+- `skipmissing::Symbol=:none`: If `:none` (the default), `missing` entries in `x` or `y`
+    give rise to `missing` entries in the return. If `:pairwise` when calculating an element
+    of the return, both `i`th entries of the input vectors are skipped if either is missing.
+    If `:listwise` the `i`th rows of both `x` and `y` are skipped if `missing` appears in
+    either; note that this might skip a high proportion of entries. Only allowed when `x` or
+    `y` is a matrix.
 """
-function corspearman(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})
-    n = length(x)
-    n == length(y) || throw(DimensionMismatch("vectors must have same length"))
-    (any(isnan, x) || any(isnan, y)) && return NaN
-    return cor(tiedrank(x), tiedrank(y))
+function corspearman(x::AbstractVector, y::AbstractVector; skipmissing::Symbol=:none)
+    check_rankcor_args(x, y, skipmissing, false)
+    if x === y
+        return corspearman(x)
+    else
+        return corspearman_kernel!(x, y, skipmissing)
+    end
 end
 
-function corspearman(X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real})
-    size(X, 1) == length(y) ||
-        throw(DimensionMismatch("X and y have inconsistent dimensions"))
-    n = size(X, 2)
-    C = Matrix{Float64}(I, n, 1)
-    any(isnan, y) && return fill!(C, NaN)
-    yrank = tiedrank(y)
-    for j = 1:n
-        Xj = view(X, :, j)
-        if any(isnan, Xj)
-            C[j,1] = NaN
-        else
-            Xjrank = tiedrank(Xj)
-            C[j,1] = cor(Xjrank, yrank)
+function corspearman(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none)
+    return corspearman(x, reshape(y, (length(y), 1)); skipmissing)
+end
+
+function corspearman(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none)
+    return corspearman(reshape(x, (length(x), 1)), y; skipmissing)
+end
+
+function corspearman(x::AbstractMatrix, y::AbstractMatrix=x;
+    skipmissing::Symbol=:none)
+    check_rankcor_args(x, y, skipmissing, true)
+    return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing)
+end
+
+function corspearman(x::AbstractVector{T}) where {T}
+    return T === Missing ? missing : 1.0
+end
+
+function _pairwise!(::Val{:listwise}, f::typeof(corspearman), dest::AbstractMatrix, x, y,
+    symmetric::Bool)
+    return _pairwise!(Val(:none), f, dest, handle_listwise(x, y)..., symmetric)
+end
+
+function _pairwise!(::Val{:none}, f::typeof(corspearman),
+    dest::AbstractMatrix, x, y, symmetric::Bool)
+
+    symmetric = x === y
+    if symmetric && promoted_type(x) === Missing
+        offdiag = (length(x[1]) < 2) ? NaN : missing
+        @inbounds for i in axes(dest, 1), j in axes(dest, 2)
+            dest[i, j] = i == j ? missing : offdiag
         end
+        return dest
+    end
+
+    ranksx = ranks_matrix(x)
+    if symmetric
+        dest .= _cor(ranksx, ranksx)
+    else
+        ranksy = ranks_matrix(y)
+        dest .= _cor(ranksx, ranksy)
+    end
+
+    #=When elements x[i] and y[i] are identical (according to `===`) then dest[i,i] should
+    be 1.0 even in the presence of missing and NaN values. But the return from `_cor` does
+    not respect that requirement. So amend.=#
+    autocor = (eltype(dest) === Missing && skipmissing == :none) ? missing : 1.0
+    @inbounds for i in 1:min(size(dest, 1), size(dest, 2))
+        x[i] === y[i] && (dest[i, i] = autocor)
     end
-    return C
+    return dest
 end
 
-function corspearman(x::AbstractVector{<:Real}, Y::AbstractMatrix{<:Real})
-    size(Y, 1) == length(x) ||
-        throw(DimensionMismatch("x and Y have inconsistent dimensions"))
-    n = size(Y, 2)
-    C = Matrix{Float64}(I, 1, n)
-    any(isnan, x) && return fill!(C, NaN)
-    xrank = tiedrank(x)
-    for j = 1:n
-        Yj = view(Y, :, j)
-        if any(isnan, Yj)
-            C[1,j] = NaN
-        else
-            Yjrank = tiedrank(Yj)
-            C[1,j] = cor(xrank, Yjrank)
+function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
+    dest::AbstractMatrix{V}, x, y, symmetric::Bool) where {V}
+
+    nr, nc = size(dest)
+    m = length(x) == 0 ? 0 : length(first(x))
+    symmetric = x === y
+
+    # Swap x and y for more efficient threaded loop.
+    if nr < nc
+        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
+        _pairwise!(Val(:pairwise), f, dest′, y, x, symmetric)
+        dest .= transpose(dest′)
+        return dest
+    end
+
+    tempx = sortperm_matrix(x)
+    tempy = symmetric ? tempx : sortperm_matrix(y)
+
+    int64 = Int64[]
+    fl64 = Float64[]
+    nmtx = promoted_nmtype(x)[]
+    nmty = promoted_nmtype(y)[]
+    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+
+        for j in subset
+
+            inds = task_local_vector(:inds, int64, m)
+            spnmx = task_local_vector(:spnmx, int64, m)
+            spnmy = task_local_vector(:spnmy, int64, m)
+            nmx = task_local_vector(:nmx, nmtx, m)
+            nmy = task_local_vector(:nmy, nmty, m)
+            ranksx = task_local_vector(:ranksx, fl64, m)
+            ranksy = task_local_vector(:ranksy, fl64, m)
+
+            for i = 1:(symmetric ? j : nc)
+                # For performance, diagonal is special-cased
+                if x[j] === y[i] && i == j && V !== Union{}
+                    if missing isa V && eltype(x[j]) == Missing
+                        dest[j, i] = missing
+                    else
+                        dest[j, i] = 1.0
+                    end
+                else
+                    dest[j, i] = corspearman_kernel!(x[j], y[i], :pairwise,
+                        view(tempx, :, j), view(tempy, :, i), inds, spnmx, spnmy, nmx,
+                        nmy, ranksx, ranksy)
+                end
+                symmetric && (dest[i, j] = dest[j, i])
+            end
         end
     end
-    return C
+    return dest
 end
 
-function corspearman(X::AbstractMatrix{<:Real})
-    n = size(X, 2)
-    C = Matrix{Float64}(I, n, n)
-    anynan = Vector{Bool}(undef, n)
-    for j = 1:n
-        Xj = view(X, :, j)
-        anynan[j] = any(isnan, Xj)
-        if anynan[j]
-            C[:,j] .= NaN
-            C[j,:] .= NaN
-            C[j,j] = 1
-            continue
-        end
-        Xjrank = tiedrank(Xj)
-        for i = 1:(j-1)
-            Xi = view(X, :, i)
-            if anynan[i]
-                C[i,j] = C[j,i] = NaN
+"""
+    corspearman_kernel!(x::AbstractVector{T}, y::AbstractVector{U},
+    skipmissing::Symbol, sortpermx=sortperm(x), sortpermy=sortperm(y),
+    inds=zeros(Int64, length(x)), spnmx=zeros(Int64, length(x)),
+    spnmy=zeros(Int64, length(x)), nmx=similar(x, nonmissingtype(T)),
+    nmy=similar(y, nonmissingtype(U)), ranksx=similar(x, Float64),
+    ranksy=similar(y, Float64)) where {T,U}
+
+Compute Spearman's rank correlation coefficient between `x` and `y` with no allocations if
+all arguments are provided.
+
+All vector arguments must have the same axes.
+- `sortpermx`: The sort permutation of `x`.
+- `sortpermy`: The sort permutation of `y`.
+- `inds` ... `ranksy`: Pre-allocated "scratch" arguments.
+
+## Example
+```julia-repl
+julia> using StatsBase, BenchmarkTools, Random
+
+julia> Random.seed!(1);
+
+julia> x = ifelse.(rand(1000) .< 0.05,missing,randn(1000));y = ifelse.(rand(1000) .< 0.05,\
+missing,randn(1000));
+
+julia> sortpermx=sortperm(x);sortpermy=sortperm(y);inds=zeros(Int64,1000);\
+spnmx=zeros(Int64,1000);spnmy=zeros(Int64,1000);
+
+julia> nmx=zeros(1000);nmy=zeros(1000);ranksx=similar(x,Float64);ranksy=similar(y,Float64);
+
+julia> @btime corspearman_kernel!(\$x,\$y,:pairwise,\$sortpermx,\$sortpermy,\
+\$inds,\$spnmx,\$spnmy,\$nmx,\$nmy,\$ranksx,\$ranksy)
+4.671 μs (0 allocations: 0 bytes)
+-0.016058512110609713
+```
+"""
+function corspearman_kernel!(x::AbstractVector{T}, y::AbstractVector{U},
+    skipmissing::Symbol, sortpermx=sortperm(x), sortpermy=sortperm(y),
+    inds=zeros(Int64, length(x)), spnmx=zeros(Int64, length(x)),
+    spnmy=zeros(Int64, length(x)), nmx=similar(x, nonmissingtype(T)),
+    nmy=similar(y, nonmissingtype(U)), ranksx=similar(x, Float64),
+    ranksy=similar(y, Float64)) where {T,U}
+
+    (axes(x) == axes(sortpermx) == axes(y) == axes(sortpermy) == axes(inds) ==
+     axes(spnmx) == axes(spnmy) == axes(nmx) == axes(nmy) == axes(ranksx) ==
+     axes(ranksy)) || throw(ArgumentError("Axes of inputs must match"))
+
+    if skipmissing == :pairwise
+
+        lb = first(axes(x, 1))
+        k = lb
+        #= Process (x,y) to obtain (nmx,nmy) by filtering out elements at position k if
+        either x[k] or y[k] is missing. inds provides the mapping of elements of x (or y) to
+        elements of nmx (or nmy) i.e. x[k] maps to nmx[inds[k]]. inds is then used to obtain
+        spnmx and spnmy more efficiently than calling sortperm(nmx) and sortperm(nmy).
+         =#
+        @inbounds for i in axes(x, 1)
+            if !(ismissing(x[i]) || ismissing(y[i]))
+                inds[i] = k
+                nmx[k] = x[i]
+                nmy[k] = y[i]
+                k += 1
             else
-                Xirank = tiedrank(Xi)
-                C[i,j] = C[j,i] = cor(Xjrank, Xirank)
+                inds[i] = lb - 1
+            end
+        end
+
+        nnm = k - 1
+        if nnm <= 1
+            return NaN
+        end
+        nmx = view(nmx, lb:nnm)
+        nmy = view(nmy, lb:nnm)
+
+        if any(_isnan, nmx) || any(_isnan, nmy)
+            return NaN
+        end
+
+        k = lb
+        @inbounds for i in axes(x, 1)
+            if (inds[sortpermx[i]]) != lb - 1
+                spnmx[k] = inds[sortpermx[i]]
+                k += 1
+            end
+        end
+        spnmx = view(spnmx, lb:nnm)
+
+        k = lb
+        @inbounds for i in axes(y, 1)
+            if (inds[sortpermy[i]]) != lb - 1
+                spnmy[k] = inds[sortpermy[i]]
+                k += 1
             end
         end
+        spnmy = view(spnmy, lb:nnm)
+
+        if nnm <= 1
+            return NaN
+        end
+
+        _tiedrank!(view(ranksx, 1:nnm), nmx, spnmx)
+        _tiedrank!(view(ranksy, 1:nnm), nmy, spnmy)
+
+        return cor(view(ranksx, 1:nnm), view(ranksy, 1:nnm))
+
+    else
+        if length(x) <= 1
+            return NaN
+        elseif skipmissing == :none && (missing isa T || missing isa U) &&
+               (any(ismissing, x) || any(ismissing, y))
+            return missing
+        elseif any(_isnan, x) || any(_isnan, y)
+            return NaN
+        end
+
+        _tiedrank!(ranksx, x, sortpermx)
+        _tiedrank!(ranksy, y, sortpermy)
+        return cor(ranksx, ranksy)
     end
-    return C
 end
 
-function corspearman(X::AbstractMatrix{<:Real}, Y::AbstractMatrix{<:Real})
-    size(X, 1) == size(Y, 1) ||
-        throw(ArgumentError("number of rows in each array must match"))
-    nr = size(X, 2)
-    nc = size(Y, 2)
-    C = Matrix{Float64}(undef, nr, nc)
-    for j = 1:nr
-        Xj = view(X, :, j)
-        if any(isnan, Xj)
-            C[j,:] .= NaN
-            continue
+# Auxiliary functions for Spearman's rank correlation
+
+"""
+    _cor(x, y)
+Work-around various "unhelpful" features of cor:
+a) Ensures that on-diagonal elements of the return are as they should be in the symmetric
+case i.e. 1.0 unless eltype(x) is Missing in which case on-diagonal elements should be
+missing.
+b) Ensure that _cor(a,b) is NaN when a and b are vectors of equal length less than 2
+c) Works around some edge-case bugs in cor's handling of `missing` where the function throws
+if `x` and `y` are matrices but nevertheless looping around the columns of `x` and `y`
+works. https://github.com/JuliaStats/Statistics.jl/issues/63
+
+# Example
+```julia-repl
+julia> x = y = fill(missing,2,2)
+2×2 Matrix{Missing}:
+ missing  missing
+ missing  missing
+
+julia> Statistics.cor(x,y)
+ERROR: MethodError: no method matching copy(::Missing)
+
+julia> StatsBase._cor(x,y)
+2×2 Matrix{Union{Missing, Float64}}:
+ 1.0        missing
+  missing  1.0
+
+julia>
+
+```
+"""
+function _cor(ranksx::AbstractMatrix{T}, ranksy::AbstractMatrix{U}) where {T,U}
+    symmetric = ranksx === ranksy
+
+    if size(ranksx, 1) < 2
+        if symmetric && T === Missing
+            return ifelse.(axes(ranksx, 2) .== axes(ranksx, 2)', missing, NaN)
+        elseif symmetric
+            return ifelse.(axes(ranksx, 2) .== axes(ranksx, 2)', 1.0, NaN)
+        else
+            return fill(NaN, size(ranksx, 2), size(ranksy, 2))
         end
-        Xjrank = tiedrank(Xj)
-        for i = 1:nc
-            Yi = view(Y, :, i)
-            if any(isnan, Yi)
-                C[j,i] = NaN
-            else
-                Yirank = tiedrank(Yi)
-                C[j,i] = cor(Xjrank, Yirank)
+    end
+    try
+        if symmetric
+            return cor(ranksx)
+        else
+            return cor(ranksx, ranksy)
+        end
+    catch
+        #=This catch block is hit when e.g.
+        ranksx === ranksy = [missing missing;missing missing]
+        =#
+        nr, nc = size(ranksx, 2), size(ranksy, 2)
+        if ranksx === ranksy && T === Missing
+            return fill(missing, nr, nc)
+        elseif missing isa T || missing isa U
+            C = ones(Union{Missing,Float64}, nr, nc)
+        else
+            C = ones(Float64, nr, nc)
+        end
+
+        for j = (symmetric ? 2 : 1):nr
+            for i = 1:(symmetric ? j - 1 : nc)
+                C[j, i] = cor(view(ranksx, :, j), view(ranksy, :, i))
+                symmetric && (C[i, j] = C[j, i])
             end
         end
+        return C
+    end
+end
+
+"""
+    sortperm_matrix(x)
+
+Given `x`, a vector of vectors, return a matrix who's ith column is the sort permutation of
+the ith element of x.
+"""
+function sortperm_matrix(x)
+    m = length(x) == 0 ? 0 : length(first(x))
+    nc = length(x)
+    int64 = Int64[]
+    out = Array{Int}(undef, m, nc)
+
+    Threads.@threads for i in 1:nc
+        ints = task_local_vector(:ints, int64, m)
+        sortperm!(ints, x[i])
+        out[:, i] .= ints
     end
-    return C
+    return out
 end
 
+"""
+    ranks_matrix(x)
+
+Given `x`, a vector of vectors, return a matrix such that the (Pearson) correlaton between
+columns of the return is the Spearman rank correlation between the elements of x.
+"""
+function ranks_matrix(x)
+
+    m = length(x) == 0 ? 0 : length(first(x))
+    nc = length(x)
+    int64 = Int64[]
+
+    if promoted_type(x) === Missing
+        return fill(missing, m, nc)
+    end
+
+    out = Array{Union{Missing,Int,Float64}}(undef, m, nc)
+
+    Threads.@threads for i in 1:nc
+        ints = task_local_vector(:ints, int64, m)
+
+        if any(ismissing, x[i])
+            out[:, i] .= missing
+        elseif any(_isnan, x[i])
+            out[:, i] .= NaN
+        else
+            sortperm!(ints, x[i])
+            _tiedrank!(view(out, :, i), x[i], ints)
+        end
+    end
+    return out
+end
 
 #######################################
 #
@@ -122,17 +397,202 @@ end
 #
 #######################################
 
+"""
+    corkendall(x, y=x; skipmissing::Symbol=:none)
+
+Compute Kendall's rank correlation coefficient, τ. `x` and `y` must be either vectors or
+matrices, and entries may be `missing`.
+
+Uses multiple threads when either `x` or `y` is a matrix.
+
+# Keyword argument
+
+- `skipmissing::Symbol=:none`: If `:none` (the default), `missing` entries in `x` or `y`
+    give rise to `missing` entries in the return. If `:pairwise` when calculating an element
+    of the return, both `i`th entries of the input vectors are skipped if either is missing.
+    If `:listwise` the `i`th rows of both `x` and `y` are skipped if `missing` appears in
+    either; note that this might skip a high proportion of entries. Only allowed when `x` or
+    `y` is a matrix.
+"""
+function corkendall(x::AbstractMatrix, y::AbstractMatrix=x;
+    skipmissing::Symbol=:none)
+    check_rankcor_args(x, y, skipmissing, true)
+    return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing)
+end
+
+function corkendall(x::AbstractVector, y::AbstractVector;
+    skipmissing::Symbol=:none)
+    check_rankcor_args(x, y, skipmissing, false)
+    if x === y
+        return corkendall(x)
+    else
+        x = copy(x)
+        permx = sortperm(x)
+        permute!(x, permx)
+        return corkendall_kernel!(x, y, permx, skipmissing)
+    end
+end
+
+#= corkendall returns a vector in this case, inconsistent with with Statistics.cor and
+StatsBase.corspearman, but consistent with StatsBase.corkendall.
+ =#
+function corkendall(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none)
+    return vec(corkendall(x, reshape(y, (length(y), 1)); skipmissing))
+end
+
+function corkendall(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none)
+    return corkendall(reshape(x, (length(x), 1)), y; skipmissing)
+end
+
+function corkendall(x::AbstractVector{T}) where {T}
+    return T === Missing ? missing : 1.0
+end
+
+function _pairwise!(::Val{:none}, f::typeof(corkendall), dest::AbstractMatrix, x, y,
+    symmetric::Bool)
+    return corkendall_loop!(:none, f, dest, x, y, symmetric)
+end
+
+function _pairwise!(::Val{:pairwise}, f::typeof(corkendall), dest::AbstractMatrix, x, y,
+    symmetric::Bool)
+    return corkendall_loop!(:pairwise, f, dest, x, y, symmetric)
+end
+
+function _pairwise!(::Val{:listwise}, f::typeof(corkendall), dest::AbstractMatrix, x, y,
+    symmetric::Bool)
+    return corkendall_loop!(:none, f, dest, handle_listwise(x, y)..., symmetric)
+end
+
+function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::AbstractMatrix{V},
+    x, y, symmetric::Bool) where {V}
+
+    nr, nc = size(dest)
+    m = length(x) == 0 ? 0 : length(first(x))
+
+    # Swap x and y for more efficient threaded loop.
+    if nr < nc
+        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
+        corkendall_loop!(skipmissing, f, dest′, y, x, symmetric)
+        dest .= transpose(dest′)
+        return dest
+    end
+
+    intvec = Int[]
+    t = promoted_type(x)[]
+    u = promoted_type(y)[]
+    t′ = promoted_nmtype(x)[]
+    u′ = promoted_nmtype(y)[]
+
+    symmetric = x === y
+
+    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+
+        for j in subset
+
+            sortedxj = task_local_vector(:sortedxj, t, m)
+            scratch_py = task_local_vector(:scratch_py, u, m)
+            yi = task_local_vector(:yi, u, m)
+            permx = task_local_vector(:permx, intvec, m)
+            # Ensuring missing is not an element type of scratch_sy, scratch_fx, scratch_fy
+            # gives improved performance.
+            scratch_sy = task_local_vector(:scratch_sy, u′, m)
+            scratch_fx = task_local_vector(:scratch_fx, t′, m)
+            scratch_fy = task_local_vector(:scratch_fy, t′, m)
+
+            sortperm!(permx, x[j])
+            @inbounds for k in eachindex(sortedxj)
+                sortedxj[k] = x[j][permx[k]]
+            end
+
+            for i = 1:(symmetric ? j : nc)
+                # For performance, diagonal is special-cased
+                if x[j] === y[i] && i == j && V !== Union{}
+                    if missing isa V && eltype(x[j]) == Missing
+                        dest[j, i] = missing
+                    else
+                        dest[j, i] = 1.0
+                    end
+                else
+                    yi .= y[i]
+                    dest[j, i] = corkendall_kernel!(sortedxj, yi, permx, skipmissing;
+                        scratch_py, scratch_sy, scratch_fx, scratch_fy)
+                end
+                symmetric && (dest[i, j] = dest[j, i])
+            end
+        end
+    end
+    return dest
+end
+
+# Auxiliary functions for Kendall's rank correlation
+
 # Knight, William R. “A Computer Method for Calculating Kendall's Tau with Ungrouped Data.”
 # Journal of the American Statistical Association, vol. 61, no. 314, 1966, pp. 436–439.
 # JSTOR, www.jstor.org/stable/2282833.
-function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx::AbstractArray{<:Integer}=sortperm(x))
-    if any(isnan, x) || any(isnan, y) return NaN end
-    n = length(x)
-    if n != length(y) error("Vectors must have same length") end
 
-    # Initial sorting
-    permute!(x, permx)
-    permute!(y, permx)
+function check_rankcor_args(x, y, skipmissing, allowlistwise::Bool)
+    Base.require_one_based_indexing(x, y)
+    size(x, 1) == size(y, 1) ||
+        throw(DimensionMismatch("x and y have inconsistent dimensions"))
+    if allowlistwise
+        skipmissing in (:none, :pairwise, :listwise) ||
+            throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise, \
+            but got :$skipmissing"))
+    else
+        skipmissing in (:none, :pairwise) ||
+            throw(ArgumentError("skipmissing must be either :none or :pairwise, but \
+            got :$skipmissing"))
+    end
+end
+
+"""
+    corkendall_kernel!(sortedx::AbstractVector, y::AbstractVector,
+    permx::AbstractVector{<:Integer}, skipmissing::Symbol;
+    scratch_py::AbstractVector=similar(y),
+    scratch_sy::AbstractVector=similar(y),
+    scratch_fx::AbstractVector=similar(sortedx),
+    scratch_fy::AbstractVector=similar(y))
+
+Kendall correlation between two vectors but omitting the initial sorting of the first
+argument. Calculating Kendall correlation between `x` and `y` is thus a two stage process:
+a) sort `x` to get `sortedx`; b) call this function on `sortedx` and `y`, with
+subsequent arguments:
+- `permx`: The permutation that sorts `x` to yield `sortedx`.
+- `scratch_py`: A vector used to permute `y` without allocation.
+- `scratch_sy`: A vector used to sort `y` without allocation.
+- `scratch_fx, scratch_fy`: Vectors used to filter `missing`s from `x` and `y` without
+   allocation.
+"""
+function corkendall_kernel!(sortedx::AbstractVector{T}, y::AbstractVector{U},
+    permx::AbstractVector{<:Integer}, skipmissing::Symbol;
+    scratch_py::AbstractVector{V}=similar(y),
+    scratch_sy::AbstractVector=similar(y),
+    scratch_fx::AbstractVector=similar(sortedx),
+    scratch_fy::AbstractVector=similar(y)) where {T,U,V}
+
+    length(sortedx) >= 2 || return NaN
+
+    if skipmissing == :none
+        if missing isa T && any(ismissing, sortedx)
+            return missing
+        elseif missing isa U && any(ismissing, y)
+            return missing
+        end
+    end
+
+    @inbounds for i in eachindex(y)
+        scratch_py[i] = y[permx[i]]
+    end
+
+    if missing isa T || missing isa V
+        sortedx, permutedy = handle_pairwise(sortedx, scratch_py; scratch_fx, scratch_fy)
+    else
+        permutedy = scratch_py
+    end
+
+    (any(_isnan, sortedx) || any(_isnan, permutedy)) && return NaN
+
+    n = length(sortedx)
 
     # Use widen to avoid overflows on both 32bit and 64bit
     npairs = div(widen(n) * (n - 1), 2)
@@ -140,80 +600,35 @@ function corkendall!(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}, permx
     k = 0
 
     @inbounds for i = 2:n
-        if x[i - 1] == x[i]
+        if sortedx[i-1] == sortedx[i]
             k += 1
         elseif k > 0
-            # Sort the corresponding chunk of y, so the rows of hcat(x,y) are
-            # sorted first on x, then (where x values are tied) on y. Hence
-            # double ties can be counted by calling countties.
-            sort!(view(y, (i - k - 1):(i - 1)))
+            #=
+            Sort the corresponding chunk of permutedy, so rows of hcat(sortedx,permutedy)
+            are sorted first on sortedx, then (where sortedx values are tied) on permutedy.
+            Hence double ties can be counted by calling countties.
+            =#
+            sort!(view(permutedy, (i-k-1):(i-1)))
             ntiesx += div(widen(k) * (k + 1), 2) # Must use wide integers here
-            ndoubleties += countties(y,  i - k - 1, i - 1)
+            ndoubleties += countties(permutedy, i - k - 1, i - 1)
             k = 0
         end
     end
     if k > 0
-        sort!(view(y, (n - k):n))
+        sort!(view(permutedy, (n-k):n))
         ntiesx += div(widen(k) * (k + 1), 2)
-        ndoubleties += countties(y, n - k, n)
+        ndoubleties += countties(permutedy, n - k, n)
     end
 
-    nswaps = merge_sort!(y, 1, n)
-    ntiesy = countties(y, 1, n)
+    nswaps = merge_sort!(permutedy, 1, n, scratch_sy)
+    ntiesy = countties(permutedy, 1, n)
 
     # Calls to float below prevent possible overflow errors when
-    # length(x) exceeds 77_936 (32 bit) or 5_107_605_667 (64 bit)
-    (npairs + ndoubleties - ntiesx - ntiesy - 2 * nswaps) /
-     sqrt(float(npairs - ntiesx) * float(npairs - ntiesy))
-end
-
-"""
-    corkendall(x, y=x)
-
-Compute Kendall's rank correlation coefficient, τ. `x` and `y` must both be either
-matrices or vectors.
-"""
-corkendall(x::AbstractVector{<:Real}, y::AbstractVector{<:Real}) = corkendall!(copy(x), copy(y))
-
-function corkendall(X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real})
-    permy = sortperm(y)
-    return([corkendall!(copy(y), X[:,i], permy) for i in 1:size(X, 2)])
-end
-
-function corkendall(x::AbstractVector{<:Real}, Y::AbstractMatrix{<:Real})
-    n = size(Y, 2)
-    permx = sortperm(x)
-    return(reshape([corkendall!(copy(x), Y[:,i], permx) for i in 1:n], 1, n))
-end
-
-function corkendall(X::AbstractMatrix{<:Real})
-    n = size(X, 2)
-    C = Matrix{Float64}(I, n, n)
-    for j = 2:n
-        permx = sortperm(X[:,j])
-        for i = 1:j - 1
-            C[j,i] = corkendall!(X[:,j], X[:,i], permx)
-            C[i,j] = C[j,i]
-        end
-    end
-    return C
+    # length(sortedx) exceeds 77_936 (32 bit) or 5_107_605_667 (64 bit)
+    return (npairs + ndoubleties - ntiesx - ntiesy - 2 * nswaps) /
+           sqrt(float(npairs - ntiesx) * float(npairs - ntiesy))
 end
 
-function corkendall(X::AbstractMatrix{<:Real}, Y::AbstractMatrix{<:Real})
-    nr = size(X, 2)
-    nc = size(Y, 2)
-    C = Matrix{Float64}(undef, nr, nc)
-    for j = 1:nr
-        permx = sortperm(X[:,j])
-        for i = 1:nc
-            C[j,i] = corkendall!(X[:,j], Y[:,i], permx)
-        end
-    end
-    return C
-end
-
-# Auxiliary functions for Kendall's rank correlation
-
 """
     countties(x::AbstractVector{<:Real}, lo::Integer, hi::Integer)
 
@@ -224,8 +639,8 @@ function countties(x::AbstractVector, lo::Integer, hi::Integer)
     # length(x) exceeds 2^16 (32 bit) or 2^32 (64 bit)
     thistiecount = result = widen(0)
     checkbounds(x, lo:hi)
-    @inbounds for i = (lo + 1):hi
-        if x[i] == x[i - 1]
+    @inbounds for i = (lo+1):hi
+        if x[i] == x[i-1]
             thistiecount += 1
         elseif thistiecount > 0
             result += div(thistiecount * (thistiecount + 1), 2)
@@ -236,7 +651,7 @@ function countties(x::AbstractVector, lo::Integer, hi::Integer)
     if thistiecount > 0
         result += div(thistiecount * (thistiecount + 1), 2)
     end
-    result
+    return result
 end
 
 # Tests appear to show that a value of 64 is optimal,
@@ -246,12 +661,15 @@ const SMALL_THRESHOLD = 64
 # merge_sort! copied from Julia Base
 # (commit 28330a2fef4d9d149ba0fd3ffa06347b50067647, dated 20 Sep 2020)
 """
-    merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector=similar(v, 0))
+    merge_sort!(v::AbstractVector, lo::Integer, hi::Integer,
+    t::AbstractVector=similar(v, 0))
 
 Mutates `v` by sorting elements `x[lo:hi]` using the merge sort algorithm.
-This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort distance.
+This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort
+distance.
 """
-function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVector=similar(v, 0))
+function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer,
+    t::AbstractVector=similar(v, 0))
     # Use of widen below prevents possible overflow errors when
     # length(v) exceeds 2^16 (32 bit) or 2^32 (64 bit)
     nswaps = widen(0)
@@ -261,7 +679,7 @@ function merge_sort!(v::AbstractVector, lo::Integer, hi::Integer, t::AbstractVec
         m = midpoint(lo, hi)
         (length(t) < m - lo + 1) && resize!(t, m - lo + 1)
 
-        nswaps = merge_sort!(v, lo,  m, t)
+        nswaps = merge_sort!(v, lo, m, t)
         nswaps += merge_sort!(v, m + 1, hi, t)
 
         i, j = 1, lo
@@ -294,25 +712,28 @@ end
 
 # insertion_sort! and midpoint copied from Julia Base
 # (commit 28330a2fef4d9d149ba0fd3ffa06347b50067647, dated 20 Sep 2020)
-midpoint(lo::T, hi::T) where T <: Integer = lo + ((hi - lo) >>> 0x01)
+midpoint(lo::T, hi::T) where {T<:Integer} = lo + ((hi - lo) >>> 0x01)
 midpoint(lo::Integer, hi::Integer) = midpoint(promote(lo, hi)...)
 
 """
     insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer)
 
 Mutates `v` by sorting elements `x[lo:hi]` using the insertion sort algorithm.
-This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort distance.
+This method is a copy-paste-edit of sort! in base/sort.jl, amended to return the bubblesort
+distance.
 """
 function insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer)
-    if lo == hi return widen(0) end
+    if lo == hi
+        return widen(0)
+    end
     nswaps = widen(0)
-    @inbounds for i = lo + 1:hi
+    @inbounds for i = lo+1:hi
         j = i
         x = v[i]
         while j > lo
-            if x < v[j - 1]
+            if x < v[j-1]
                 nswaps += 1
-                v[j] = v[j - 1]
+                v[j] = v[j-1]
                 j -= 1
                 continue
             end
@@ -322,3 +743,12 @@ function insertion_sort!(v::AbstractVector, lo::Integer, hi::Integer)
     end
     return nswaps
 end
+
+# Auxiliary functions for both Kendall's and Spearman's rank correlations
+
+# _isnan required so that corkendall and corspearman have correct handling of NaNs and
+# can also accept arguments with element type for which isnan is not defined but isless is
+# is defined, so that rank correlation makes sense.
+_isnan(x::T) where {T<:Number} = isnan(x)
+_isnan(x) = false
+
diff --git a/test/pairwise.jl b/test/pairwise.jl
index 59e081b08..35ecd8279 100644
--- a/test/pairwise.jl
+++ b/test/pairwise.jl
@@ -9,8 +9,14 @@ Random.seed!(1)
 # to avoid using specialized method
 arbitrary_fun(x, y) = cor(x, y)
 
-@testset "pairwise and pairwise! with $f" for f in (arbitrary_fun, cor, cov)
+@testset "pairwise and pairwise! with $f" for f in (arbitrary_fun, cor, cov, corkendall, corspearman)
+
+    isrankcor = f in (corkendall, corspearman)
+    iscor = isrankcor || f == cor
+    throwsforzerolengthinput = f in (arbitrary_fun, cor, cov)
+
     @testset "basic interface" begin
+
         x = [rand(10) for _ in 1:4]
         y = [rand(Float32, 10) for _ in 1:5]
         # to test case where inference of returned eltype fails
@@ -23,24 +29,35 @@ arbitrary_fun(x, y) = cor(x, y)
         @test res == res2 == [f(xi, yi) for xi in x, yi in y]
 
         res = pairwise(f, y, z)
-        @test res isa Matrix{Float32}
-        res2 = zeros(Float32, size(res))
-        @test pairwise!(f, res2, y, z) === res2
-        @test res == res2 == [f(yi, zi) for yi in y, zi in z]
+        if isrankcor
+            @test res isa Matrix{Float64}
+            @test res == [f(yi, zi) for yi in y, zi in z]
+        else
+            @test res isa Matrix{Float32}
+            res2 = zeros(Float32, size(res))
+            @test pairwise!(f, res2, y, z) === res2
+            @test res == res2 == [f(yi, zi) for yi in y, zi in z]
+        end
 
         res = pairwise(f, Any[[1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]])
         @test res isa Matrix{Float64}
         res2 = zeros(AbstractFloat, size(res))
         @test pairwise!(f, res2, Any[[1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]]) === res2
-        @test res == res2 ==
-            [f(xi, yi) for xi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]),
-                           yi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0])]
+
+        res3 = [f(xi, yi) for xi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0]),
+                yi in ([1.0, 2.0, 3.0], [1.0f0, 3.0f0, 10.5f0])]
+        @test res == res2
+        @test all(isapprox.(res2, res3))
+
         @test res isa Matrix{Float64}
 
         @inferred pairwise(f, x, y)
-
-        @test_throws Union{ArgumentError,MethodError} pairwise(f, [Int[]], [Int[]])
-        @test_throws Union{ArgumentError,MethodError} pairwise!(f, zeros(1, 1), [Int[]], [Int[]])
+        if throwsforzerolengthinput
+            # @test_throws Union{ArgumentError,MethodError} pairwise(f, [Int[]], [Int[]])
+            @test_throws CompositeException pairwise(f, [Int[]], [Int[]])
+            #@test_throws Union{ArgumentError,MethodError} pairwise!(f, zeros(1, 1), [Int[]], [Int[]])
+            @test_throws CompositeException pairwise!(f, zeros(1, 1), [Int[]], [Int[]])
+        end
 
         res = pairwise(f, [], [])
         @test size(res) == (0, 0)
@@ -69,7 +86,18 @@ arbitrary_fun(x, y) = cor(x, y)
         @test_throws DimensionMismatch pairwise!(f, zeros(1, 2), x, y)
         @test_throws DimensionMismatch pairwise!(f, zeros(1, 2), [], [])
         @test_throws DimensionMismatch pairwise!(f, zeros(0, 0),
-                                                 [], [[1, 2], [2, 3]])
+            [], [[1, 2], [2, 3]])
+    end
+
+    if f in (cor, corkendall, corspearman)
+        @testset "handling === elements" begin
+            xm = [missing, 1, 3, 2]
+            xn = [NaN, 1, 3, 2]
+            xmn = [missing, NaN, 1, 3, 2]
+            @test isequal(pairwise(f, [xm, xm], [xm, copy(xm)]), [1.0 missing; missing missing])
+            @test isequal(pairwise(f, [xn, xn], [xn, copy(xn)]), [1.0 NaN; NaN NaN])
+            @test isequal(pairwise(f, [xmn, xmn], [xmn, copy(xmn)]), [1.0 missing; missing missing])
+        end
     end
 
     @testset "missing values handling interface" begin
@@ -79,77 +107,99 @@ arbitrary_fun(x, y) = cor(x, y)
 
         res = pairwise(f, xm, ym)
         @test res isa Matrix{Missing}
-        res2 = zeros(Union{Float64, Missing}, size(res))
+        res2 = zeros(Union{Float64,Missing}, size(res))
         @test pairwise!(f, res2, xm, ym) === res2
         @test res ≅ res2 ≅ [missing for xi in xm, yi in ym]
 
         res = pairwise(f, xm, ym, skipmissing=:pairwise)
         @test res isa Matrix{Float64}
-        res2 = zeros(Union{Float64, Missing}, size(res))
+        res2 = zeros(Union{Float64,Missing}, size(res))
         @test pairwise!(f, res2, xm, ym, skipmissing=:pairwise) === res2
         @test res ≅ res2
-        @test isapprox(res, [f(collect.(skipmissings(xi, yi))...) for xi in xm, yi in ym],
-                       rtol=1e-6)
+        # Use myskipmissings rather than skipmissings to avoid deprecation warning
+        function myskipmissings(x, y)
+            which = @. !(ismissing(x) || ismissing(y))
+            return x[which], y[which]
+        end
+
+        #@test isapprox(res, [f(collect.(skipmissings(xi, yi))...) for xi in xm, yi in ym],
+        #    rtol=1e-6)
+        @test isapprox(res, [f(myskipmissings(xi, yi)...) for xi in xm, yi in ym],
+            rtol=1e-6)
 
         res = pairwise(f, ym, zm, skipmissing=:pairwise)
-        @test res isa Matrix{Float32}
-        res2 = zeros(Union{Float32, Missing}, size(res))
-        @test pairwise!(f, res2, ym, zm, skipmissing=:pairwise) === res2
-        @test res ≅ res2
-        @test isapprox(res, [f(collect.(skipmissings(yi, zi))...) for yi in ym, zi in zm],
-                       rtol=1e-6)
+        if isrankcor
+            @test res isa Matrix{Float64}
+            #    @test isequal(res, [f(collect.(skipmissings(yi, zi))...) for yi in ym, zi in zm])
+            @test isequal(res, [f(myskipmissings(yi, zi)...) for yi in ym, zi in zm])
+        else
+            @test res isa Matrix{Float32}
+            res2 = zeros(Union{Float32,Missing}, size(res))
+            @test pairwise!(f, res2, ym, zm, skipmissing=:pairwise) === res2
+            @test res ≅ res2
+            #@test isapprox(res, [f(collect.(skipmissings(yi, zi))...) for yi in ym, zi in zm],
+            #    rtol=1e-6)
+            @test isapprox(res, [f(myskipmissings(yi, zi)...) for yi in ym, zi in zm],
+                rtol=1e-6)
 
+        end
         nminds = mapreduce(x -> .!ismissing.(x),
-                        (x, y) -> x .& y,
-                        [xm; ym])
+            (x, y) -> x .& y,
+            [xm; ym])
         res = pairwise(f, xm, ym, skipmissing=:listwise)
         @test res isa Matrix{Float64}
-        res2 = zeros(Union{Float64, Missing}, size(res))
+        res2 = zeros(Union{Float64,Missing}, size(res))
         @test pairwise!(f, res2, xm, ym, skipmissing=:listwise) === res2
         @test res ≅ res2
         @test isapprox(res, [f(view(xi, nminds), view(yi, nminds)) for xi in xm, yi in ym],
-                       rtol=1e-6)
+            rtol=1e-6)
 
         if VERSION >= v"1.6.0-DEV"
             # inference of cor fails so use an inferrable function
-            # to check that pairwise itself is inferrable
+            # to check that StatsBase.pairwise itself is inferrable
             for skipmissing in (:none, :pairwise, :listwise)
                 g(x, y=x) = pairwise((x, y) -> x[1] * y[1], x, y, skipmissing=skipmissing)
-                @test Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) ==
-                    Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}},
-                                                    Vector{Vector{Union{Float64, Missing}}}}) ==
-                    Matrix{<: Union{Float64, Missing}}
+                @test Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64,Missing}}}}) ==
+                      Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64,Missing}}},
+                          Vector{Vector{Union{Float64,Missing}}}}) ==
+                      Matrix{<:Union{Float64,Missing}}
                 if skipmissing in (:pairwise, :listwise)
-                    @test_broken Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}}}) ==
-                        Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64, Missing}}},
-                                                        Vector{Vector{Union{Float64, Missing}}}}) ==
-                        Matrix{Float64}
+                    @test_broken Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64,Missing}}}}) ==
+                                 Core.Compiler.return_type(g, Tuple{Vector{Vector{Union{Float64,Missing}}},
+                                     Vector{Vector{Union{Float64,Missing}}}}) ==
+                                 Matrix{Float64}
                 end
             end
         end
 
         @test_throws ArgumentError pairwise(f, xm, ym, skipmissing=:something)
-        @test_throws ArgumentError pairwise!(f, zeros(Union{Float64, Missing},
-                                                      length(xm), length(ym)), xm, ym,
-                                             skipmissing=:something)
+        @test_throws ArgumentError pairwise!(f, zeros(Union{Float64,Missing},
+                length(xm), length(ym)), xm, ym,
+            skipmissing=:something)
 
         # variable with only missings
         xm = [fill(missing, 10), rand(10)]
         ym = [rand(10), rand(10)]
 
         res = pairwise(f, xm, ym)
-        @test res isa Matrix{Union{Float64, Missing}}
-        res2 = zeros(Union{Float64, Missing}, size(res))
+        @test res isa Matrix{Union{Float64,Missing}}
+        res2 = zeros(Union{Float64,Missing}, size(res))
         @test pairwise!(f, res2, xm, ym) === res2
         @test res ≅ res2 ≅ [f(xi, yi) for xi in xm, yi in ym]
 
         if VERSION >= v"1.5" # Fails with UndefVarError on Julia 1.0
-            @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:pairwise)
-            @test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:listwise)
-
-            res = zeros(Union{Float64, Missing}, length(xm), length(ym))
-            @test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:pairwise)
-            @test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:listwise)
+            if throwsforzerolengthinput
+                #@test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:pairwise)
+                @test_throws CompositeException pairwise(f, xm, ym, skipmissing=:pairwise)
+                #@test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:listwise)
+                @test_throws CompositeException pairwise(f, xm, ym, skipmissing=:listwise)
+
+                res = zeros(Union{Float64,Missing}, length(xm), length(ym))
+                #@test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:pairwise)
+                @test_throws CompositeException pairwise!(f, res, xm, ym, skipmissing=:pairwise)
+                #@test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:listwise)
+                @test_throws CompositeException pairwise!(f, res, xm, ym, skipmissing=:listwise)
+            end
         end
 
         for sm in (:pairwise, :listwise)
@@ -162,12 +212,11 @@ arbitrary_fun(x, y) = cor(x, y)
     @testset "iterators" begin
         x = (v for v in [rand(10) for _ in 1:4])
         y = (v for v in [rand(10) for _ in 1:4])
-
         res = @inferred pairwise(f, x, y)
+
         res2 = zeros(size(res))
         @test pairwise!(f, res2, x, y) === res2
         @test res == res2 == pairwise(f, collect(x), collect(y))
-
         res = @inferred(pairwise(f, x))
         res2 = zeros(size(res))
         @test pairwise!(f, res2, x) === res2
@@ -179,13 +228,13 @@ arbitrary_fun(x, y) = cor(x, y)
         y = (Iterators.drop(v, 1) for v in [rand(10) for _ in 1:4])
 
         @test pairwise((x, y) -> f(collect(x), collect(y)), x, y) ==
-            [f(collect(xi), collect(yi)) for xi in x, yi in y]
+              [f(collect(xi), collect(yi)) for xi in x, yi in y]
         @test pairwise((x, y) -> f(collect(x), collect(y)), x) ==
-            [f(collect(xi1), collect(xi2)) for xi1 in x, xi2 in x]
+              [f(collect(xi1), collect(xi2)) for xi1 in x, xi2 in x]
         @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y,
-                                            skipmissing=:pairwise)
+            skipmissing=:pairwise)
         @test_throws ArgumentError pairwise((x, y) -> f(collect(x), collect(y)), x, y,
-                                            skipmissing=:listwise)
+            skipmissing=:listwise)
     end
 
     @testset "two-argument method" begin
@@ -201,8 +250,8 @@ arbitrary_fun(x, y) = cor(x, y)
         y = [rand(10) for _ in 1:4]
 
         @test pairwise(f, x, x, symmetric=true) ==
-            pairwise(f, x, symmetric=true) ==
-            Symmetric(pairwise(f, x, x), :U)
+              pairwise(f, x, symmetric=true) ==
+              Symmetric(pairwise(f, x, x), :U)
 
         res = zeros(4, 4)
         res2 = zeros(4, 4)
@@ -214,47 +263,64 @@ arbitrary_fun(x, y) = cor(x, y)
         @test_throws ArgumentError pairwise!(f, res, x, y, symmetric=true)
     end
 
-    @testset "cor corner cases" begin
-        # Integer inputs must give a Float64 output
-        res = pairwise(cor, [[1, 2, 3], [1, 5, 2]])
-        @test res isa Matrix{Float64}
-        @test res == [cor(xi, yi) for xi in ([1, 2, 3], [1, 5, 2]),
-                                      yi in ([1, 2, 3], [1, 5, 2])]
-
-        # NaNs are ignored for the diagonal
-        res = pairwise(cor, [[1, 2, NaN], [1, 5, 2]])
-        @test res isa Matrix{Float64}
-        @test res ≅ [1.0 NaN
-                     NaN 1.0]
-
-        # missings are ignored for the diagonal
-        res = pairwise(cor, [[1, 2, 7], [1, 5, missing]])
-        @test res isa Matrix{Union{Float64, Missing}}
-        @test res ≅ [1.0 missing
-                     missing 1.0]
-        res = pairwise(cor, Vector{Union{Int, Missing}}[[missing, missing, missing],
-                                                        [missing, missing, missing]])
-        @test res isa Matrix{Union{Float64, Missing}}
-        @test res ≅ [1.0 missing
-                     missing 1.0]
-        if VERSION >= v"1.5"
-            # except when eltype is Missing
-            res = pairwise(cor, [[missing, missing, missing],
-                                 [missing, missing, missing]])
-            @test res isa Matrix{Missing}
-            @test res ≅ [missing missing
-                         missing missing]
-        end
+    if iscor
+        @testset "$f corner cases" begin
+            # Integer inputs must give a Float64 output
+            res = pairwise(f, [[1, 2, 3], [1, 5, 2]])
+            @test res isa Matrix{Float64}
+            if f == corspearman
+                @test isapprox(res, [f(xi, yi) for xi in ([1, 2, 3], [1, 5, 2]),
+                                     yi in ([1, 2, 3], [1, 5, 2])])
+            else
+                @test res == [f(xi, yi) for xi in ([1, 2, 3], [1, 5, 2]),
+                              yi in ([1, 2, 3], [1, 5, 2])]
+            end
 
-        for sm in (:pairwise, :listwise)
-            res = pairwise(cor, [[1, 2, NaN, 4], [1, 5, 5, missing]], skipmissing=sm)
+            # NaNs are ignored for the diagonal
+            res = pairwise(f, [[1, 2, NaN], [1, 5, 2]])
             @test res isa Matrix{Float64}
             @test res ≅ [1.0 NaN
-                         NaN 1.0]
+                NaN 1.0]
+
+            # missings are ignored for the diagonal
+            res = pairwise(f, [[1, 2, 7], [1, 5, missing]])
+            @test res isa Matrix{Union{Float64,Missing}}
+            @test res ≅ [1.0 missing
+                missing 1.0]
+            res = pairwise(f, Vector{Union{Int,Missing}}[[missing, missing, missing],
+                [missing, missing, missing]])
+            @test res isa Matrix{Union{Float64,Missing}}
+            @test res ≅ [1.0 missing
+                missing 1.0]
+
+            #NaNs and missings are ignored for the "diagonal" of a not-square result.
+            v = [missing, 1, 2, 3]
+            @test isequal(pairwise(f, [v, v], [v, v, v]),
+                [1.0 missing missing; missing 1.0 missing])
+            w = [NaN, 1, 2, 3]
+            @test isequal(pairwise(f, [w, w], [w, w, w]), [1.0 NaN NaN; NaN 1.0 NaN])
+
             if VERSION >= v"1.5"
-                @test_throws ArgumentError pairwise(cor, [[missing, missing, missing],
-                                                          [missing, missing, missing]],
-                                                    skipmissing=sm)
+                # except when eltype is Missing
+                res = pairwise(f, [[missing, missing, missing],
+                    [missing, missing, missing]])
+                @test res isa Matrix{Missing}
+                @test res ≅ [missing missing
+                    missing missing]
+            end
+
+            for sm in (:pairwise, :listwise)
+                res = pairwise(f, [[1, 2, NaN, 4], [1, 5, 5, missing]], skipmissing=sm)
+                @test res isa Matrix{Float64}
+                @test res ≅ [1.0 NaN
+                    NaN 1.0]
+                if VERSION >= v"1.5"
+                    if f == cor
+                        @test_throws CompositeException pairwise(f, [[missing, missing, missing],
+                                [missing, missing, missing]],
+                            skipmissing=sm)
+                    end
+                end
             end
         end
     end
@@ -262,30 +328,45 @@ arbitrary_fun(x, y) = cor(x, y)
     @testset "promote_type_union" begin
         @test StatsBase.promote_type_union(Int) === Int
         @test StatsBase.promote_type_union(Real) === Real
-        @test StatsBase.promote_type_union(Union{Int, Float64}) === Float64
-        @test StatsBase.promote_type_union(Union{Int, Missing}) === Union{Int, Missing}
-        @test StatsBase.promote_type_union(Union{Int, String}) === Any
+        @test StatsBase.promote_type_union(Union{Int,Float64}) === Float64
+        @test StatsBase.promote_type_union(Union{Int,Missing}) === Union{Int,Missing}
+        @test StatsBase.promote_type_union(Union{Int,String}) === Any
         @test StatsBase.promote_type_union(Vector) === Any
         @test StatsBase.promote_type_union(Union{}) === Union{}
         if VERSION >= v"1.6.0-DEV"
-            @test StatsBase.promote_type_union(Tuple{Union{Int, Float64}}) ===
-                Tuple{Real}
+            @test StatsBase.promote_type_union(Tuple{Union{Int,Float64}}) ===
+                  Tuple{Real}
         else
-            @test StatsBase.promote_type_union(Tuple{Union{Int, Float64}}) ===
-                Any
+            @test StatsBase.promote_type_union(Tuple{Union{Int,Float64}}) ===
+                  Any
         end
     end
 
     @testset "type-unstable corner case (#771)" begin
-        v = [rand(5) for _=1:10]
+        v = [rand(5) for _ = 1:10]
         function f(v)
             pairwise(v) do x, y
                 (x[1] < 0 ? nothing :
-                    x[1] > y[1] ? 1 : 1.5,
-                 0)
+                 x[1] > y[1] ? 1 : 1.5,
+                    0)
             end
         end
         res = f(v)
-        @test res isa Matrix{Tuple{Real, Int}}
+        @test res isa Matrix{Tuple{Real,Int}}
+    end
+end
+
+@testset "pairwise against inputs with non-numeric elements" begin
+    for f in (corkendall, corspearman)
+        x = [["a", "b", "c"], ["c", "b", "a"]]
+        @test pairwise(f, x) ≈ [1.0 -1.0; -1.0 1.0]
     end
+end
+
+@testset "pairwise with vectors of unequal length" begin
+    f(x, y) = length(x) - length(y)
+    x = [randn(i) for i in 0:3]
+    @test isequal(pairwise(f, x), (0:3) .- (0:3)')
+    @test_throws ArgumentError pairwise(f, x, skipmissing=:pairwise)
+    @test_throws ArgumentError pairwise(f, x, skipmissing=:listwise)
 end
\ No newline at end of file
diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index dc0207ee1..484474d99 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -1,161 +1,372 @@
-using StatsBase
+using StatsBase, Random
 using Test
 
-X = Float64[1 0; 2 1; 3 0; 4 1; 5 10]
-Y = Float64[5 5 6; 3 4 1; 4 0 4; 2 6 1; 5 7 10]
-
-x1 = X[:,1]
-x2 = X[:,2]
-y = Y[:,1]
-
-# corspearman
-
-@test corspearman(x1, y) ≈ -0.102597835208515
-@test corspearman(x2, y) ≈ -0.081110710565381
-
-@test corspearman(X, y) ≈ [-0.102597835208515, -0.081110710565381]
-@test corspearman(y, X) ≈ [-0.102597835208515 -0.081110710565381]
-
-c11 = corspearman(x1, x1)
-c12 = corspearman(x1, x2)
-c22 = corspearman(x2, x2)
-@test c11 ≈ 1.0
-@test c22 ≈ 1.0
-@test corspearman(X, X) ≈ [c11 c12; c12 c22]
-@test corspearman(X)    ≈ [c11 c12; c12 c22]
-
-@test corspearman(X, Y) ==
-     [corspearman(X[:,i], Y[:,j]) for i in axes(X, 2), j in axes(Y, 2)]
-
-# corkendall
-
-# Check error, handling of NaN, Inf etc
-@test_throws ErrorException("Vectors must have same length") corkendall([1,2,3,4], [1,2,3])
-@test isnan(corkendall([1,2], [3,NaN]))
-@test isnan(corkendall([1,1,1], [1,2,3]))
-@test corkendall([-Inf,-0.0,Inf],[1,2,3]) == 1.0
-
-# Test, with exact equality, some known results. 
-# AbstractVector{<:Real}, AbstractVector{<:Real}
-@test corkendall(x1, y) == -1/sqrt(90)
-@test corkendall(x2, y) == -1/sqrt(72)
-# AbstractMatrix{<:Real}, AbstractVector{<:Real}
-@test corkendall(X, y)  == [-1/sqrt(90), -1/sqrt(72)]
-# AbstractVector{<:Real}, AbstractMatrix{<:Real}
-@test corkendall(y, X)  == [-1/sqrt(90) -1/sqrt(72)]
-
-# n = 78_000 tests for overflow errors on 32 bit
-# Testing for overflow errors on 64bit would require n be too large for practicality
-# This also tests merge_sort! since n is (much) greater than SMALL_THRESHOLD.
-n = 78_000
-# Test with many repeats
-@test corkendall(repeat(x1, n), repeat(y, n)) ≈ -1/sqrt(90)
-@test corkendall(repeat(x2, n), repeat(y, n)) ≈ -1/sqrt(72)
-@test corkendall(repeat(X, n), repeat(y, n))  ≈ [-1/sqrt(90), -1/sqrt(72)]
-@test corkendall(repeat(y, n), repeat(X, n))  ≈ [-1/sqrt(90) -1/sqrt(72)]
-@test corkendall(repeat([0,1,1,0], n), repeat([1,0,1,0], n)) == 0.0
-
-# Test with no repeats, note testing for exact equality
-@test corkendall(collect(1:n), collect(1:n))          == 1.0
-@test corkendall(collect(1:n), reverse(collect(1:n))) == -1.0
-
-# All elements identical should yield NaN
-@test isnan(corkendall(repeat([1], n), collect(1:n)))
-
-c11 = corkendall(x1, x1)
-c12 = corkendall(x1, x2)
-c22 = corkendall(x2, x2)
-
-# AbstractMatrix{<:Real}, AbstractMatrix{<:Real}
-@test corkendall(X, X) ≈ [c11 c12; c12 c22]
-# AbstractMatrix{<:Real}
-@test corkendall(X)    ≈ [c11 c12; c12 c22]
-
-@test c11 == 1.0
-@test c22 == 1.0
-@test c12 == 3/sqrt(20)
-
-# Finished testing for overflow, so redefine n for speedier tests
-n = 100
-
-@test corkendall(repeat(X, n), repeat(X, n)) ≈ [c11 c12; c12 c22]
-@test corkendall(repeat(X, n))               ≈ [c11 c12; c12 c22]
-
-# All eight three-element permutations
-z = [1  1  1;
-     1  1  2;
-     1  2  2;
-     1  2  2;
-     1  2  1;
-     2  1  2;
-     1  1  2;
-     2  2  2]
-
-@test corkendall(z)         == [1 0 1/3; 0 1 0; 1/3 0 1]
-@test corkendall(z, z)      == [1 0 1/3; 0 1 0; 1/3 0 1]
-@test corkendall(z[:,1], z) == [1 0 1/3]
-@test corkendall(z, z[:,1]) == [1; 0; 1/3]
-
-z = float(z)
-@test corkendall(z)         == [1 0 1/3; 0 1 0; 1/3 0 1]
-@test corkendall(z, z)      == [1 0 1/3; 0 1 0; 1/3 0 1]
-@test corkendall(z[:,1], z) == [1 0 1/3]
-@test corkendall(z, z[:,1]) == [1; 0; 1/3]
-
-w = repeat(z, n)
-@test corkendall(w)         == [1 0 1/3; 0 1 0; 1/3 0 1]
-@test corkendall(w, w)      == [1 0 1/3; 0 1 0; 1/3 0 1]
-@test corkendall(w[:,1], w) == [1 0 1/3]
-@test corkendall(w, w[:,1]) == [1; 0; 1/3]
-
-StatsBase.midpoint(1,10)        == 5
-StatsBase.midpoint(1,widen(10)) == 5
-
-
-# NaN handling
-
-Xnan = copy(X)
-Xnan[1,1] = NaN
-Ynan = copy(Y)
-Ynan[2,1] = NaN
-
-for f in (corspearman, corkendall)
-     @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4]))
-     @test all(isnan, f([1.0, NaN], [1 2; 3 4]))
-     @test all(isnan, f([1 2; 3 4], [1.0, NaN]))
-     @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1])
-     @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4]))
-     @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4]))
-
-     @test isequal(f(Xnan, Ynan),
-                   [f(Xnan[:,i], Ynan[:,j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)])
-     @test isequal(f(Xnan),
-                   [i == j ? 1.0 : f(Xnan[:,i], Xnan[:,j])
-                    for i in axes(Xnan, 2), j in axes(Xnan, 2)])
-     for k in 1:2
-          @test isequal(f(Xnan[:,k], Ynan),
-                        [f(Xnan[:,k], Ynan[:,j]) for i in 1:1, j in axes(Ynan, 2)])
-          # TODO: fix corkendall (PR#659)
-          if f === corspearman
-               @test isequal(f(Xnan, Ynan[:,k]),
-                             [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2), j in 1:1])
-          else
-               @test isequal(f(Xnan, Ynan[:,k]),
-                             [f(Xnan[:,i], Ynan[:,k]) for i in axes(Xnan, 2)])
-          end
-     end
+@testset "rank_correlation_auxiliary_fns" begin
+
+    #Auxiliary functions for corkendall
+    n = 100
+    x = [1, 2, 3, missing, 4]
+    y = [missing, 1, 2, 3, 4]
+    u = [missing, missing, 1, 2]
+    v = [3, 4, missing, missing]
+
+    mx = [1 2
+        missing 3
+        4 missing
+        missing missing
+        5 6]
+
+    @test StatsBase.handle_pairwise(x, y) == ([2, 3, 4], [1, 2, 4])
+    @test StatsBase.handle_pairwise(float.(x), y) == ([2.0, 3.0, 4.0], [1, 2, 4])
+    @test StatsBase.handle_pairwise(x, float.(y)) == ([2, 3, 4], [1.0, 2.0, 4.0])
+    @test StatsBase.handle_pairwise(u, v) == (Int64[], Int64[])
+
+    v = collect(100:-1:1)
+    StatsBase.insertion_sort!(v, 1, n)
+    @test v == 1:n
+
+    v = collect(1000:-1:1)
+    StatsBase.merge_sort!(v, 1, 1000)
+    @test v == 1:1000
+
+    @test StatsBase.midpoint(1, 10) == 5
+    @test StatsBase.midpoint(1, widen(10)) == 5
+
+    @test StatsBase.equal_sum_subsets(0, 1) == Vector{Int64}[]
+    @test sum.(StatsBase.equal_sum_subsets(100, 5)) == repeat([1010], 5)
+    @test sort(vcat(StatsBase.equal_sum_subsets(500, 7)...)) == collect(1:500)
+
 end
 
+#=
+Edge cases (to refer to when writing tests)
+===========================================
+In the symmetric case (x===y) on-diagonal terms should be 1.0 apart from the special case of
+eltype(x) === Missing && skipmissing == :none, in which case the on-diagonal terms should be
+missing.
+Otherwise, if x and y are equal-length vectors with length(x)<2
+f(x,y) == NaN
+otherwise if x and y are equal-length vectors and either contains missing
+f(x,y) == missing
+otherwise if x and y are equal-length vectors and either contains NaN
+f(x,y) == NaN
+otherwise
+f(x,y) = the required Kendall or Spearman correlation.
+
+This behaviour is close to but not quite identical to Statistics.cor. e.g.:
+
+julia> cor(fill(missing,3,2))
+2×2 Matrix{Missing}:
+ missing  missing
+ missing  missing
+
+julia> corkendall(fill(missing,3,2)) #SAME behavior as cor
+2×2 Matrix{Missing}:
+ missing  missing
+ missing  missing
+
+julia> cor(Matrix{Union{Missing,Float64}}(missing,5,3))
+3×3 Matrix{Missing}:
+ missing  missing  missing
+ missing  missing  missing
+ missing  missing  missing
+
+julia> corkendall(Matrix{Union{Missing,Float64}}(missing,5,3)) #DIFFERENT behaviour from cor
+3×3 Matrix{Union{Missing, Float64}}:
+ 1.0        missing   missing
+  missing  1.0        missing
+  missing   missing  1.0
+=#
+
+@testset "$f" for f in (corkendall, corspearman)
+
+    n = 100
+    big_n = 78_000
+    X = Float64[1 0; 2 1; 3 0; 4 1; 5 10]
+    Y = Float64[5 5 6; 3 4 1; 4 0 4; 2 6 1; 5 7 10]
+    Xm = [1 0; missing 1; 2 1; 3 0; 4 1; 5 10]
+    Ym = [5 5 6; 1 2 3; 3 4 1; 4 0 4; 2 6 1; 5 7 10]
+    xm = [missing, missing, missing, missing, missing]
+    xmm = hcat(xm, xm)
+    a = [5, 2, 3, 4, 1]
+    b = [1, 4, 2, 3, 5]
+
+    x1 = X[:, 1]
+    x2 = X[:, 2]
+    y1 = Y[:, 1]
+
+    c11 = f(x1, x1)
+    c12 = f(x1, x2)
+    c22 = f(x2, x2)
+
+    # Test some known results
+    if f == corkendall
+        # Vector, Vector
+        @test f(x1, y1) == -1 / sqrt(90)
+        @test f(x2, y1) == -1 / sqrt(72)
+        # Matrix, Vector
+        @test f(X, y1) == [-1 / sqrt(90), -1 / sqrt(72)]
+        # Vector, Matrix
+        @test f(y1, X) == [-1 / sqrt(90) -1 / sqrt(72)]
+
+        # big_n = 78_000 tests for overflow errors on 32 bit
+        # Testing for overflow errors on 64 bit is not practical
+        # This also tests merge_sort! since big_n is (much) greater than SMALL_THRESHOLD.
+        # Test with many repeats
+        @test f(repeat(x1, big_n), repeat(y1, big_n)) ≈ -1 / sqrt(90)
+        @test f(repeat(x2, big_n), repeat(y1, big_n)) ≈ -1 / sqrt(72)
+        @test f(repeat(X, big_n), repeat(y1, big_n)) ≈ [-1 / sqrt(90), -1 / sqrt(72)]
+        @test f(repeat(y1, big_n), repeat(X, big_n)) ≈ [-1 / sqrt(90) -1 / sqrt(72)]
+    elseif f == corspearman
+        @test corspearman(x1, y1) ≈ -1 / sqrt(95)
+        @test corspearman(repeat(x1, 1000), repeat(y1, 1000)) ≈ -1 / sqrt(95)
+        @test corspearman(vcat(missing, x1, missing), vcat(missing, y1, missing), skipmissing=:pairwise) ≈ -1 / sqrt(95)
+        @test corspearman(x2, y1) ≈ -3 / sqrt(1368)
+        @test corspearman(X, y1) ≈ [-1 / sqrt(95), -3 / sqrt(1368)]
+        @test corspearman(y1, X) ≈ [-1 / sqrt(95) -3 / sqrt(1368)]
+    end
+
+    # Matrix, Matrix
+    @test f(X, X) ≈ [c11 c12; c12 c22]
+    # Matrix
+    @test f(X) ≈ [c11 c12; c12 c22]
+
+    @test c11 == 1.0
+    @test c22 == 1.0
+    if f == corkendall
+        @test c12 == 3 / sqrt(20)
+    elseif f == corspearman
+        @test c12 == 7 / sqrt(90)
+    end
+    @test f(repeat(X, n), repeat(X, n)) ≈ [c11 c12; c12 c22]
+    @test f(repeat(X, n)) ≈ [c11 c12; c12 c22]
+    @test f(X, Y) ≈
+          [f(X[:, i], Y[:, j]) for i in axes(X, 2), j in axes(Y, 2)]
+
+    @test f(vcat(missing, a), vcat(missing, b), skipmissing=:pairwise) == f(a, b)
+    @test f(vcat(a, missing), vcat(missing, b), skipmissing=:pairwise) ==
+          f(a[2:end], b[1:(end-1)])
+    @test f(hcat(vcat(a, missing), vcat(missing, b)), skipmissing=:listwise) ==
+          f(hcat(a[2:end], b[1:(end-1)]))
+    @test f(Xm, Xm, skipmissing=:pairwise) == f(X, X)
+    @test f(Xm, Xm, skipmissing=:listwise) == f(X, X)
+    @test f(Xm, Ym, skipmissing=:listwise) == f(X, Y)
+    if f == corkendall
+        @test f(Xm, Ym, skipmissing=:pairwise) ≈ [-1/√90 0.4 1/√90; -2/√154 7/√165 -1/√154]
+    end
+    @test isnan(f([1, 2, 3, 4, 5], xm, skipmissing=:pairwise))
+    @test isnan(f(xm, [1, 2, 3, 4, 5], skipmissing=:pairwise))
+    @test isequal(f(xmm, skipmissing=:pairwise), [1.0 NaN; NaN 1.0])
+    @test isequal(f(xmm, skipmissing=:none), [missing missing; missing missing])
+    @test isequal(f(xmm, xmm, skipmissing=:none), [missing missing; missing missing])
+    @test isequal(f(xmm, copy(xmm), skipmissing=:none),
+        [missing missing; missing missing])
+    @test isequal(f(xmm, xmm, skipmissing=:listwise), [1.0 NaN; NaN 1.0])
+    @test isequal(f(xmm, copy(xmm), skipmissing=:listwise), [NaN NaN; NaN NaN])
+    @test isequal(f(xmm, copy(xmm), skipmissing=:pairwise), [NaN NaN; NaN NaN])
+    @test ismissing(f([1, 2, 3, 4, 5], xm, skipmissing=:none))
+    @test ismissing(f([1, 2, 3, 4, 5], xm, skipmissing=:none))
+    @test isequal(f(xmm, skipmissing=:none), [missing missing; missing missing])
+    @test isequal(f(xmm, copy(xmm), skipmissing=:none),
+        [missing missing; missing missing])
+    @test isequal(f(hcat(Y, xm), skipmissing=:none), vcat(hcat(f(Y, skipmissing=:none),
+            [missing, missing, missing]), [missing missing missing 1.0]))
+    @test_throws ArgumentError f([1, 2, 3, 4], [4, 3, 2, 1], skipmissing=:listwise)
+
+    # All elements identical should yield NaN
+    @test isnan(f(repeat([1], n), collect(1:n)))
+    @test f(repeat([0, 1, 1, 0], n), repeat([1, 0, 1, 0], n)) == 0.0
+
+    # Test with no repeats
+    @test f(collect(1:n), collect(1:n)) == 1.0
+    @test f(collect(1:n), reverse(collect(1:n))) == -1.0
+
+    # Inf and -Inf work ok
+    @test f([-Inf, -0.0, Inf], [1, 2, 3]) == 1.0
+
+    #Interaction of NaN and missing with skipmissing argument
+    nan_and_missing = hcat(fill(NaN, 10), fill(missing, 10),
+        vcat(fill(NaN, 5), fill(missing, 5)))
+    @test isequal(f(nan_and_missing, skipmissing=:none),
+        [1.0 missing missing; missing 1.0 missing; missing missing 1.0])
+    @test isequal(f(nan_and_missing, copy(nan_and_missing), skipmissing=:none),
+        [NaN missing missing; missing missing missing; missing missing missing])
+    @test isequal(f(nan_and_missing, skipmissing=:pairwise),
+        [1.0 NaN NaN; NaN 1.0 NaN; NaN NaN 1.0])
+    @test isequal(f(nan_and_missing, copy(nan_and_missing), skipmissing=:pairwise),
+        fill(NaN, 3, 3))
+    @test isequal(f(nan_and_missing, skipmissing=:listwise),
+        [1.0 NaN NaN; NaN 1.0 NaN; NaN NaN 1.0])
+    @test isequal(f(nan_and_missing, copy(nan_and_missing), skipmissing=:listwise),
+        fill(NaN, 3, 3))
+
+    #Reject nonsense skipmissing argument
+    @test_throws ArgumentError f(X; skipmissing=:foo)
+    @test_throws ArgumentError f(Xm; skipmissing=:foo)
+
+    # Inputs have fewer than 2 rows
+    @test isequal(f([], []), NaN)
+    @test isequal(f(fill(1, 0, 2), fill(1, 0, 2)), [NaN NaN; NaN NaN])
+    @test isequal(f(fill(1, 0, 2)), [1.0 NaN; NaN 1.0])
+    @test isequal(f([1;;], [1;;]), [NaN;;])
+    @test isequal(f([1;;]), [1.0;;])
+    @test isequal(f([missing], [missing]), NaN)
+    @test isequal(f([1], [1]), NaN)
+    @test isequal(f([NaN], [NaN]), NaN)
+    @test isequal(f([1], [1]), NaN)
+    @test isequal(f([]), 1.0)
+    @test isequal(f([1]), 1.0)
+    @test isequal(f([NaN]), 1.0)
+    @test isequal(f([missing]), missing)
+    @test isequal(f([missing]), missing)
+    @test isequal(f([missing], [missing missing]), [NaN NaN])
+    @test isequal(f([missing missing]), [missing NaN; NaN missing])
+    @test isequal(f([missing missing], [missing missing]), [NaN NaN; NaN NaN])
+
+    # Exercise catch block in method _cor (when f === corspearman)
+    @test isequal(f(vcat([1 2 3], fill(missing, 2, 3))),
+        [1.0 missing missing; missing 1.0 missing; missing missing 1.0])
+    # Exercise "correction" of on-diagonal terms in method
+    # _pairwise!(::Val{:none}, f::typeof(corspearman),...
+    @test isequal(f([missing missing; 1 1; 1 1]), [1.0 missing; missing 1.0])
+
+    # Works for not-numbers
+    @test isequal(f(["a", "b", "c"], ["a", "b", "c"]), 1.0)
+    @test isequal(f(["a", "b", "c"], ["c", "b", "a"]), -1.0)
+    @test (f(["a" "z"; "b" "y"; "c" "x"]) ≈ [1.0 -1.0; -1.0 1.0])
+    @test (f(["a" 3; "b" 2; "c" 1]) ≈ [1.0 -1.0; -1.0 1.0])
+
+    #Works for zero size input
+    @test isequal(f([;;]), [;;])
+    @test isequal(f([;;], [;;]), [;;])
+    @test isequal(f([;;], [;;], skipmissing=:pairwise), [;;])
+    @test isequal(f([;;], [;;], skipmissing=:listwise), [;;])
+
+    # Wrong dimensions
+    @test_throws DimensionMismatch f([1], [1, 2])
+    @test_throws DimensionMismatch f([1], [1 2; 3 4])
+    @test_throws DimensionMismatch f([1 2; 3 4], [1])
+    @test_throws DimensionMismatch f([1 2; 3 4; 4 6], [1 2; 3 4])
+
+    # All eight three-element permutations, for these cases corspearman and corkendall
+    # have the same return values
+    z = [1 1 1;
+        1 1 2;
+        1 2 2;
+        1 2 2;
+        1 2 1;
+        2 1 2;
+        1 1 2;
+        2 2 2]
+
+    @test f(z) ≈ [1 0 1/3; 0 1 0; 1/3 0 1]
+    @test f(z, z) ≈ [1 0 1/3; 0 1 0; 1/3 0 1]
+    @test f(z[:, 1], z) ≈ [1 0 1 / 3]
+    @test f(z, z[:, 1]) ≈ [1; 0; 1 / 3]
+
+    z = float(z)
+    @test f(z) ≈ [1 0 1/3; 0 1 0; 1/3 0 1]
+    @test f(z, z) ≈ [1 0 1/3; 0 1 0; 1/3 0 1]
+    @test f(z[:, 1], z) ≈ [1 0 1 / 3]
+    @test f(z, z[:, 1]) ≈ [1; 0; 1 / 3]
+
+    w = repeat(z, n)
+    @test f(w) ≈ [1 0 1/3; 0 1 0; 1/3 0 1]
+    @test f(w, w) ≈ [1 0 1/3; 0 1 0; 1/3 0 1]
+    @test f(w[:, 1], w) ≈ [1 0 1 / 3]
+    @test f(w, w[:, 1]) ≈ [1; 0; 1 / 3]
+
+    # NaN handling
+    Xnan = copy(X)
+    Xnan[1, 1] = NaN
+    Ynan = copy(Y)
+    Ynan[2, 1] = NaN
+
+    @test isnan(f([1.0, NaN, 2.0], [2.0, 1.0, 3.4]))
+    @test all(isnan, f([1.0, NaN], [1 2; 3 4]))
+    @test all(isnan, f([1 2; 3 4], [1.0, NaN]))
+    @test isequal(f([1 NaN; NaN 4]), [1 NaN; NaN 1])
+    @test all(isnan, f([1 NaN; NaN 4], [1 NaN; NaN 4]))
+    @test all(isnan, f([1 NaN; NaN 4], [NaN 1; NaN 4]))
+
+    @test isequal(f(Xnan, Ynan),
+        [f(Xnan[:, i], Ynan[:, j]) for i in axes(Xnan, 2), j in axes(Ynan, 2)])
+    @test isequal(f(Xnan),
+        [i == j ? 1.0 : f(Xnan[:, i], Xnan[:, j])
+         for i in axes(Xnan, 2), j in axes(Xnan, 2)])
+    for k in 1:2
+        @test isequal(f(Xnan[:, k], Ynan),
+            [f(Xnan[:, k], Ynan[:, j]) for i in 1:1, j in axes(Ynan, 2)])
+        # TODO: fix corkendall (PR#659)
+        if f === corspearman
+            @test isequal(f(Xnan, Ynan[:, k]),
+                [f(Xnan[:, i], Ynan[:, k]) for i in axes(Xnan, 2), j in 1:1])
+        else
+            @test isequal(f(Xnan, Ynan[:, k]),
+                [f(Xnan[:, i], Ynan[:, k]) for i in axes(Xnan, 2)])
+        end
+    end
+
+end
 
-# Wrong dimensions
+# Don't artificially boost coverage stats when checking for mutation and allocation size
+# COV_EXCL_START
+@testset "Check no mutation in $f" for f in (corkendall, corspearman)
+
+    nr = 50
+    nc = 5
+    cutoff = min(0.1, 10 / (nr * nc))
+    X = randn(nr, nc)
+    x = randn(nr)
+    Xm = ifelse.(rand(nr, nc) .< cutoff, missing, randn(nr, nc))
+    xm = ifelse.(rand(nr) .< cutoff, missing, randn(nr))
+    Y = randn(nr, nc)
+    y = randn(nr)
+    Ym = ifelse.(rand(nr, nc) .< cutoff, missing, randn(nr, nc))
+    ym = ifelse.(rand(nr) .< cutoff, missing, randn(nr))
+
+    for arg1 in (X, x, Xm, xm)
+        for arg2 in (Y, y, Ym, ym)
+            for skipmissing in (:pairwise, :none, :listwise)
+                for f in (corkendall, corspearman)
+                    if !((arg1 isa Vector) && (arg2 isa Vector) && skipmissing == :listwise)
+                        copy1 = copy(arg1)
+                        copy2 = copy(arg2)
+                        res = f(arg1, arg2; skipmissing)
+                        @test isequal(arg1, copy1)
+                        @test isequal(arg2, copy2)
+                    end
+                end
+            end
+        end
+    end
+end
 
-@test_throws DimensionMismatch corspearman([1], [1, 2])
-@test_throws DimensionMismatch corspearman([1], [1 2; 3 4])
-@test_throws DimensionMismatch corspearman([1 2; 3 4], [1])
-@test_throws ArgumentError corspearman([1 2; 3 4; 4 6], [1 2; 3 4])
+@testset "corkendall and corspearman allocations" begin
+
+    Random.seed!(1)
+    x = rand(1000, 10)
+    xm = ifelse.(x .< 0.1, missing, x)
+    #compile
+    corkendall(x)
+    corkendall(xm, skipmissing=:listwise)
+    corkendall(xm, skipmissing=:pairwise)
+    corspearman(x)
+    corspearman(xm, skipmissing=:listwise)
+    corspearman(xm, skipmissing=:pairwise)
+    x = rand(1000, 100)
+    xm = ifelse.(x .< 0.01, missing, x)
+
+    #=When executing code such as corkendall(x) allocations are approximately affine in the
+    number of threads, thanks to use of task-local storage. The tests below have a "safety
+    factor" of 1.2 against the expected size of allocations.
+    =#
+    @test (@allocated corkendall(x)) < (897_808 + Threads.nthreads() * 58_044) * 1.2
+    @test (@allocated corkendall(xm,skipmissing=:listwise)) < (1_119_392 + Threads.nthreads() * 22_172) * 1.2
+    @test (@allocated corkendall(xm,skipmissing=:pairwise)) < (892_112 + Threads.nthreads() * 61_116) * 1.2
+    @test (@allocated corspearman(x)) < (2_678_448 + Threads.nthreads() * 9_128) * 1.2
+    @test (@allocated corspearman(xm,skipmissing=:listwise)) < (1_803_712 + Threads.nthreads() * 3_992) * 1.2
+    @test (@allocated corspearman(xm,skipmissing=:pairwise)) < (1_692_208 + Threads.nthreads() * 67_172) * 1.2
 
-# TODO: fix corkendall to match corspearman (PR#659)
-@test_throws ErrorException corkendall([1], [1, 2])
-@test_throws ErrorException corkendall([1], [1 2; 3 4])
-@test_throws ErrorException corkendall([1 2; 3 4], [1])
-@test_throws ErrorException corkendall([1 2; 3 4; 4 6], [1 2; 3 4])
+end
+# COV_EXCL_STOP
\ No newline at end of file
diff --git a/test/rankcorrspeedtest.jl b/test/rankcorrspeedtest.jl
new file mode 100644
index 000000000..1657dd29f
--- /dev/null
+++ b/test/rankcorrspeedtest.jl
@@ -0,0 +1,98 @@
+using Random, StatsBase, Dates, BenchmarkTools
+
+using Random;
+x = rand(MersenneTwister(0), 1000, 10);
+xm = ifelse.(x .< 0.05, missing, x);
+
+#compile...
+res_1 = corkendall(x)
+res_2 = corkendall(xm; skipmissing=:pairwise)
+res_3 = pairwise(corkendall, eachcol(xm); skipmissing=:pairwise)
+res_4 = corkendall(xm; skipmissing=:listwise)
+res_5 = pairwise(corkendall, eachcol(xm); skipmissing=:listwise)
+res_6 = corkendall(xm; skipmissing=:none)
+res_7 = pairwise(corkendall, eachcol(xm), skipmissing=:none)
+res_8 = corspearman(x)
+res_9 = corspearman(xm; skipmissing=:pairwise)
+res_10 = pairwise(corspearman, eachcol(xm); skipmissing=:pairwise)
+res_11 = corspearman(xm; skipmissing=:listwise)
+res_12 = pairwise(corspearman, eachcol(xm); skipmissing=:listwise)
+res_13 = corspearman(xm; skipmissing=:none)
+res_14 = pairwise(corspearman, eachcol(xm), skipmissing=:none)
+
+@assert res_2 == res_3
+@assert res_4 == res_5
+@assert res_9 == res_10
+@assert res_11 ≈ res_12
+
+x = rand(MersenneTwister(0), 1000, 1000);
+xm = ifelse.(x .< 0.05, missing, x);
+
+println("="^130)
+@show(Dates.now())
+@show ENV["COMPUTERNAME"]
+println("Julia Version $VERSION")
+@show Threads.nthreads()
+@show size(x)
+@show typeof(x)
+@show size(xm)
+@show typeof(xm)
+
+print("corkendall(\$x)                                           ")
+@btime res_1 = corkendall($x);
+print("corkendall(\$xm; skipmissing=:pairwise)                   ")
+@btime res_2 = corkendall($xm; skipmissing=:pairwise);
+print("pairwise(corkendall,eachcol(\$xm); skipmissing=:pairwise) ")
+@btime res_3 = pairwise(corkendall, eachcol($xm); skipmissing=:pairwise);
+print("corkendall(\$xm; skipmissing=:listwise)                   ")
+@btime res_4 = corkendall($xm; skipmissing=:listwise);
+print("pairwise(corkendall,eachcol(\$xm); skipmissing=:listwise) ")
+@btime res_5 = pairwise(corkendall, eachcol($xm); skipmissing=:listwise);
+print("corkendall(\$xm; skipmissing=:none)                       ")
+@btime res_6 = corkendall($xm; skipmissing=:none);
+print("pairwise(corkendall,eachcol(\$xm),skipmissing=:none)      ")
+@btime res_7 = pairwise(corkendall, eachcol($xm), skipmissing=:none);
+print("corspearman(\$x)                                          ")
+@btime res_8 = corspearman($x);
+print("corspearman(\$xm; skipmissing=:pairwise)                  ")
+@btime res_9 = corspearman($xm; skipmissing=:pairwise);
+print("pairwise(corspearman,eachcol(\$xm); skipmissing=:pairwise)")
+@btime res_10 = pairwise(corspearman, eachcol($xm); skipmissing=:pairwise);
+print("corspearman(\$xm; skipmissing=:listwise)                  ")
+@btime res_11 = corspearman($xm; skipmissing=:listwise);
+print("pairwise(corspearman,eachcol(\$xm); skipmissing=:listwise)")
+@btime res_12 = pairwise(corspearman, eachcol($xm); skipmissing=:listwise);
+print("corspearman(\$xm; skipmissing=:none)                      ")
+@btime res_13 = corspearman($xm; skipmissing=:none);
+print("pairwise(corspearman,eachcol(\$xm),skipmissing=:none)     ")
+@btime res_14 = pairwise(corspearman, eachcol($xm), skipmissing=:none);
+
+println("="^130)
+
+#=
+==================================================================================================================================
+Dates.now() = DateTime("2024-03-30T10:16:29.303")
+ENV["COMPUTERNAME"] = "PHILIP-LAPTOP"
+Julia Version 1.10.2
+Threads.nthreads() = 8
+size(x) = (1000, 1000)
+typeof(x) = Matrix{Float64}
+size(xm) = (1000, 1000)
+typeof(xm) = Matrix{Union{Missing, Float64}}
+corkendall($x)                                             5.758 s (1193 allocations: 15.84 MiB)
+corkendall($xm; skipmissing=:pairwise)                     5.250 s (1193 allocations: 15.51 MiB)
+pairwise(corkendall,eachcol($xm); skipmissing=:pairwise)   5.541 s (1161 allocations: 15.51 MiB)
+corkendall($xm; skipmissing=:listwise)                     9.694 ms (2194 allocations: 11.82 MiB)
+pairwise(corkendall,eachcol($xm); skipmissing=:listwise)   9.863 ms (2162 allocations: 11.82 MiB)
+corkendall($xm; skipmissing=:none)                         258.379 ms (1204 allocations: 16.47 MiB)
+pairwise(corkendall,eachcol($xm),skipmissing=:none)        238.871 ms (1202 allocations: 16.47 MiB)
+corspearman($x)                                            35.905 ms (1127 allocations: 39.31 MiB)
+corspearman($xm; skipmissing=:pairwise)                    1.971 s (1260 allocations: 23.19 MiB)
+pairwise(corspearman,eachcol($xm); skipmissing=:pairwise)  1.706 s (1228 allocations: 23.18 MiB)
+corspearman($xm; skipmissing=:listwise)                    4.466 ms (2113 allocations: 19.43 MiB)
+pairwise(corspearman,eachcol($xm); skipmissing=:listwise)  4.432 ms (2081 allocations: 19.43 MiB)
+corspearman($xm; skipmissing=:none)                        20.017 ms (1625 allocations: 17.27 MiB)
+pairwise(corspearman,eachcol($xm),skipmissing=:none)       20.085 ms (1623 allocations: 17.27 MiB)
+==================================================================================================================================
+
+=#
\ No newline at end of file

From 8ced336285531c493ce318b2993f82dc25aa2240 Mon Sep 17 00:00:00 2001
From: PGS62 <philip.swannell@gmail.com>
Date: Sat, 30 Mar 2024 11:25:09 +0000
Subject: [PATCH 02/24] Delete rankcorrspeedtest.jl

---
 test/rankcorrspeedtest.jl | 98 ---------------------------------------
 1 file changed, 98 deletions(-)
 delete mode 100644 test/rankcorrspeedtest.jl

diff --git a/test/rankcorrspeedtest.jl b/test/rankcorrspeedtest.jl
deleted file mode 100644
index 1657dd29f..000000000
--- a/test/rankcorrspeedtest.jl
+++ /dev/null
@@ -1,98 +0,0 @@
-using Random, StatsBase, Dates, BenchmarkTools
-
-using Random;
-x = rand(MersenneTwister(0), 1000, 10);
-xm = ifelse.(x .< 0.05, missing, x);
-
-#compile...
-res_1 = corkendall(x)
-res_2 = corkendall(xm; skipmissing=:pairwise)
-res_3 = pairwise(corkendall, eachcol(xm); skipmissing=:pairwise)
-res_4 = corkendall(xm; skipmissing=:listwise)
-res_5 = pairwise(corkendall, eachcol(xm); skipmissing=:listwise)
-res_6 = corkendall(xm; skipmissing=:none)
-res_7 = pairwise(corkendall, eachcol(xm), skipmissing=:none)
-res_8 = corspearman(x)
-res_9 = corspearman(xm; skipmissing=:pairwise)
-res_10 = pairwise(corspearman, eachcol(xm); skipmissing=:pairwise)
-res_11 = corspearman(xm; skipmissing=:listwise)
-res_12 = pairwise(corspearman, eachcol(xm); skipmissing=:listwise)
-res_13 = corspearman(xm; skipmissing=:none)
-res_14 = pairwise(corspearman, eachcol(xm), skipmissing=:none)
-
-@assert res_2 == res_3
-@assert res_4 == res_5
-@assert res_9 == res_10
-@assert res_11 ≈ res_12
-
-x = rand(MersenneTwister(0), 1000, 1000);
-xm = ifelse.(x .< 0.05, missing, x);
-
-println("="^130)
-@show(Dates.now())
-@show ENV["COMPUTERNAME"]
-println("Julia Version $VERSION")
-@show Threads.nthreads()
-@show size(x)
-@show typeof(x)
-@show size(xm)
-@show typeof(xm)
-
-print("corkendall(\$x)                                           ")
-@btime res_1 = corkendall($x);
-print("corkendall(\$xm; skipmissing=:pairwise)                   ")
-@btime res_2 = corkendall($xm; skipmissing=:pairwise);
-print("pairwise(corkendall,eachcol(\$xm); skipmissing=:pairwise) ")
-@btime res_3 = pairwise(corkendall, eachcol($xm); skipmissing=:pairwise);
-print("corkendall(\$xm; skipmissing=:listwise)                   ")
-@btime res_4 = corkendall($xm; skipmissing=:listwise);
-print("pairwise(corkendall,eachcol(\$xm); skipmissing=:listwise) ")
-@btime res_5 = pairwise(corkendall, eachcol($xm); skipmissing=:listwise);
-print("corkendall(\$xm; skipmissing=:none)                       ")
-@btime res_6 = corkendall($xm; skipmissing=:none);
-print("pairwise(corkendall,eachcol(\$xm),skipmissing=:none)      ")
-@btime res_7 = pairwise(corkendall, eachcol($xm), skipmissing=:none);
-print("corspearman(\$x)                                          ")
-@btime res_8 = corspearman($x);
-print("corspearman(\$xm; skipmissing=:pairwise)                  ")
-@btime res_9 = corspearman($xm; skipmissing=:pairwise);
-print("pairwise(corspearman,eachcol(\$xm); skipmissing=:pairwise)")
-@btime res_10 = pairwise(corspearman, eachcol($xm); skipmissing=:pairwise);
-print("corspearman(\$xm; skipmissing=:listwise)                  ")
-@btime res_11 = corspearman($xm; skipmissing=:listwise);
-print("pairwise(corspearman,eachcol(\$xm); skipmissing=:listwise)")
-@btime res_12 = pairwise(corspearman, eachcol($xm); skipmissing=:listwise);
-print("corspearman(\$xm; skipmissing=:none)                      ")
-@btime res_13 = corspearman($xm; skipmissing=:none);
-print("pairwise(corspearman,eachcol(\$xm),skipmissing=:none)     ")
-@btime res_14 = pairwise(corspearman, eachcol($xm), skipmissing=:none);
-
-println("="^130)
-
-#=
-==================================================================================================================================
-Dates.now() = DateTime("2024-03-30T10:16:29.303")
-ENV["COMPUTERNAME"] = "PHILIP-LAPTOP"
-Julia Version 1.10.2
-Threads.nthreads() = 8
-size(x) = (1000, 1000)
-typeof(x) = Matrix{Float64}
-size(xm) = (1000, 1000)
-typeof(xm) = Matrix{Union{Missing, Float64}}
-corkendall($x)                                             5.758 s (1193 allocations: 15.84 MiB)
-corkendall($xm; skipmissing=:pairwise)                     5.250 s (1193 allocations: 15.51 MiB)
-pairwise(corkendall,eachcol($xm); skipmissing=:pairwise)   5.541 s (1161 allocations: 15.51 MiB)
-corkendall($xm; skipmissing=:listwise)                     9.694 ms (2194 allocations: 11.82 MiB)
-pairwise(corkendall,eachcol($xm); skipmissing=:listwise)   9.863 ms (2162 allocations: 11.82 MiB)
-corkendall($xm; skipmissing=:none)                         258.379 ms (1204 allocations: 16.47 MiB)
-pairwise(corkendall,eachcol($xm),skipmissing=:none)        238.871 ms (1202 allocations: 16.47 MiB)
-corspearman($x)                                            35.905 ms (1127 allocations: 39.31 MiB)
-corspearman($xm; skipmissing=:pairwise)                    1.971 s (1260 allocations: 23.19 MiB)
-pairwise(corspearman,eachcol($xm); skipmissing=:pairwise)  1.706 s (1228 allocations: 23.18 MiB)
-corspearman($xm; skipmissing=:listwise)                    4.466 ms (2113 allocations: 19.43 MiB)
-pairwise(corspearman,eachcol($xm); skipmissing=:listwise)  4.432 ms (2081 allocations: 19.43 MiB)
-corspearman($xm; skipmissing=:none)                        20.017 ms (1625 allocations: 17.27 MiB)
-pairwise(corspearman,eachcol($xm),skipmissing=:none)       20.085 ms (1623 allocations: 17.27 MiB)
-==================================================================================================================================
-
-=#
\ No newline at end of file

From 6a8fecda3e91cc00caa39f2452ae186c74bf1080 Mon Sep 17 00:00:00 2001
From: PGS62 <philip.swannell@gmail.com>
Date: Tue, 2 Apr 2024 09:00:01 +0100
Subject: [PATCH 03/24] Partial solution to test failures on Julia 1.0.5

---
 .vscode/settings.json |  3 ++-
 src/pairwise.jl       |  2 +-
 src/rankcorr.jl       | 36 +++++++++++++++++-------------------
 test/rankcorr.jl      | 20 +++++++++++---------
 4 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 9e26dfeeb..7a73a41bf 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1 +1,2 @@
-{}
\ No newline at end of file
+{
+}
\ No newline at end of file
diff --git a/src/pairwise.jl b/src/pairwise.jl
index d2775fe7e..025566b16 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -104,7 +104,7 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
                 if iscor && i == j && y[i] === x[j] && V !== Union{} && V!== Missing
                     dest[j, i] = 1.0
                 else
-                    dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx, scratch_fy)...)
+                    dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx = scratch_fx, scratch_fy = scratch_fy)...)
                 end
                 symmetric && (dest[i, j] = dest[j, i])
             end
diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index 1d5b3871a..338839025 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -38,17 +38,17 @@ function corspearman(x::AbstractVector, y::AbstractVector; skipmissing::Symbol=:
 end
 
 function corspearman(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none)
-    return corspearman(x, reshape(y, (length(y), 1)); skipmissing)
+    return corspearman(x, reshape(y, (length(y), 1)); skipmissing=skipmissing)
 end
 
 function corspearman(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none)
-    return corspearman(reshape(x, (length(x), 1)), y; skipmissing)
+    return corspearman(reshape(x, (length(x), 1)), y; skipmissing=skipmissing)
 end
 
 function corspearman(x::AbstractMatrix, y::AbstractMatrix=x;
     skipmissing::Symbol=:none)
     check_rankcor_args(x, y, skipmissing, true)
-    return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing)
+    return pairwise(corspearman, _eachcol(x), _eachcol(y); skipmissing=skipmissing)
 end
 
 function corspearman(x::AbstractVector{T}) where {T}
@@ -166,16 +166,13 @@ julia> using StatsBase, BenchmarkTools, Random
 
 julia> Random.seed!(1);
 
-julia> x = ifelse.(rand(1000) .< 0.05,missing,randn(1000));y = ifelse.(rand(1000) .< 0.05,\
-missing,randn(1000));
+julia> x = ifelse.(rand(1000) .< 0.05,missing,randn(1000));y = ifelse.(rand(1000) .< 0.05, missing,randn(1000));
 
-julia> sortpermx=sortperm(x);sortpermy=sortperm(y);inds=zeros(Int64,1000);\
-spnmx=zeros(Int64,1000);spnmy=zeros(Int64,1000);
+julia> sortpermx=sortperm(x);sortpermy=sortperm(y);inds=zeros(Int64,1000);spnmx=zeros(Int64,1000);spnmy=zeros(Int64,1000);
 
 julia> nmx=zeros(1000);nmy=zeros(1000);ranksx=similar(x,Float64);ranksy=similar(y,Float64);
 
-julia> @btime corspearman_kernel!(\$x,\$y,:pairwise,\$sortpermx,\$sortpermy,\
-\$inds,\$spnmx,\$spnmy,\$nmx,\$nmy,\$ranksx,\$ranksy)
+julia> @btime corspearman_kernel!(\$x,\$y,:pairwise,\$sortpermx,\$sortpermy,\$inds,\$spnmx,\$spnmy,\$nmx,\$nmy,\$ranksx,\$ranksy)
 4.671 μs (0 allocations: 0 bytes)
 -0.016058512110609713
 ```
@@ -417,7 +414,7 @@ Uses multiple threads when either `x` or `y` is a matrix.
 function corkendall(x::AbstractMatrix, y::AbstractMatrix=x;
     skipmissing::Symbol=:none)
     check_rankcor_args(x, y, skipmissing, true)
-    return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing)
+    return pairwise(corkendall, _eachcol(x), _eachcol(y); skipmissing=skipmissing)
 end
 
 function corkendall(x::AbstractVector, y::AbstractVector;
@@ -433,15 +430,17 @@ function corkendall(x::AbstractVector, y::AbstractVector;
     end
 end
 
+_eachcol(x) = VERSION >= v"1.1" ? eachcol(x) : [view(x,:,j) for j in axes(x,2)]
+
 #= corkendall returns a vector in this case, inconsistent with with Statistics.cor and
 StatsBase.corspearman, but consistent with StatsBase.corkendall.
  =#
 function corkendall(x::AbstractMatrix, y::AbstractVector; skipmissing::Symbol=:none)
-    return vec(corkendall(x, reshape(y, (length(y), 1)); skipmissing))
+    return vec(corkendall(x, reshape(y, (length(y), 1)); skipmissing=skipmissing))
 end
 
 function corkendall(x::AbstractVector, y::AbstractMatrix; skipmissing::Symbol=:none)
-    return corkendall(reshape(x, (length(x), 1)), y; skipmissing)
+    return corkendall(reshape(x, (length(x), 1)), y; skipmissing=skipmissing)
 end
 
 function corkendall(x::AbstractVector{T}) where {T}
@@ -515,7 +514,8 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
                 else
                     yi .= y[i]
                     dest[j, i] = corkendall_kernel!(sortedxj, yi, permx, skipmissing;
-                        scratch_py, scratch_sy, scratch_fx, scratch_fy)
+                        scratch_py=scratch_py, scratch_sy=scratch_sy, scratch_fx=scratch_fx,
+                        scratch_fy=scratch_fy)
                 end
                 symmetric && (dest[i, j] = dest[j, i])
             end
@@ -531,17 +531,15 @@ end
 # JSTOR, www.jstor.org/stable/2282833.
 
 function check_rankcor_args(x, y, skipmissing, allowlistwise::Bool)
-    Base.require_one_based_indexing(x, y)
+    #  Base.require_one_based_indexing(x, y) #TODO find how to reject non-1-based in a way that works in Julia 1.0.5
     size(x, 1) == size(y, 1) ||
         throw(DimensionMismatch("x and y have inconsistent dimensions"))
     if allowlistwise
         skipmissing in (:none, :pairwise, :listwise) ||
-            throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise, \
-            but got :$skipmissing"))
+            throw(ArgumentError("skipmissing must be one of :none, :pairwise or :listwise, but got :$skipmissing"))
     else
         skipmissing in (:none, :pairwise) ||
-            throw(ArgumentError("skipmissing must be either :none or :pairwise, but \
-            got :$skipmissing"))
+            throw(ArgumentError("skipmissing must be either :none or :pairwise, but got :$skipmissing"))
     end
 end
 
@@ -585,7 +583,7 @@ function corkendall_kernel!(sortedx::AbstractVector{T}, y::AbstractVector{U},
     end
 
     if missing isa T || missing isa V
-        sortedx, permutedy = handle_pairwise(sortedx, scratch_py; scratch_fx, scratch_fy)
+        sortedx, permutedy = handle_pairwise(sortedx, scratch_py; scratch_fx=scratch_fx, scratch_fy=scratch_fy)
     else
         permutedy = scratch_py
     end
diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index 484474d99..719870d3d 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -236,11 +236,13 @@ julia> corkendall(Matrix{Union{Missing,Float64}}(missing,5,3)) #DIFFERENT behavi
     @test (f(["a" "z"; "b" "y"; "c" "x"]) ≈ [1.0 -1.0; -1.0 1.0])
     @test (f(["a" 3; "b" 2; "c" 1]) ≈ [1.0 -1.0; -1.0 1.0])
 
-    #Works for zero size input
-    @test isequal(f([;;]), [;;])
-    @test isequal(f([;;], [;;]), [;;])
-    @test isequal(f([;;], [;;], skipmissing=:pairwise), [;;])
-    @test isequal(f([;;], [;;], skipmissing=:listwise), [;;])
+    #Works for zero size input ( [;;] not compatible with Julia 1.0.5)
+    let nada = Array{Any,2}(undef, 0, 0)
+        @test isequal(f(nada), nada)
+        @test isequal(f(nada, nada), nada)
+        @test isequal(f(nada, nada, skipmissing=:pairwise), nada)
+        @test isequal(f(nada, nada, skipmissing=:listwise), nada)
+    end
 
     # Wrong dimensions
     @test_throws DimensionMismatch f([1], [1, 2])
@@ -362,11 +364,11 @@ end
     factor" of 1.2 against the expected size of allocations.
     =#
     @test (@allocated corkendall(x)) < (897_808 + Threads.nthreads() * 58_044) * 1.2
-    @test (@allocated corkendall(xm,skipmissing=:listwise)) < (1_119_392 + Threads.nthreads() * 22_172) * 1.2
-    @test (@allocated corkendall(xm,skipmissing=:pairwise)) < (892_112 + Threads.nthreads() * 61_116) * 1.2
+    @test (@allocated corkendall(xm, skipmissing=:listwise)) < (1_119_392 + Threads.nthreads() * 22_172) * 1.2
+    @test (@allocated corkendall(xm, skipmissing=:pairwise)) < (892_112 + Threads.nthreads() * 61_116) * 1.2
     @test (@allocated corspearman(x)) < (2_678_448 + Threads.nthreads() * 9_128) * 1.2
-    @test (@allocated corspearman(xm,skipmissing=:listwise)) < (1_803_712 + Threads.nthreads() * 3_992) * 1.2
-    @test (@allocated corspearman(xm,skipmissing=:pairwise)) < (1_692_208 + Threads.nthreads() * 67_172) * 1.2
+    @test (@allocated corspearman(xm, skipmissing=:listwise)) < (1_803_712 + Threads.nthreads() * 3_992) * 1.2
+    @test (@allocated corspearman(xm, skipmissing=:pairwise)) < (1_692_208 + Threads.nthreads() * 67_172) * 1.2
 
 end
 # COV_EXCL_STOP
\ No newline at end of file

From eff32e4c0760201d63cfaa9d561558479740181d Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Tue, 2 Apr 2024 16:12:57 +0100
Subject: [PATCH 04/24] Reverted to using eachcol in the hope that support for
 Julia 1.0.5 will be dropped.

---
 src/rankcorr.jl | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index 338839025..ca59753ca 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -48,7 +48,7 @@ end
 function corspearman(x::AbstractMatrix, y::AbstractMatrix=x;
     skipmissing::Symbol=:none)
     check_rankcor_args(x, y, skipmissing, true)
-    return pairwise(corspearman, _eachcol(x), _eachcol(y); skipmissing=skipmissing)
+    return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing=skipmissing)
 end
 
 function corspearman(x::AbstractVector{T}) where {T}
@@ -414,7 +414,7 @@ Uses multiple threads when either `x` or `y` is a matrix.
 function corkendall(x::AbstractMatrix, y::AbstractMatrix=x;
     skipmissing::Symbol=:none)
     check_rankcor_args(x, y, skipmissing, true)
-    return pairwise(corkendall, _eachcol(x), _eachcol(y); skipmissing=skipmissing)
+    return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing=skipmissing)
 end
 
 function corkendall(x::AbstractVector, y::AbstractVector;
@@ -430,8 +430,6 @@ function corkendall(x::AbstractVector, y::AbstractVector;
     end
 end
 
-_eachcol(x) = VERSION >= v"1.1" ? eachcol(x) : [view(x,:,j) for j in axes(x,2)]
-
 #= corkendall returns a vector in this case, inconsistent with with Statistics.cor and
 StatsBase.corspearman, but consistent with StatsBase.corkendall.
  =#

From 1a4d064d63cd2b09f3153e5524656c07d407d077 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Tue, 2 Apr 2024 16:13:16 +0100
Subject: [PATCH 05/24] Deleted commented out tests

---
 test/pairwise.jl | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/test/pairwise.jl b/test/pairwise.jl
index 35ecd8279..6a3bf1df1 100644
--- a/test/pairwise.jl
+++ b/test/pairwise.jl
@@ -53,9 +53,7 @@ arbitrary_fun(x, y) = cor(x, y)
 
         @inferred pairwise(f, x, y)
         if throwsforzerolengthinput
-            # @test_throws Union{ArgumentError,MethodError} pairwise(f, [Int[]], [Int[]])
             @test_throws CompositeException pairwise(f, [Int[]], [Int[]])
-            #@test_throws Union{ArgumentError,MethodError} pairwise!(f, zeros(1, 1), [Int[]], [Int[]])
             @test_throws CompositeException pairwise!(f, zeros(1, 1), [Int[]], [Int[]])
         end
 
@@ -122,23 +120,18 @@ arbitrary_fun(x, y) = cor(x, y)
             return x[which], y[which]
         end
 
-        #@test isapprox(res, [f(collect.(skipmissings(xi, yi))...) for xi in xm, yi in ym],
-        #    rtol=1e-6)
         @test isapprox(res, [f(myskipmissings(xi, yi)...) for xi in xm, yi in ym],
             rtol=1e-6)
 
         res = pairwise(f, ym, zm, skipmissing=:pairwise)
         if isrankcor
             @test res isa Matrix{Float64}
-            #    @test isequal(res, [f(collect.(skipmissings(yi, zi))...) for yi in ym, zi in zm])
             @test isequal(res, [f(myskipmissings(yi, zi)...) for yi in ym, zi in zm])
         else
             @test res isa Matrix{Float32}
             res2 = zeros(Union{Float32,Missing}, size(res))
             @test pairwise!(f, res2, ym, zm, skipmissing=:pairwise) === res2
             @test res ≅ res2
-            #@test isapprox(res, [f(collect.(skipmissings(yi, zi))...) for yi in ym, zi in zm],
-            #    rtol=1e-6)
             @test isapprox(res, [f(myskipmissings(yi, zi)...) for yi in ym, zi in zm],
                 rtol=1e-6)
 
@@ -189,15 +182,11 @@ arbitrary_fun(x, y) = cor(x, y)
 
         if VERSION >= v"1.5" # Fails with UndefVarError on Julia 1.0
             if throwsforzerolengthinput
-                #@test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:pairwise)
                 @test_throws CompositeException pairwise(f, xm, ym, skipmissing=:pairwise)
-                #@test_throws Union{ArgumentError,MethodError} pairwise(f, xm, ym, skipmissing=:listwise)
                 @test_throws CompositeException pairwise(f, xm, ym, skipmissing=:listwise)
 
                 res = zeros(Union{Float64,Missing}, length(xm), length(ym))
-                #@test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:pairwise)
                 @test_throws CompositeException pairwise!(f, res, xm, ym, skipmissing=:pairwise)
-                #@test_throws Union{ArgumentError,MethodError} pairwise!(f, res, xm, ym, skipmissing=:listwise)
                 @test_throws CompositeException pairwise!(f, res, xm, ym, skipmissing=:listwise)
             end
         end

From 893d4b67e8de28ac4ec6f48b0c812073dd21c1d6 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Tue, 2 Apr 2024 16:16:44 +0100
Subject: [PATCH 06/24] Delete settings.json

---
 .vscode/settings.json | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 7a73a41bf..000000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,2 +0,0 @@
-{
-}
\ No newline at end of file

From e2c9ef59e98593ef29c14924f030a28088865aab Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Tue, 2 Apr 2024 17:19:45 +0100
Subject: [PATCH 07/24] Update pairwise.jl in response to some of devmotion's
 comments

---
 src/pairwise.jl | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/pairwise.jl b/src/pairwise.jl
index 025566b16..c8bbf2357 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -5,15 +5,15 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
 
     # Swap x and y for more efficient threaded loop.
     if nr < nc
-        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
-        _pairwise!(Val(:none), f, dest′, y, x, symmetric)
-        dest .= transpose(dest′)
+        dest2 = reshape(dest, size(dest, 2), size(dest, 1))
+        _pairwise!(Val(:none), f, dest2, y, x, symmetric)
+        dest .= transpose(dest2)
         return dest
     end
 
     #cor and friends are special-cased.
-    iscor = (f in (corkendall, corspearman, cor))
-    (iscor || f == cov) && (symmetric = x === y)
+    diag_is_1 = (f in (corkendall, corspearman, cor))
+    (diag_is_1 || f == cov) && (symmetric = x === y)
     #cov(x) is faster than cov(x, x)
     (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))
 
@@ -21,13 +21,13 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
         for j in subset
             for i = 1:(symmetric ? j : nc)
                 # For performance, diagonal is special-cased
-                if iscor && i==j && y[i] === x[j] && V !== Union{}
+                if diag_is_1 && i == j && y[i] === x[j] && V !== Union{}
                     dest[j, i] = V === Missing ? missing : 1.0
                 else
                     dest[j, i] = f(x[j], y[i])
                 end
-                symmetric && (dest[i, j] = dest[j, i])
             end
+            symmetric && LinearAlgebra.copytri!(dest, 'L')
         end
     end
     return dest
@@ -81,15 +81,15 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
 
     # Swap x and y for more efficient threaded loop.
     if nr < nc
-        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
-        _pairwise!(Val(:pairwise), f, dest′, y, x, symmetric)
-        dest .= transpose(dest′)
+        dest2 = reshape(dest, size(dest, 2), size(dest, 1))
+        _pairwise!(Val(:pairwise), f, dest2, y, x, symmetric)
+        dest .= transpose(dest2)
         return dest
     end
 
     #cor and friends are special-cased.
-    iscor = (f in (corkendall, corspearman, cor))
-    (iscor || f == cov) && (symmetric = x === y)
+    diag_is_1 = (f in (corkendall, corspearman, cor))
+    (diag_is_1 || f == cov) && (symmetric = x === y)
     #cov(x) is faster than cov(x, x)
     (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))
 
@@ -97,17 +97,17 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
     nmty = promoted_nmtype(y)[]
 
     Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+        scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
+        scratch_fy = task_local_vector(:scratch_fy, nmty, m)
         for j in subset
-            scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
-            scratch_fy = task_local_vector(:scratch_fy, nmty, m)
             for i = 1:(symmetric ? j : nc)
-                if iscor && i == j && y[i] === x[j] && V !== Union{} && V!== Missing
+                if diag_is_1 && i == j && y[i] === x[j] && V !== Union{} && V !== Missing
                     dest[j, i] = 1.0
                 else
-                    dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx = scratch_fx, scratch_fy = scratch_fy)...)
+                    dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx=scratch_fx, scratch_fy=scratch_fy)...)
                 end
-                symmetric && (dest[i, j] = dest[j, i])
             end
+            symmetric && LinearAlgebra.copytri!(dest, 'L')
         end
     end
     return dest
@@ -255,7 +255,7 @@ julia> dest
 ```
 """
 function pairwise!(f, dest::AbstractMatrix, x, y=x;
-                   symmetric::Bool=false, skipmissing::Symbol=:none)
+    symmetric::Bool=false, skipmissing::Symbol=:none)
     check_vectors(x, y, skipmissing, symmetric)
     return _pairwise!(f, dest, x, y, symmetric=symmetric, skipmissing=skipmissing)
 end

From 4c2e7a06988256d3c5200f5e860940a0f6fadb2d Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Tue, 2 Apr 2024 17:37:37 +0100
Subject: [PATCH 08/24] Was calling copytry! too soon. Fixed that.

---
 src/pairwise.jl | 4 ++--
 src/rankcorr.jl | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pairwise.jl b/src/pairwise.jl
index c8bbf2357..b0b79448a 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -27,9 +27,9 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
                     dest[j, i] = f(x[j], y[i])
                 end
             end
-            symmetric && LinearAlgebra.copytri!(dest, 'L')
         end
     end
+    symmetric && LinearAlgebra.copytri!(dest, 'L')
     return dest
 end
 
@@ -107,9 +107,9 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
                     dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx=scratch_fx, scratch_fy=scratch_fy)...)
                 end
             end
-            symmetric && LinearAlgebra.copytri!(dest, 'L')
         end
     end
+    symmetric && LinearAlgebra.copytri!(dest, 'L')
     return dest
 end
 
diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index ca59753ca..e94db2f04 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -137,10 +137,10 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
                         view(tempx, :, j), view(tempy, :, i), inds, spnmx, spnmy, nmx,
                         nmy, ranksx, ranksy)
                 end
-                symmetric && (dest[i, j] = dest[j, i])
             end
         end
     end
+    symmetric && LinearAlgebra.copytri!(dest, 'L')
     return dest
 end
 
@@ -515,10 +515,10 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
                         scratch_py=scratch_py, scratch_sy=scratch_sy, scratch_fx=scratch_fx,
                         scratch_fy=scratch_fy)
                 end
-                symmetric && (dest[i, j] = dest[j, i])
             end
         end
     end
+    symmetric && LinearAlgebra.copytri!(dest, 'L')
     return dest
 end
 

From 2119804bb83450a500b41349f7766de6a24c5692 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Tue, 2 Apr 2024 18:22:41 +0100
Subject: [PATCH 09/24] Update pairwise.jl

---
 src/pairwise.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pairwise.jl b/src/pairwise.jl
index b0b79448a..f55823219 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -381,16 +381,16 @@ function handle_pairwise(x::AbstractVector, y::AbstractVector;
 
     axes(x) == axes(y) || throw(DimensionMismatch("x and y have inconsistent dimensions"))
     lb = first(axes(x, 1))
-    j = lb - 1
+    j = lb
     @inbounds for i in eachindex(x)
         if !(ismissing(x[i]) || ismissing(y[i]))
-            j += 1
             scratch_fx[j] = x[i]
             scratch_fy[j] = y[i]
+            j += 1
         end
     end
 
-    return view(scratch_fx, lb:j), view(scratch_fy, lb:j)
+    return view(scratch_fx, lb:(j-1)), view(scratch_fy, lb:(j-1))
 end
 
 #=Condition a) makes equal_sum_subsets useful for load-balancing the multi-threaded section

From 5da5943902e992e2dbb9851a02cf6833ffef20e0 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Wed, 3 Apr 2024 14:47:20 +0100
Subject: [PATCH 10/24] nested loops more friendly to Julia being column-major

---
 src/pairwise.jl | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/pairwise.jl b/src/pairwise.jl
index f55823219..11dbbcda5 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -4,8 +4,8 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
     nr, nc = size(dest)
 
     # Swap x and y for more efficient threaded loop.
-    if nr < nc
-        dest2 = reshape(dest, size(dest, 2), size(dest, 1))
+    if nc < nr
+        dest2 = similar(dest, size(dest, 2), size(dest, 1))
         _pairwise!(Val(:none), f, dest2, y, x, symmetric)
         dest .= transpose(dest2)
         return dest
@@ -17,14 +17,14 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
     #cov(x) is faster than cov(x, x)
     (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))
 
-    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+    Threads.@threads for subset in equal_sum_subsets(nc, Threads.nthreads())
         for j in subset
-            for i = 1:(symmetric ? j : nc)
+            for i = (symmetric ? j : 1):nr
                 # For performance, diagonal is special-cased
-                if diag_is_1 && i == j && y[i] === x[j] && V !== Union{}
-                    dest[j, i] = V === Missing ? missing : 1.0
+                if diag_is_1 && i == j && x[i] === y[j]
+                    dest[i, j] = V === Missing ? missing : 1.0
                 else
-                    dest[j, i] = f(x[j], y[i])
+                    dest[i, j] = f(x[i], y[j])
                 end
             end
         end
@@ -80,8 +80,8 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
     m = length(x) == 0 ? 0 : length(first(x))
 
     # Swap x and y for more efficient threaded loop.
-    if nr < nc
-        dest2 = reshape(dest, size(dest, 2), size(dest, 1))
+    if nc < nr
+        dest2 = similar(dest, size(dest, 2), size(dest, 1))
         _pairwise!(Val(:pairwise), f, dest2, y, x, symmetric)
         dest .= transpose(dest2)
         return dest
@@ -96,15 +96,15 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
     nmtx = promoted_nmtype(x)[]
     nmty = promoted_nmtype(y)[]
 
-    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+    Threads.@threads for subset in equal_sum_subsets(nc, Threads.nthreads())
         scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
         scratch_fy = task_local_vector(:scratch_fy, nmty, m)
         for j in subset
-            for i = 1:(symmetric ? j : nc)
-                if diag_is_1 && i == j && y[i] === x[j] && V !== Union{} && V !== Missing
-                    dest[j, i] = 1.0
+            for i = (symmetric ? j : 1):nr
+                if diag_is_1 && i == j && x[i] === y[j] && V !== Missing
+                    dest[i, j] = 1.0
                 else
-                    dest[j, i] = f(handle_pairwise(x[j], y[i]; scratch_fx=scratch_fx, scratch_fy=scratch_fy)...)
+                    dest[i, j] = f(handle_pairwise(x[i], y[j]; scratch_fx=scratch_fx, scratch_fy=scratch_fy)...)
                 end
             end
         end

From baf193b72446c217a158193bbe8b5280c8a996fd Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Wed, 3 Apr 2024 14:48:01 +0100
Subject: [PATCH 11/24] flipped i and j loop variables. reshape -> similar

---
 src/rankcorr.jl | 53 ++++++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index e94db2f04..2b06d9896 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -99,9 +99,9 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
 
     # Swap x and y for more efficient threaded loop.
     if nr < nc
-        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
-        _pairwise!(Val(:pairwise), f, dest′, y, x, symmetric)
-        dest .= transpose(dest′)
+        dest2 = similar(dest, size(dest, 2), size(dest, 1))
+        _pairwise!(Val(:pairwise), f, dest2, y, x, symmetric)
+        dest .= transpose(dest2)
         return dest
     end
 
@@ -114,7 +114,7 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
     nmty = promoted_nmtype(y)[]
     Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
 
-        for j in subset
+        for i in subset
 
             inds = task_local_vector(:inds, int64, m)
             spnmx = task_local_vector(:spnmx, int64, m)
@@ -124,17 +124,17 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
             ranksx = task_local_vector(:ranksx, fl64, m)
             ranksy = task_local_vector(:ranksy, fl64, m)
 
-            for i = 1:(symmetric ? j : nc)
+            for j = 1:(symmetric ? i : nc)
                 # For performance, diagonal is special-cased
-                if x[j] === y[i] && i == j && V !== Union{}
-                    if missing isa V && eltype(x[j]) == Missing
-                        dest[j, i] = missing
+                if x[i] === y[j] && i == j
+                    if missing isa V && eltype(x[i]) == Missing
+                        dest[i, j] = missing
                     else
-                        dest[j, i] = 1.0
+                        dest[i, j] = 1.0
                     end
                 else
-                    dest[j, i] = corspearman_kernel!(x[j], y[i], :pairwise,
-                        view(tempx, :, j), view(tempy, :, i), inds, spnmx, spnmy, nmx,
+                    dest[i, j] = corspearman_kernel!(x[i], y[j], :pairwise,
+                        view(tempx, :, i), view(tempy, :, j), inds, spnmx, spnmy, nmx,
                         nmy, ranksx, ranksy)
                 end
             end
@@ -468,9 +468,9 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
 
     # Swap x and y for more efficient threaded loop.
     if nr < nc
-        dest′ = reshape(dest, size(dest, 2), size(dest, 1))
-        corkendall_loop!(skipmissing, f, dest′, y, x, symmetric)
-        dest .= transpose(dest′)
+        dest2 = similar(dest, size(dest, 2), size(dest, 1))
+        corkendall_loop!(skipmissing, f, dest2, y, x, symmetric)
+        dest .= transpose(dest2)
         return dest
     end
 
@@ -484,11 +484,11 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
 
     Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
 
-        for j in subset
+        for i in subset
 
             sortedxj = task_local_vector(:sortedxj, t, m)
             scratch_py = task_local_vector(:scratch_py, u, m)
-            yi = task_local_vector(:yi, u, m)
+            yj = task_local_vector(:yj, u, m)
             permx = task_local_vector(:permx, intvec, m)
             # Ensuring missing is not an element type of scratch_sy, scratch_fx, scratch_fy
             # gives improved performance.
@@ -496,22 +496,22 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
             scratch_fx = task_local_vector(:scratch_fx, t′, m)
             scratch_fy = task_local_vector(:scratch_fy, t′, m)
 
-            sortperm!(permx, x[j])
+            sortperm!(permx, x[i])
             @inbounds for k in eachindex(sortedxj)
-                sortedxj[k] = x[j][permx[k]]
+                sortedxj[k] = x[i][permx[k]]
             end
 
-            for i = 1:(symmetric ? j : nc)
+            for j = 1:(symmetric ? i : nc)
                 # For performance, diagonal is special-cased
-                if x[j] === y[i] && i == j && V !== Union{}
-                    if missing isa V && eltype(x[j]) == Missing
-                        dest[j, i] = missing
+                if x[i] === y[j] && j == i
+                    if missing isa V && eltype(x[i]) == Missing
+                        dest[i, j] = missing
                     else
-                        dest[j, i] = 1.0
+                        dest[i, j] = 1.0
                     end
                 else
-                    yi .= y[i]
-                    dest[j, i] = corkendall_kernel!(sortedxj, yi, permx, skipmissing;
+                    yj .= y[j]
+                    dest[i, j] = corkendall_kernel!(sortedxj, yj, permx, skipmissing;
                         scratch_py=scratch_py, scratch_sy=scratch_sy, scratch_fx=scratch_fx,
                         scratch_fy=scratch_fy)
                 end
@@ -746,5 +746,4 @@ end
 # can also accept arguments with element type for which isnan is not defined but isless is
 # is defined, so that rank correlation makes sense.
 _isnan(x::T) where {T<:Number} = isnan(x)
-_isnan(x) = false
-
+_isnan(x) = false
\ No newline at end of file

From a1d9ae994ffdc5868b34de47bdb42daa163b34e3 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Thu, 4 Apr 2024 16:33:53 +0100
Subject: [PATCH 12/24] Added iterators EqualSumSubsets and TwoStepRange

---
 src/pairwise.jl  | 139 +++++++++++++++++++++++++++++++++++------------
 src/rankcorr.jl  |   4 +-
 test/rankcorr.jl |  13 ++++-
 3 files changed, 115 insertions(+), 41 deletions(-)

diff --git a/src/pairwise.jl b/src/pairwise.jl
index 11dbbcda5..641f02db8 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -17,7 +17,7 @@ function _pairwise!(::Val{:none}, f, dest::AbstractMatrix{V}, x, y,
     #cov(x) is faster than cov(x, x)
     (f == cov) && (f = ((x, y) -> x === y ? cov(x) : cov(x, y)))
 
-    Threads.@threads for subset in equal_sum_subsets(nc, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nc, Threads.nthreads())
         for j in subset
             for i = (symmetric ? j : 1):nr
                 # For performance, diagonal is special-cased
@@ -96,7 +96,7 @@ function _pairwise!(::Val{:pairwise}, f, dest::AbstractMatrix{V}, x, y, symmetri
     nmtx = promoted_nmtype(x)[]
     nmty = promoted_nmtype(y)[]
 
-    Threads.@threads for subset in equal_sum_subsets(nc, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nc, Threads.nthreads())
         scratch_fx = task_local_vector(:scratch_fx, nmtx, m)
         scratch_fy = task_local_vector(:scratch_fy, nmty, m)
         for j in subset
@@ -393,51 +393,118 @@ function handle_pairwise(x::AbstractVector, y::AbstractVector;
     return view(scratch_fx, lb:(j-1)), view(scratch_fy, lb:(j-1))
 end
 
-#=Condition a) makes equal_sum_subsets useful for load-balancing the multi-threaded section
-of _pairwise! in the non-symmetric case, and condition b) for the symmetric case.=#
 """
-    equal_sum_subsets(n::Int, num_subsets::Int)::Vector{Vector{Int}}
+    task_local_vector(key::Symbol, similarto::AbstractArray{V},
+    length::Int)::Vector{V} where {V}
+
+Retrieve from task local storage a vector of length `length` and matching the element
+type of `similarto`, with initialisation on first call during a task.
+"""
+function task_local_vector(key::Symbol, similarto::AbstractArray{V},
+    length::Int)::Vector{V} where {V}
+    haskey(task_local_storage(), key) || task_local_storage(key, similar(similarto, length))
+    return task_local_storage(key)
+end
 
-Divide the integers 1:n into a number of subsets such that a) each subset has
-(approximately) the same number of elements; and b) the sum of the elements in each subset
-is nearly equal. If `n` is a multiple of `2 * num_subsets` both conditions hold exactly.
+"""
+    EqualSumSubsets
+
+An iterator that partitions the integers 1 to n into `num_subsets` "subsets" (of type
+TwoStepRange) such that a) each subset is of nearly equal length; and b) the sum of the
+elements in each subset is nearly equal. If `n` is a multiple of `2 * num_subsets` both
+conditions hold exactly.
 
 ## Example
 ```julia-repl
-julia> StatsBase.equal_sum_subsets(30,5)
-5-element Vector{Vector{Int64}}:
- [30, 21, 20, 11, 10, 1]
- [29, 22, 19, 12, 9, 2]
- [28, 23, 18, 13, 8, 3]
- [27, 24, 17, 14, 7, 4]
- [26, 25, 16, 15, 6, 5]
+julia> for s in StatsBase.EqualSumSubsets(30,5)
+println((collect(s), sum(s)))
+end
+([30, 21, 20, 11, 10, 1], 93)
+([29, 22, 19, 12, 9, 2], 93)
+([28, 23, 18, 13, 8, 3], 93)
+([27, 24, 17, 14, 7, 4], 93)
+([26, 25, 16, 15, 6, 5], 93)
+
+#Check for correct partitioning, in this case of integers 1:1000 into 17 subsets.
+julia> sort(vcat([collect(s) for s in StatsBase.EqualSumSubsets(1000,17)]...))==1:1000
+true
+
 ```
 """
-function equal_sum_subsets(n::Int, num_subsets::Int)::Vector{Vector{Int}}
-    subsets = [Int[] for _ in 1:min(n, num_subsets)]
-    writeto, scanup = 1, true
-    for i = n:-1:1
-        push!(subsets[writeto], i)
-        if scanup && writeto == num_subsets
-            scanup = false
-        elseif (!scanup) && writeto == 1
-            scanup = true
-        else
-            writeto += scanup ? 1 : -1
-        end
+struct EqualSumSubsets
+    n::Int64
+    num_subsets::Int64
+
+    function EqualSumSubsets(n, num_subsets)
+        n >= 0 || throw("n must not be negative, but got $n")
+        num_subsets > 0 || throw("num_subsets must be positive, but got $num_subsets")
+        return new(n, num_subsets)
     end
-    return subsets
+
+end
+
+Base.eltype(::EqualSumSubsets) = TwoStepRange
+Base.length(x::EqualSumSubsets) = min(x.n, x.num_subsets)
+Base.firstindex(::EqualSumSubsets) = 1
+
+function Base.iterate(ess::EqualSumSubsets, state::Int64=1)
+    state > length(ess) && return nothing
+    return getindex(ess, state), state + 1
+end
+
+function Base.getindex(ess::EqualSumSubsets, i::Int64=1)
+    n, nss = ess.n, ess.num_subsets
+    step1 = 2i - 2nss - 1
+    step2 = 1 - 2i
+    return TwoStepRange(n - i + 1, step1, step2)
 end
 
 """
-    task_local_vector(key::Symbol, similarto::AbstractArray{V},
-    length::Int)::Vector{V} where {V}
+TwoStepRange
 
-Retrieve from task local storage a vector of length `length` and matching the element
-type of `similarto`, with initialisation on first call during a task.
+Range with a starting value of `start`, stop value of `1` and a step that alternates
+between `step1` and `step2`. `start` must be positive and `step1` and `step2` must be
+negative.
+
+# Examples
+```jldoctest
+julia> collect(StatsBase.TwoStepRange(30,-7,-3))
+6-element Vector{Int64}:
+ 30
+ 23
+ 20
+ 13
+ 10
+  3
+
+```
 """
-function task_local_vector(key::Symbol, similarto::AbstractArray{V},
-    length::Int)::Vector{V} where {V}
-    haskey(task_local_storage(), key) || task_local_storage(key, similar(similarto, length))
-    return task_local_storage(key)
+struct TwoStepRange
+    start::Int64
+    step1::Int64
+    step2::Int64
+
+    function TwoStepRange(start, step1, step2)
+        start > 0 || throw("start must be positive, but got $start")
+        step1 < 0 || throw("step1 must be negative, but got $step1")
+        step2 < 0 || throw("step2 must be negative, but got $step2")
+        return new(start, step1, step2)
+    end
+end
+
+Base.eltype(::TwoStepRange) = Int64
+
+function Base.length(tsr::TwoStepRange)
+    return length((tsr.start):(tsr.step1+tsr.step2):1) +
+           length((tsr.start+tsr.step1):(tsr.step1+tsr.step2):1)
+end
+
+function Base.iterate(tsr::TwoStepRange, i::Int64=1)
+    (i > length(tsr)) && return nothing
+    return getindex(tsr, i), i + 1
+end
+
+function Base.getindex(tsr::TwoStepRange, i::Int64=1)::Int64
+    a, b = divrem(i - 1, 2)
+    return tsr.start + (tsr.step1 + tsr.step2) * a + b * tsr.step1
 end
\ No newline at end of file
diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index 2b06d9896..dca82770e 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -112,7 +112,7 @@ function _pairwise!(::Val{:pairwise}, f::typeof(corspearman),
     fl64 = Float64[]
     nmtx = promoted_nmtype(x)[]
     nmty = promoted_nmtype(y)[]
-    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nr, Threads.nthreads())
 
         for i in subset
 
@@ -482,7 +482,7 @@ function corkendall_loop!(skipmissing::Symbol, f::typeof(corkendall), dest::Abst
 
     symmetric = x === y
 
-    Threads.@threads for subset in equal_sum_subsets(nr, Threads.nthreads())
+    Threads.@threads for subset in EqualSumSubsets(nr, Threads.nthreads())
 
         for i in subset
 
diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index 719870d3d..9cd9ee1ce 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -32,9 +32,16 @@ using Test
     @test StatsBase.midpoint(1, 10) == 5
     @test StatsBase.midpoint(1, widen(10)) == 5
 
-    @test StatsBase.equal_sum_subsets(0, 1) == Vector{Int64}[]
-    @test sum.(StatsBase.equal_sum_subsets(100, 5)) == repeat([1010], 5)
-    @test sort(vcat(StatsBase.equal_sum_subsets(500, 7)...)) == collect(1:500)
+    for n in 1:200, nss in 1:7
+        #check is a partition
+        @test sort(vcat([collect(s) for s in StatsBase.EqualSumSubsets(n, nss)]...)) == 1:n
+        #check near-equal lengths
+        lengths = [length(s) for s in StatsBase.EqualSumSubsets(n, nss)]
+        @test (maximum(lengths) - minimum(lengths)) <= 1
+        #check near-equal sums
+        sums = [sum(collect(s)) for s in StatsBase.EqualSumSubsets(n, nss)]
+        @test (maximum(sums) - minimum(sums)) < nss
+    end
 
 end
 

From 951b3916ca2da28898d296b8539e57ba9498bcbe Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 12:27:35 +0100
Subject: [PATCH 13/24] Int64 -> Int to work on 32bit

---
 src/pairwise.jl | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/pairwise.jl b/src/pairwise.jl
index 641f02db8..96a204b57 100644
--- a/src/pairwise.jl
+++ b/src/pairwise.jl
@@ -222,7 +222,7 @@ presence `missing`, `NaN` or `Inf` entries.
   Only allowed when entries in `x` and `y` are vectors.
 
 # Examples
-```jldoctest
+```julia-repl
 julia> using StatsBase, Statistics
 
 julia> dest = zeros(3, 3);
@@ -287,7 +287,7 @@ presence `missing`, `NaN` or `Inf` entries.
   Only allowed when entries in `x` and `y` are vectors.
 
 # Examples
-```jldoctest
+```julia-repl
 julia> using StatsBase, Statistics
 
 julia> x = [1 3 7
@@ -432,8 +432,8 @@ true
 ```
 """
 struct EqualSumSubsets
-    n::Int64
-    num_subsets::Int64
+    n::Int
+    num_subsets::Int
 
     function EqualSumSubsets(n, num_subsets)
         n >= 0 || throw("n must not be negative, but got $n")
@@ -447,12 +447,12 @@ Base.eltype(::EqualSumSubsets) = TwoStepRange
 Base.length(x::EqualSumSubsets) = min(x.n, x.num_subsets)
 Base.firstindex(::EqualSumSubsets) = 1
 
-function Base.iterate(ess::EqualSumSubsets, state::Int64=1)
+function Base.iterate(ess::EqualSumSubsets, state::Int=1)
     state > length(ess) && return nothing
     return getindex(ess, state), state + 1
 end
 
-function Base.getindex(ess::EqualSumSubsets, i::Int64=1)
+function Base.getindex(ess::EqualSumSubsets, i::Int=1)
     n, nss = ess.n, ess.num_subsets
     step1 = 2i - 2nss - 1
     step2 = 1 - 2i
@@ -467,7 +467,7 @@ between `step1` and `step2`. `start` must be positive and `step1` and `step2` mu
 negative.
 
 # Examples
-```jldoctest
+```julia-repl
 julia> collect(StatsBase.TwoStepRange(30,-7,-3))
 6-element Vector{Int64}:
  30
@@ -480,9 +480,9 @@ julia> collect(StatsBase.TwoStepRange(30,-7,-3))
 ```
 """
 struct TwoStepRange
-    start::Int64
-    step1::Int64
-    step2::Int64
+    start::Int
+    step1::Int
+    step2::Int
 
     function TwoStepRange(start, step1, step2)
         start > 0 || throw("start must be positive, but got $start")
@@ -492,19 +492,19 @@ struct TwoStepRange
     end
 end
 
-Base.eltype(::TwoStepRange) = Int64
+Base.eltype(::TwoStepRange) = Int
 
 function Base.length(tsr::TwoStepRange)
     return length((tsr.start):(tsr.step1+tsr.step2):1) +
            length((tsr.start+tsr.step1):(tsr.step1+tsr.step2):1)
 end
 
-function Base.iterate(tsr::TwoStepRange, i::Int64=1)
+function Base.iterate(tsr::TwoStepRange, i::Int=1)
     (i > length(tsr)) && return nothing
     return getindex(tsr, i), i + 1
 end
 
-function Base.getindex(tsr::TwoStepRange, i::Int64=1)::Int64
+function Base.getindex(tsr::TwoStepRange, i::Int=1)::Int
     a, b = divrem(i - 1, 2)
     return tsr.start + (tsr.step1 + tsr.step2) * a + b * tsr.step1
 end
\ No newline at end of file

From 75d206a31c1a34978ecb2e33de44d13fd56d2cee Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 12:28:03 +0100
Subject: [PATCH 14/24] Update tests

---
 test/rankcorr.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index 9cd9ee1ce..af79131b8 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -32,7 +32,7 @@ using Test
     @test StatsBase.midpoint(1, 10) == 5
     @test StatsBase.midpoint(1, widen(10)) == 5
 
-    for n in 1:200, nss in 1:7
+    for n in vcat(1:5, 10:20:90,1000), nss in [1, 4, 8, 20, 32, 64]
         #check is a partition
         @test sort(vcat([collect(s) for s in StatsBase.EqualSumSubsets(n, nss)]...)) == 1:n
         #check near-equal lengths

From 56d27cc2dc3b994aafd47df1d1d69c9062ec6c81 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 12:39:44 +0100
Subject: [PATCH 15/24] Update tests

---
 test/rankcorr.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index af79131b8..381752e45 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -370,12 +370,12 @@ end
     number of threads, thanks to use of task-local storage. The tests below have a "safety
     factor" of 1.2 against the expected size of allocations.
     =#
-    @test (@allocated corkendall(x)) < (897_808 + Threads.nthreads() * 58_044) * 1.2
-    @test (@allocated corkendall(xm, skipmissing=:listwise)) < (1_119_392 + Threads.nthreads() * 22_172) * 1.2
-    @test (@allocated corkendall(xm, skipmissing=:pairwise)) < (892_112 + Threads.nthreads() * 61_116) * 1.2
+    @test (@allocated corkendall(x)) < (896_144 + Threads.nthreads() * 57_976) * 1.2
+    @test (@allocated corkendall(xm,skipmissing=:listwise)) < (1_117_728 + Threads.nthreads() * 22_104) * 1.2
+    @test (@allocated corkendall(xm,skipmissing=:pairwise)) < (890_448 + Threads.nthreads() * 61_048) * 1.2
     @test (@allocated corspearman(x)) < (2_678_448 + Threads.nthreads() * 9_128) * 1.2
-    @test (@allocated corspearman(xm, skipmissing=:listwise)) < (1_803_712 + Threads.nthreads() * 3_992) * 1.2
-    @test (@allocated corspearman(xm, skipmissing=:pairwise)) < (1_692_208 + Threads.nthreads() * 67_172) * 1.2
-
+    @test (@allocated corspearman(xm,skipmissing=:listwise)) < (1_803_712 + Threads.nthreads() * 3_992) * 1.2
+    @test (@allocated corspearman(xm,skipmissing=:pairwise)) < (1_690_544 + Threads.nthreads() * 67_104) * 1.2
+    
 end
 # COV_EXCL_STOP
\ No newline at end of file

From 682859a1179870ae207ea5e5502ccd3a598edff4 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 18:10:35 +0100
Subject: [PATCH 16/24] Tests now pass on Julia 1.8.5

No longer use eachcol when VERSION < v"1.9"
---
 src/rankcorr.jl  | 18 +++++++++++++++---
 test/pairwise.jl | 14 +++++++-------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/rankcorr.jl b/src/rankcorr.jl
index dca82770e..eb9c4c211 100644
--- a/src/rankcorr.jl
+++ b/src/rankcorr.jl
@@ -48,7 +48,11 @@ end
 function corspearman(x::AbstractMatrix, y::AbstractMatrix=x;
     skipmissing::Symbol=:none)
     check_rankcor_args(x, y, skipmissing, true)
-    return pairwise(corspearman, eachcol(x), eachcol(y); skipmissing=skipmissing)
+    if x === y
+        return pairwise(corspearman, _eachcol(x); skipmissing=skipmissing)    
+    else
+        return pairwise(corspearman, _eachcol(x), _eachcol(y); skipmissing=skipmissing)
+    end
 end
 
 function corspearman(x::AbstractVector{T}) where {T}
@@ -414,7 +418,11 @@ Uses multiple threads when either `x` or `y` is a matrix.
 function corkendall(x::AbstractMatrix, y::AbstractMatrix=x;
     skipmissing::Symbol=:none)
     check_rankcor_args(x, y, skipmissing, true)
-    return pairwise(corkendall, eachcol(x), eachcol(y); skipmissing=skipmissing)
+    if x === y
+        return pairwise(corkendall, _eachcol(x); skipmissing=skipmissing)
+    else
+        return pairwise(corkendall, _eachcol(x), _eachcol(y); skipmissing=skipmissing)
+    end
 end
 
 function corkendall(x::AbstractVector, y::AbstractVector;
@@ -746,4 +754,8 @@ end
 # can also accept arguments with element type for which isnan is not defined but isless is
 # is defined, so that rank correlation makes sense.
 _isnan(x::T) where {T<:Number} = isnan(x)
-_isnan(x) = false
\ No newline at end of file
+_isnan(x) = false
+
+# eachcol was added in Julia 1.1 but for Julia 1.8, keys(eachcol(x)) fails for any Matrix x
+# which causes `check_vectors` to fail. Solution is to use the comprehension below.
+_eachcol(x) = VERSION < v"1.9" ? [view(x, :, i) for i in axes(x, 2)] : eachcol(x)
\ No newline at end of file
diff --git a/test/pairwise.jl b/test/pairwise.jl
index 6a3bf1df1..77ab279ea 100644
--- a/test/pairwise.jl
+++ b/test/pairwise.jl
@@ -53,8 +53,8 @@ arbitrary_fun(x, y) = cor(x, y)
 
         @inferred pairwise(f, x, y)
         if throwsforzerolengthinput
-            @test_throws CompositeException pairwise(f, [Int[]], [Int[]])
-            @test_throws CompositeException pairwise!(f, zeros(1, 1), [Int[]], [Int[]])
+            @test_throws Union{CompositeException,TaskFailedException} pairwise(f, [Int[]], [Int[]])
+            @test_throws Union{CompositeException,TaskFailedException} pairwise!(f, zeros(1, 1), [Int[]], [Int[]])
         end
 
         res = pairwise(f, [], [])
@@ -182,12 +182,12 @@ arbitrary_fun(x, y) = cor(x, y)
 
         if VERSION >= v"1.5" # Fails with UndefVarError on Julia 1.0
             if throwsforzerolengthinput
-                @test_throws CompositeException pairwise(f, xm, ym, skipmissing=:pairwise)
-                @test_throws CompositeException pairwise(f, xm, ym, skipmissing=:listwise)
+                @test_throws Union{CompositeException,TaskFailedException} pairwise(f, xm, ym, skipmissing=:pairwise)
+                @test_throws Union{CompositeException,TaskFailedException} pairwise(f, xm, ym, skipmissing=:listwise)
 
                 res = zeros(Union{Float64,Missing}, length(xm), length(ym))
-                @test_throws CompositeException pairwise!(f, res, xm, ym, skipmissing=:pairwise)
-                @test_throws CompositeException pairwise!(f, res, xm, ym, skipmissing=:listwise)
+                @test_throws Union{CompositeException,TaskFailedException} pairwise!(f, res, xm, ym, skipmissing=:pairwise)
+                @test_throws Union{CompositeException,TaskFailedException} pairwise!(f, res, xm, ym, skipmissing=:listwise)
             end
         end
 
@@ -305,7 +305,7 @@ arbitrary_fun(x, y) = cor(x, y)
                     NaN 1.0]
                 if VERSION >= v"1.5"
                     if f == cor
-                        @test_throws CompositeException pairwise(f, [[missing, missing, missing],
+                        @test_throws Union{CompositeException,TaskFailedException} pairwise(f, [[missing, missing, missing],
                                 [missing, missing, missing]],
                             skipmissing=sm)
                     end

From 9c039dd39edd122e950e5448617de40ee1530f66 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 19:00:28 +0100
Subject: [PATCH 17/24] Compatibility with Julia 1.6

---
 test/pairwise.jl | 6 +++++-
 test/rankcorr.jl | 4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/test/pairwise.jl b/test/pairwise.jl
index 77ab279ea..6bfb483c6 100644
--- a/test/pairwise.jl
+++ b/test/pairwise.jl
@@ -116,7 +116,11 @@ arbitrary_fun(x, y) = cor(x, y)
         @test res ≅ res2
         # Use myskipmissings rather than skipmissings to avoid deprecation warning
         function myskipmissings(x, y)
-            which = @. !(ismissing(x) || ismissing(y))
+            #which = @. !(ismissing(x) || ismissing(y)) # not compatible with Julia 1.6
+            which = similar(x,Bool)
+            for i in eachindex(x)
+                which[i] = !(ismissing(x[i])|| ismissing(y[i]))
+            end
             return x[which], y[which]
         end
 
diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index 381752e45..e2e025dc1 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -215,8 +215,8 @@ julia> corkendall(Matrix{Union{Missing,Float64}}(missing,5,3)) #DIFFERENT behavi
     @test isequal(f([], []), NaN)
     @test isequal(f(fill(1, 0, 2), fill(1, 0, 2)), [NaN NaN; NaN NaN])
     @test isequal(f(fill(1, 0, 2)), [1.0 NaN; NaN 1.0])
-    @test isequal(f([1;;], [1;;]), [NaN;;])
-    @test isequal(f([1;;]), [1.0;;])
+    @test isequal(f(reshape([1],(1,1)), reshape([1],(1,1))), reshape([NaN],(1,1)))
+    @test isequal(f(reshape([1],(1,1))), reshape([1.0],(1,1)))
     @test isequal(f([missing], [missing]), NaN)
     @test isequal(f([1], [1]), NaN)
     @test isequal(f([NaN], [NaN]), NaN)

From f53b7b50903ba931aeca7aa73254f890a8d97290 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 21:50:34 +0100
Subject: [PATCH 18/24] Test against Julia 1.6, 1.7, 1.8 and 1.9 to find
 possible new minimum version

---
 .github/workflows/ci.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 74e4c14cd..0b3acc82f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,6 +13,10 @@ jobs:
       matrix:
         version:
           - '1.0'
+          - '1.6'
+          - '1.7'
+          - '1.8'
+          - '1.9'
           - '1' # automatically expands to the latest stable 1.x release of Julia
           - 'nightly'
         os:

From 88bf8f60f16f4a266488878fdcbec6b798648bd7 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 22:16:24 +0100
Subject: [PATCH 19/24] Test on Julia 1.1, 1.2, 1.3. 1.4 and 1.5

---
 .github/workflows/ci.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0b3acc82f..dce83bfd1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,11 +12,11 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.0'
-          - '1.6'
-          - '1.7'
-          - '1.8'
-          - '1.9'
+          - '1.1'
+          - '1.2'
+          - '1.3'
+          - '1.4'
+          - '1.5'
           - '1' # automatically expands to the latest stable 1.x release of Julia
           - 'nightly'
         os:

From ddb1b516c123352d6dc2818447af9be0ebeb92b7 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Fri, 5 Apr 2024 22:56:51 +0100
Subject: [PATCH 20/24] Demonstrate that 1.6 could be new minimum version

---
 .github/workflows/ci.yml | 6 +-----
 Project.toml             | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dce83bfd1..433237ecc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,11 +12,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.1'
-          - '1.2'
-          - '1.3'
-          - '1.4'
-          - '1.5'
+          - '1.6'
           - '1' # automatically expands to the latest stable 1.x release of Julia
           - 'nightly'
         os:
diff --git a/Project.toml b/Project.toml
index 5e6bffed1..076dd40d4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -24,7 +24,7 @@ Missings = "0.3, 0.4, 1.0"
 SortingAlgorithms = "0.3, 1.0"
 Statistics = "1"
 StatsAPI = "1.2"
-julia = "1"
+julia = "1.6"
 
 [extras]
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"

From f67e867250d6eda98b556ff728ce5f1393ad248d Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Sun, 7 Apr 2024 12:41:14 +0100
Subject: [PATCH 21/24] Corrected call to keyword argument

---
 test/rankcorr.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index e2e025dc1..be0aeaa97 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -341,7 +341,7 @@ end
                     if !((arg1 isa Vector) && (arg2 isa Vector) && skipmissing == :listwise)
                         copy1 = copy(arg1)
                         copy2 = copy(arg2)
-                        res = f(arg1, arg2; skipmissing)
+                        res = f(arg1, arg2; skipmissing=skipmissing)
                         @test isequal(arg1, copy1)
                         @test isequal(arg2, copy2)
                     end

From 246f32d5f6b152d41a68a9b2442f7f675bee5a6f Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Wed, 10 Apr 2024 15:23:18 +0100
Subject: [PATCH 22/24] Improved test coverage (src/pairwise.jl#L84-L87)

---
 test/pairwise.jl | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/test/pairwise.jl b/test/pairwise.jl
index 6bfb483c6..24ede4492 100644
--- a/test/pairwise.jl
+++ b/test/pairwise.jl
@@ -85,6 +85,10 @@ arbitrary_fun(x, y) = cor(x, y)
         @test_throws DimensionMismatch pairwise!(f, zeros(1, 2), [], [])
         @test_throws DimensionMismatch pairwise!(f, zeros(0, 0),
             [], [[1, 2], [2, 3]])
+
+        @test pairwise(f, x, y) == (pairwise(f, y, x))'
+        @test pairwise(f, x, y, skipmissing=:pairwise) == (pairwise(f, y, x, skipmissing=:pairwise))'
+        @test pairwise(f, x, y, skipmissing=:listwise) == (pairwise(f, y, x, skipmissing=:listwise))'
     end
 
     if f in (cor, corkendall, corspearman)
@@ -117,9 +121,9 @@ arbitrary_fun(x, y) = cor(x, y)
         # Use myskipmissings rather than skipmissings to avoid deprecation warning
         function myskipmissings(x, y)
             #which = @. !(ismissing(x) || ismissing(y)) # not compatible with Julia 1.6
-            which = similar(x,Bool)
+            which = similar(x, Bool)
             for i in eachindex(x)
-                which[i] = !(ismissing(x[i])|| ismissing(y[i]))
+                which[i] = !(ismissing(x[i]) || ismissing(y[i]))
             end
             return x[which], y[which]
         end

From 46d5655d8287cc41ad25096b812b554666108df5 Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Wed, 10 Apr 2024 16:15:21 +0100
Subject: [PATCH 23/24] Improved test coverage (StatsBase._cor)

---
 test/rankcorr.jl | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index be0aeaa97..c399ea16f 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -21,6 +21,24 @@ using Test
     @test StatsBase.handle_pairwise(x, float.(y)) == ([2, 3, 4], [1.0, 2.0, 4.0])
     @test StatsBase.handle_pairwise(u, v) == (Int64[], Int64[])
 
+    ranksx = [missing missing]
+    ranksy = [1 2]
+    @test isequal(StatsBase._cor(ranksx, ranksy), [NaN NaN; NaN NaN])
+    @test isequal(StatsBase._cor(ranksx, ranksx), [missing NaN; NaN missing])
+    @test isequal(StatsBase._cor(ranksy, ranksy), [1.0 NaN; NaN 1.0])
+
+    ranksx = [missing missing; missing missing]
+    @test isequal(StatsBase._cor(ranksx, ranksx), [missing missing; missing missing])
+
+    Random.seed!(1)
+    ranksx = rand(10,5) ;ranksy = rand(10,6)
+    @test StatsBase._cor(ranksx,ranksx) == cor(ranksx)
+    @test StatsBase._cor(ranksx,ranksy) == cor(ranksx,ranksy)
+
+    ranksx = fill(missing,3,3);ranksy = hcat(ranksx,[1,2,3])
+    @test isequal(StatsBase._cor(ranksx,ranksx),fill(missing,3,3))
+    @test isequal(StatsBase._cor(ranksx,ranksy),fill(missing,3,4))
+
     v = collect(100:-1:1)
     StatsBase.insertion_sort!(v, 1, n)
     @test v == 1:n
@@ -32,7 +50,7 @@ using Test
     @test StatsBase.midpoint(1, 10) == 5
     @test StatsBase.midpoint(1, widen(10)) == 5
 
-    for n in vcat(1:5, 10:20:90,1000), nss in [1, 4, 8, 20, 32, 64]
+    for n in vcat(1:5, 10:20:90, 1000), nss in [1, 4, 8, 20, 32, 64]
         #check is a partition
         @test sort(vcat([collect(s) for s in StatsBase.EqualSumSubsets(n, nss)]...)) == 1:n
         #check near-equal lengths
@@ -215,8 +233,8 @@ julia> corkendall(Matrix{Union{Missing,Float64}}(missing,5,3)) #DIFFERENT behavi
     @test isequal(f([], []), NaN)
     @test isequal(f(fill(1, 0, 2), fill(1, 0, 2)), [NaN NaN; NaN NaN])
     @test isequal(f(fill(1, 0, 2)), [1.0 NaN; NaN 1.0])
-    @test isequal(f(reshape([1],(1,1)), reshape([1],(1,1))), reshape([NaN],(1,1)))
-    @test isequal(f(reshape([1],(1,1))), reshape([1.0],(1,1)))
+    @test isequal(f(reshape([1], (1, 1)), reshape([1], (1, 1))), reshape([NaN], (1, 1)))
+    @test isequal(f(reshape([1], (1, 1))), reshape([1.0], (1, 1)))
     @test isequal(f([missing], [missing]), NaN)
     @test isequal(f([1], [1]), NaN)
     @test isequal(f([NaN], [NaN]), NaN)
@@ -371,11 +389,11 @@ end
     factor" of 1.2 against the expected size of allocations.
     =#
     @test (@allocated corkendall(x)) < (896_144 + Threads.nthreads() * 57_976) * 1.2
-    @test (@allocated corkendall(xm,skipmissing=:listwise)) < (1_117_728 + Threads.nthreads() * 22_104) * 1.2
-    @test (@allocated corkendall(xm,skipmissing=:pairwise)) < (890_448 + Threads.nthreads() * 61_048) * 1.2
+    @test (@allocated corkendall(xm, skipmissing=:listwise)) < (1_117_728 + Threads.nthreads() * 22_104) * 1.2
+    @test (@allocated corkendall(xm, skipmissing=:pairwise)) < (890_448 + Threads.nthreads() * 61_048) * 1.2
     @test (@allocated corspearman(x)) < (2_678_448 + Threads.nthreads() * 9_128) * 1.2
-    @test (@allocated corspearman(xm,skipmissing=:listwise)) < (1_803_712 + Threads.nthreads() * 3_992) * 1.2
-    @test (@allocated corspearman(xm,skipmissing=:pairwise)) < (1_690_544 + Threads.nthreads() * 67_104) * 1.2
-    
+    @test (@allocated corspearman(xm, skipmissing=:listwise)) < (1_803_712 + Threads.nthreads() * 3_992) * 1.2
+    @test (@allocated corspearman(xm, skipmissing=:pairwise)) < (1_690_544 + Threads.nthreads() * 67_104) * 1.2
+
 end
 # COV_EXCL_STOP
\ No newline at end of file

From e1395b8c00af90ed4ae1b28a65002ac26c3853cf Mon Sep 17 00:00:00 2001
From: Philip Swannell <18028484+PGS62@users.noreply.github.com>
Date: Thu, 11 Apr 2024 11:58:14 +0100
Subject: [PATCH 24/24] Trivial change to re-run tests

---
 test/rankcorr.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/rankcorr.jl b/test/rankcorr.jl
index c399ea16f..a08070408 100644
--- a/test/rankcorr.jl
+++ b/test/rankcorr.jl
@@ -1,5 +1,4 @@
-using StatsBase, Random
-using Test
+using StatsBase, Random, Test
 
 @testset "rank_correlation_auxiliary_fns" begin