fix type instability in Jaccard for float input (#44)

KristofferC · web-flow · commit b3a1a3520200 · 2016-06-14T21:18:13.000+02:00
* fix type instability in Jaccard and add some inbounds
diff --git a/src/metrics.jl b/src/metrics.jl
@@ -176,18 +176,22 @@ end
 
 # Jaccard
 
-@inline eval_start(::Jaccard, a::AbstractArray, b::AbstractArray) = 0, 0
+@inline eval_start(::Jaccard, a::AbstractArray{Bool}, b::AbstractArray{Bool}) = 0, 0  # Int zeros: Bool arithmetic in eval_op promotes to Int
+@inline eval_start(::Jaccard, a::AbstractArray, b::AbstractArray) = (T = promote_type(eltype(a), eltype(b)); (zero(T), zero(T)))  # typed zeros; also accepts arrays with different eltypes
 @inline function eval_op(::Jaccard, s1, s2)
-    denominator = max(s1, s2)
-    numerator = min(s1, s2)
-    numerator, denominator
+    abs_m = abs(s1 - s2)  # |s1 - s2|
+    abs_p = abs(s1 + s2)  # |s1 + s2|
+    abs_p - abs_m, abs_p + abs_m  # for non-negative s1, s2 this is (2*min(s1, s2), 2*max(s1, s2)); the factor 2 cancels in the ratio taken by eval_end
 end
 @inline function eval_reduce(::Jaccard, s1, s2)
-    a = s1[1] + s2[1]
-    b = s1[2] + s2[2]
+    @inbounds a = s1[1] + s2[1]
+    @inbounds b = s1[2] + s2[2]
     a, b
 end
-@inline eval_end(::Jaccard, a) = 1 - (a[1]/a[2])
+@inline function eval_end(::Jaccard, a)
+    @inbounds v = 1 - (a[1]/a[2])
+    return v
+end
 jaccard(a::AbstractArray, b::AbstractArray) = evaluate(Jaccard(), a, b)
 
 # Tanimoto
@@ -201,15 +205,17 @@ jaccard(a::AbstractArray, b::AbstractArray) = evaluate(Jaccard(), a, b)
   tt, tf, ft, ff
 end
 @inline function eval_reduce(::RogersTanimoto, s1, s2)
-    a = s1[1] + s2[1]
-    b = s1[2] + s2[2]
-    c = s1[3] + s2[3]
-    d = s1[4] + s1[4]
+    @inbounds begin  # element-wise sum of the first four entries of s1 and s2
+        a = s1[1] + s2[1]
+        b = s1[2] + s2[2]
+        c = s1[3] + s2[3]
+        d = s1[4] + s2[4]  # combine both operands like a, b, c (was s1[4] + s1[4], which ignored s2)
+    end
     a, b, c, d
 end
 @inline function eval_end(::RogersTanimoto, a)
-    numerator = 2(a[2] + a[3])
-    denominator = a[1] + a[4] + 2(a[2] + a[3])
+    @inbounds numerator = 2(a[2] + a[3])
+    @inbounds denominator = a[1] + a[4] + 2(a[2] + a[3])
     numerator / denominator
 end
 rogerstanimoto{T <: Bool}(a::AbstractArray{T}, b::AbstractArray{T}) = evaluate(RogersTanimoto(), a, b)
@@ -240,13 +246,13 @@ function pairwise!(r::AbstractMatrix, dist::SqEuclidean, a::AbstractMatrix)
     m, n = get_pairwise_dims(r, a)
     At_mul_B!(r, a, a)
     sa2 = sumsq_percol(a)
-    for j = 1 : n
+    @inbounds for j = 1 : n
         for i = 1 : j-1
-            @inbounds r[i,j] = r[j,i]
+            r[i,j] = r[j,i]
         end
-        @inbounds r[j,j] = 0
+        r[j,j] = 0
         for i = j+1 : n
-            @inbounds r[i,j] = sa2[i] + sa2[j] - 2 * r[i,j]
+            r[i,j] = sa2[i] + sa2[j] - 2 * r[i,j]
         end
     end
     r
@@ -258,10 +264,10 @@ function pairwise!(r::AbstractMatrix, dist::Euclidean, a::AbstractMatrix, b::Abs
     At_mul_B!(r, a, b)
     sa2 = sumsq_percol(a)
     sb2 = sumsq_percol(b)
-    for j = 1 : nb
+    @inbounds for j = 1 : nb
         for i = 1 : na
-            @inbounds v = sa2[i] + sb2[j] - 2 * r[i,j]
-            @inbounds r[i,j] = isnan(v) ? NaN : sqrt(max(v, 0.))
+            v = sa2[i] + sb2[j] - 2 * r[i,j]
+            r[i,j] = isnan(v) ? NaN : sqrt(max(v, 0.))
         end
     end
     r
@@ -271,14 +277,14 @@ function pairwise!(r::AbstractMatrix, dist::Euclidean, a::AbstractMatrix)
     m, n = get_pairwise_dims(r, a)
     At_mul_B!(r, a, a)
     sa2 = sumsq_percol(a)
-    for j = 1 : n
+    @inbounds for j = 1 : n
         for i = 1 : j-1
-            @inbounds r[i,j] = r[j,i]
+            r[i,j] = r[j,i]
         end
         @inbounds r[j,j] = 0
         for i = j+1 : n
-            @inbounds v = sa2[i] + sa2[j] - 2 * r[i,j]
-            @inbounds r[i,j] = isnan(v) ? NaN : sqrt(max(v, 0.))
+            v = sa2[i] + sa2[j] - 2 * r[i,j]
+            r[i,j] = isnan(v) ? NaN : sqrt(max(v, 0.))
         end
     end
     r
@@ -302,13 +308,13 @@ function pairwise!(r::AbstractMatrix, dist::CosineDist, a::AbstractMatrix)
     m, n = get_pairwise_dims(r, a)
     At_mul_B!(r, a, a)
     ra = sqrt!(sumsq_percol(a))
-    for j = 1 : n
+    @inbounds for j = 1 : n
         @simd for i = j+1 : n
-            @inbounds r[i,j] = max(1 - r[i,j] / (ra[i] * ra[j]), 0)
+            r[i,j] = max(1 - r[i,j] / (ra[i] * ra[j]), 0)
         end
-        @inbounds r[j,j] = 0
+        r[j,j] = 0
         for i = 1 : j-1
-            @inbounds r[i,j] = r[j,i]
+            r[i,j] = r[j,i]
         end
     end
     r
diff --git a/test/test_dists.jl b/test/test_dists.jl
@@ -108,6 +108,10 @@ for (x, y) in (([4., 5., 6., 7.], [3., 9., 8., 1.]),
     @test whamming(a, b, w) == sum((a .!= b) .* w)
 end
 
+@inferred evaluate(Jaccard(), rand(3), rand(3))
+@inferred evaluate(Jaccard(), [1,2,3], [1,2,3])
+@inferred evaluate(Jaccard(), [true, false, true], [false, true, true])
+
 end # testset
 
 
@@ -188,28 +192,28 @@ q = rand(12)
 px = x ./ sum(x)
 py = y ./ sum(y)
 expected_bc_x_y = sum(sqrt(px .* py))
-@test Distances.bhattacharyya_coeff(x, y) ≈ expected_bc_x_y 
-@test bhattacharyya(x, y) ≈ (-log(expected_bc_x_y)) 
-@test hellinger(x, y) ≈ sqrt(1 - expected_bc_x_y) 
+@test Distances.bhattacharyya_coeff(x, y) ≈ expected_bc_x_y
+@test bhattacharyya(x, y) ≈ (-log(expected_bc_x_y))
+@test hellinger(x, y) ≈ sqrt(1 - expected_bc_x_y)
 
 
 
 pa = a ./ sum(a)
 pb = b ./ sum(b)
 expected_bc_a_b = sum(sqrt(pa .* pb))
-@test Distances.bhattacharyya_coeff(a, b) ≈ expected_bc_a_b 
-@test bhattacharyya(a, b) ≈ (-log(expected_bc_a_b)) 
-@test hellinger(a, b) ≈ sqrt(1 - expected_bc_a_b) 
+@test Distances.bhattacharyya_coeff(a, b) ≈ expected_bc_a_b
+@test bhattacharyya(a, b) ≈ (-log(expected_bc_a_b))
+@test hellinger(a, b) ≈ sqrt(1 - expected_bc_a_b)
 
 pp = p ./ sum(p)
 pq = q ./ sum(q)
 expected_bc_p_q = sum(sqrt(pp .* pq))
-@test Distances.bhattacharyya_coeff(p, q) ≈ expected_bc_p_q 
-@test bhattacharyya(p, q) ≈ (-log(expected_bc_p_q)) 
-@test hellinger(p, q) ≈ sqrt(1 - expected_bc_p_q) 
+@test Distances.bhattacharyya_coeff(p, q) ≈ expected_bc_p_q
+@test bhattacharyya(p, q) ≈ (-log(expected_bc_p_q))
+@test hellinger(p, q) ≈ sqrt(1 - expected_bc_p_q)
 
 # Ensure it is semimetric
-@test bhattacharyya(x, y) ≈ bhattacharyya(y, x) 
+@test bhattacharyya(x, y) ≈ bhattacharyya(y, x)
 
 end #testset