|
1 | 1 | # Unit tests for Distances
|
2 | 2 |
|
| 3 | +using SparseArrays: sparsevec, sprand |
| 4 | + |
# Julia 1.0 compat: the struct definition must be put in global scope.
struct FooDist <: PreMetric
end
|
4 | 6 |
|
5 | 7 | @testset "result_type" begin
|
|
217 | 219 | for (_x, _y) in (([4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0]),
|
218 | 220 | ([4.0, 5.0, 6.0, 7.0], [3. 8.; 9. 1.0]))
|
219 | 221 | x, y = T.(_x), T.(_y)
|
220 |
| - for (x, y) in ((x, y), |
| 222 | + for (x, y) in ((x, y), (sparsevec(x), sparsevec(y)), |
221 | 223 | (convert(Array{Union{Missing, T}}, x), convert(Array{Union{Missing, T}}, y)),
|
222 | 224 | ((Iterators.take(x, 4), Iterators.take(y, 4))), # iterator
|
223 | 225 | (((x[i] for i in 1:length(x)), (y[i] for i in 1:length(y)))), # generator
|
@@ -331,7 +333,8 @@ end # testset
|
331 | 333 | end #testset
|
332 | 334 |
|
333 | 335 | @testset "empty vector" begin
|
334 |
| - for T in (Float64, F64), (a, b) in ((T[], T[]), (Iterators.take(T[], 0), Iterators.take(T[], 0))) |
| 336 | + for T in (Float64, F64), (a, b) in ((T[], T[]), (Iterators.take(T[], 0), Iterators.take(T[], 0)), |
| 337 | + (sprand(T, 0, .1), sprand(T, 0, .1))) |
335 | 338 | @test sqeuclidean(a, b) == 0.0
|
336 | 339 | @test isa(sqeuclidean(a, b), T)
|
337 | 340 | @test euclidean(a, b) == 0.0
|
@@ -391,6 +394,10 @@ end # testset
|
391 | 394 | @test_throws DimensionMismatch colwise!(mat23, Bregman(x -> sqeuclidean(x, zero(x)), x -> 2*x), mat23, mat22)
|
392 | 395 | @test_throws DimensionMismatch Bregman(x -> sqeuclidean(x, zero(x)), x -> 2*x)([1, 2, 3], [1, 2])
|
393 | 396 | @test_throws DimensionMismatch Bregman(x -> sqeuclidean(x, zero(x)), x -> [1, 2])([1, 2, 3], [1, 2, 3])
|
| 397 | + sv1 = sprand(10, .2) |
| 398 | + sv2 = sprand(20, .2) |
| 399 | + @test_throws DimensionMismatch euclidean(sv1, sv2) |
| 400 | + @test_throws DimensionMismatch bhattacharyya(sv1, sv2) |
394 | 401 | end # testset
|
395 | 402 |
|
396 | 403 | @testset "Different input types" begin
|
@@ -504,41 +511,43 @@ end
|
504 | 511 |
|
# Compares bhattacharyya_coeff, bhattacharyya and hellinger against reference
# values computed by hand from normalized vectors, for dense inputs, their
# sparsevec counterparts, and (for the first pair) Iterators.take wrappers.
@testset "bhattacharyya / hellinger" begin
    for T in (Int, Float64, F64)
        # Fixed dense inputs.
        dense_x = T.([4, 5, 6, 7])
        dense_y = T.([3, 9, 8, 1])
        dense_a = T.([1, 2, 1, 3, 2, 1])
        dense_b = T.([1, 3, 0, 2, 2, 0])

        # Random dense inputs; entries of the first one that lie below its
        # median are set to zero.
        dense_p = T == Int ? rand(0:10, 12) : rand(T, 12)
        dense_p[dense_p .< median(dense_p)] .= 0
        dense_q = T == Int ? rand(0:10, 12) : rand(T, 12)

        dense_inputs = (dense_x, dense_y, dense_a, dense_b, dense_p, dense_q)
        sparse_inputs = sparsevec.(dense_inputs)

        for (x, y, a, b, p, q) in (dense_inputs, sparse_inputs)
            # Bhattacharyya and Hellinger distances are defined for discrete
            # probability distributions, so the expected values are computed
            # from normalized vectors.
            norm_x = x ./ sum(x)
            norm_y = y ./ sum(y)
            expected_bc_x_y = sum(sqrt.(norm_x .* norm_y))

            # Same expectations for the vectors themselves and for iterators
            # wrapping them.
            wrapped = (Iterators.take(x, 12), Iterators.take(y, 12))
            for (u, v) in ((x, y), wrapped)
                @test Distances.bhattacharyya_coeff(u, v) ≈ expected_bc_x_y
                @test bhattacharyya(u, v) ≈ (-log(expected_bc_x_y))
                @test hellinger(u, v) ≈ sqrt(1 - expected_bc_x_y)
            end

            norm_a = a ./ sum(a)
            norm_b = b ./ sum(b)
            expected_bc_a_b = sum(sqrt.(norm_a .* norm_b))
            @test Distances.bhattacharyya_coeff(a, b) ≈ expected_bc_a_b
            @test bhattacharyya(a, b) ≈ (-log(expected_bc_a_b))
            @test hellinger(a, b) ≈ sqrt(1 - expected_bc_a_b)

            norm_p = p ./ sum(p)
            norm_q = q ./ sum(q)
            expected_bc_p_q = sum(sqrt.(norm_p .* norm_q))
            @test Distances.bhattacharyya_coeff(p, q) ≈ expected_bc_p_q
            @test bhattacharyya(p, q) ≈ (-log(expected_bc_p_q))
            @test hellinger(p, q) ≈ sqrt(1 - expected_bc_p_q)

            # Ensure it is semimetric: swapping the arguments gives the same value.
            @test bhattacharyya(x, y) ≈ bhattacharyya(y, x)
        end
    end
end #testset
|
544 | 553 |
|
|
769 | 778 |
|
770 | 779 | X = rand(ComplexF64, m, nx)
|
771 | 780 | Y = rand(ComplexF64, m, ny)
|
772 |
| - |
| 781 | + |
773 | 782 | test_pairwise(SqEuclidean(), X, Y, Float64)
|
774 | 783 | test_pairwise(Euclidean(), X, Y, Float64)
|
775 | 784 |
|
|
946 | 955 | @test pairwise(PeriodicEuclidean(p), X, Y, dims=2)[1,2] == 0m
|
947 | 956 | end
|
948 | 957 |
|
# Random sparse vectors generated with clearly different densities must yield
# approximately the same distance as their dense copies, for every listed distance.
@testset "SparseVector, nnz(a) != nnz(b)" begin
    distances = (bhattacharyya, euclidean, sqeuclidean, jaccard, cityblock,
                 totalvariation, chebyshev, braycurtis, hamming)
    for (len, density_a, density_b) in ((100, 0.1, 0.8), (200, 0.8, 0.1))
        sparse_a = sprand(len, density_a)
        sparse_b = sprand(len, density_b)
        # Dense copies serve as the reference inputs.
        dense_a = Vector(sparse_a)
        dense_b = Vector(sparse_b)
        for dist in distances
            @test dist(sparse_a, sparse_b) ≈ dist(dense_a, dense_b)
        end
    end
end
| 968 | + |
949 | 969 | #=
|
950 | 970 | @testset "zero allocation colwise!" begin
|
951 | 971 | d = Euclidean()
|
|
0 commit comments