Skip to content

Commit 2b0ab92

Browse files
juliohmKristofferC
authored andcommitted
Fix #71: Add Haversine distance (#77)
* Fix #71: Add Haversine distance * Add Haversine to README.md * Add Haversine to benchmarks * Add benchmark results for Haversine to README.md * Add helper function haversine() with default Earth radius * Add tests for Haversine distance * Use 4 spaces for indentation * Use 4 spaces for indentation on test suite * Remove default radius for Haversine distance * Fix typo in Haversine tests * fixups
1 parent a6de2f8 commit 2b0ab92

File tree

6 files changed

+72
-5
lines changed

6 files changed

+72
-5
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ This package also provides optimized functions to compute column-wise and pairwi
3131
* Squared Mahalanobis distance
3232
* Bhattacharyya distance
3333
* Hellinger distance
34+
* Haversine distance
3435
* Mean absolute deviation
3536
* Mean squared deviation
3637
* Root mean squared deviation
@@ -148,6 +149,7 @@ Each distance corresponds to a distance type. The type name and the correspondin
148149
| SpanNormDist | `spannorm_dist(x, y)` | `max(x - y) - min(x - y )` |
149150
| BhattacharyyaDist | `bhattacharyya(x, y)` | `-log(sum(sqrt(x .* y) / sqrt(sum(x) * sum(y)))` |
150151
| HellingerDist | `hellinger(x, y) ` | `sqrt(1 - sum(sqrt(x .* y) / sqrt(sum(x) * sum(y))))` |
152+
| Haversine | `haversine(x, y, r)` | [Haversine formula](https://en.wikipedia.org/wiki/Haversine_formula) |
151153
| Mahalanobis | `mahalanobis(x, y, Q)` | `sqrt((x - y)' * Q * (x - y))` |
152154
| SqMahalanobis | `sqmahalanobis(x, y, Q)` | ` (x - y)' * Q * (x - y)` |
153155
| MeanAbsDeviation | `meanad(x, y)` | `mean(abs.(x - y))` |
@@ -226,6 +228,7 @@ The table below compares the performance (measured in terms of average elapsed t
226228
| WeightedHamming | 0.009042s | 0.003182s | 2.8417 |
227229
| SqMahalanobis | 0.070869s | 0.019199s | 3.6913 |
228230
| Mahalanobis | 0.070980s | 0.019305s | 3.6768 |
231+
| Haversine | 0.006549s | 0.000809s | 8.0967 |
229232

230233
We can see that using ``colwise`` instead of a simple loop yields considerable gain (2x - 4x), especially when the internal computation of each distance is simple. Nonetheless, when the computation of a single distance is heavy enough (e.g. *KLDivergence*, *RenyiDivergence*), the gain is not as significant.
231234

@@ -257,5 +260,6 @@ The table below compares the performance (measured in terms of average elapsed t
257260
| WeightedHamming | 0.024456s | 0.007403s | 3.3033 |
258261
| SqMahalanobis | 0.113107s | 0.000366s | **309.3621** |
259262
| Mahalanobis | 0.114646s | 0.000686s | **167.0595** |
263+
| Haversine | 0.015267s | 0.003656s | 4.1763 |
260264

261265
For distances of which a major part of the computation is a quadratic form (e.g. *Euclidean*, *CosineDist*, *Mahalanobis*), the performance can be drastically improved by restructuring the computation and delegating the core part to ``GEMM`` in *BLAS*. The use of this strategy can easily lead to 100x performance gain over simple loops (see the highlighted part of the table above).

benchmark/benchmarks.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ function create_distances(w, Q)
1919
BhattacharyyaDist(),
2020
HellingerDist(),
2121

22+
Haversine(),
23+
2224
WeightedSqEuclidean(w),
2325
WeightedEuclidean(w),
2426
WeightedCityblock(w),
@@ -57,7 +59,7 @@ function evaluate_colwise(dist, x, y)
5759
end
5860

5961
function add_colwise_benchmarks!(SUITE)
60-
62+
6163
m = 200
6264
n = 10000
6365

benchmark/print_table.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ order = [
3131
:WeightedHamming,
3232
:SqMahalanobis,
3333
:Mahalanobis,
34+
:Haversine,
3435
]
3536

3637
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 2.0 # Long enough
@@ -67,4 +68,4 @@ function print_table(judgement)
6768
end
6869
end
6970

70-
print_table(judgement)
71+
print_table(judgement)

src/Distances.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ export
4545
BhattacharyyaDist,
4646
HellingerDist,
4747

48+
Haversine,
49+
4850
MeanAbsDeviation,
4951
MeanSqDeviation,
5052
RMSDeviation,
@@ -79,6 +81,8 @@ export
7981
bhattacharyya,
8082
hellinger,
8183

84+
haversine,
85+
8286
meanad,
8387
msd,
8488
rmsd,
@@ -88,6 +92,7 @@ include("common.jl")
8892
include("generic.jl")
8993
include("metrics.jl")
9094
include("wmetrics.jl")
95+
include("haversine.jl")
9196
include("mahalanobis.jl")
9297
include("bhattacharyya.jl")
9398

src/haversine.jl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
Haversine(radius)
3+
4+
The haversine distance between two locations on a sphere of given `radius`.
5+
6+
Locations are described with longitude and latitude in degrees.
7+
The computed distance has the same units as that of the radius.
8+
"""
9+
struct Haversine{T<:Real} <: Metric
10+
radius::T
11+
end
12+
13+
const VecOrLengthTwoTuple{T} = Union{AbstractVector{T}, NTuple{2, T}}
14+
15+
function evaluate(dist::Haversine, x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple)
16+
length(x) == length(y) == 2 || haversine_error()
17+
18+
@inbounds begin
19+
# longitudes
20+
Δλ = deg2rad(y[1] - x[1])
21+
22+
# latitudes
23+
φ₁ = deg2rad(x[2])
24+
φ₂ = deg2rad(y[2])
25+
end
26+
27+
Δφ = φ₂ - φ₁
28+
29+
# haversine formula
30+
a = sin(Δφ/2)^2 + cos(φ₁)*cos(φ₂)*sin(Δλ/2)^2
31+
c = 2atan2(a, (1-a))
32+
33+
# distance on the sphere
34+
c*dist.radius
35+
end
36+
37+
haversine(x::VecOrLengthTwoTuple, y::VecOrLengthTwoTuple, radius::Real) = evaluate(Haversine(radius), x, y)
38+
39+
@noinline haversine_error() = throw(ArgumentError("expected both inputs to have length 2 in Haversine distance"))

test/test_dists.jl

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,13 @@ end
5858

5959
test_metricity(BhattacharyyaDist(), x, y, z)
6060
test_metricity(HellingerDist(), x, y, z)
61-
61+
62+
x₁ = rand(T, 2)
63+
x₂ = rand(T, 2)
64+
x₃ = rand(T, 2)
65+
66+
test_metricity(Haversine(6371.), x₁, x₂, x₃)
67+
6268
k = rand(1:3, n)
6369
l = rand(1:3, n)
6470
m = rand(1:3, n)
@@ -154,7 +160,7 @@ end
154160
@test wcityblock(x, y, w) dot(abs.(x - vec(y)), w)
155161
@test wminkowski(x, y, w, 2) weuclidean(x, y, w)
156162
end
157-
163+
158164

159165
# Test weighted Hamming distances with even weights
160166
a = T.([1.0, 2.0, 1.0, 3.0, 2.0, 1.0])
@@ -187,7 +193,7 @@ end
187193
end
188194
@test kl_divergence(p, q) klv
189195
@test typeof(kl_divergence(p, q)) == T
190-
196+
191197

192198
@test renyi_divergence(p, r, 0) -log(scale)
193199
@test renyi_divergence(p, r, 1) -log(scale)
@@ -272,6 +278,16 @@ end # testset
272278
end
273279
end #testset
274280

281+
@testset "haversine" begin
282+
for T in (Float64, F64)
283+
@test haversine([-180.,0.], [180.,0.], 1.) 0 atol=1e-10
284+
@test haversine([0.,-90.], [0.,90.], 1.) π atol=1e-10
285+
@test haversine((-180.,0.), (180.,0.), 1.) 0 atol=1e-10
286+
@test haversine((0.,-90.), (0.,90.), 1.) π atol=1e-10
287+
@test_throws ArgumentError haversine([0.,-90., 0.25], [0.,90.], 1.)
288+
end
289+
end
290+
275291
@testset "bhattacharyya / hellinger" begin
276292
for T in (Float64, F64)
277293
x, y = T.([4.0, 5.0, 6.0, 7.0]), T.([3.0, 9.0, 8.0, 1.0])

0 commit comments

Comments
 (0)