Skip to content

Commit 586af4c

Browse files
authored
Faster hash and equality (#159)
* overload hash and equality * Update src/geos_types.jl * bump version * wip * fix hash * implement == via equals * implement coordinatewise equality * overload Base.isapprox * more isapprox tests * somewhat faster compare * faster * speedup hash and equality * test that hash eq don't allocate much * eliminate more allocs * add Random as test dependency * fix * fix * relax alloc tests * use MersenneTwister instead of Xoshiro in tests * simplify * fix ci * add another early return to compare * fix * fix * add some coordinates! tests * fix typo * get rid of Random test dependency
1 parent ffe4d6e commit 586af4c

File tree

3 files changed

+180
-59
lines changed

3 files changed

+180
-59
lines changed

src/geos_functions.jl

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -292,22 +292,31 @@ function getZ(ptr::GEOSCoordSeq, context::GEOSContext = get_global_context())
292292
zcoords
293293
end
294294

295-
function getCoordinates(ptr::GEOSCoordSeq, i::Integer, context::GEOSContext = get_global_context())
295+
function getCoordinates!(out::Vector{Float64}, ptr::GEOSCoordSeq, i::Integer, context)
296296
if !(0 < i <= getSize(ptr, context))
297297
error(
298298
"LibGEOS: i=$i is out of bounds for CoordSeq with size=$(getSize(ptr, context))",
299299
)
300300
end
301301
ndim = getDimensions(ptr, context)
302-
coord = Array{Float64}(undef, ndim)
303-
start = pointer(coord)
304-
floatsize = sizeof(Float64)
305-
GEOSCoordSeq_getX_r(context, ptr, i - 1, start)
306-
GEOSCoordSeq_getY_r(context, ptr, i - 1, start + floatsize)
307-
if ndim == 3
308-
GEOSCoordSeq_getZ_r(context, ptr, i - 1, start + 2 * floatsize)
302+
@assert length(out) == ndim
303+
GC.@preserve out begin
304+
start = pointer(out)
305+
floatsize = sizeof(Float64)
306+
GEOSCoordSeq_getX_r(context, ptr, i - 1, start)
307+
GEOSCoordSeq_getY_r(context, ptr, i - 1, start + floatsize)
308+
if ndim == 3
309+
GEOSCoordSeq_getZ_r(context, ptr, i - 1, start + 2 * floatsize)
310+
end
309311
end
310-
coord
312+
out
313+
end
314+
315+
function getCoordinates(ptr::GEOSCoordSeq, i::Integer, context::GEOSContext = get_global_context())
316+
ndim = getDimensions(ptr, context)
317+
out = Array{Float64}(undef, ndim)
318+
getCoordinates!(out, ptr, i, context)
319+
out
311320
end
312321

313322
function getCoordinates(ptr::GEOSCoordSeq, context::GEOSContext = get_global_context())

src/geos_types.jl

Lines changed: 108 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ mutable struct LineString <: AbstractGeometry
8989
finalizer(destroyGeom, line)
9090
line
9191
end
92-
#create a linestring from a list of coordiantes
92+
#create a linestring from a list of coordinates
9393
function LineString(coords::Vector{Vector{Float64}}, context::GEOSContext = get_global_context())
9494
line = new(createLineString(coords, context), context)
9595
finalizer(destroyGeom, line)
@@ -116,7 +116,7 @@ mutable struct MultiLineString <: AbstractGeometry
116116
finalizer(destroyGeom, multiline)
117117
multiline
118118
end
119-
# create a multilinestring from a list of linestring coordiantes
119+
# create a multilinestring from a list of linestring coordinates
120120
MultiLineString(multiline::Vector{Vector{Vector{Float64}}},context::GEOSContext = get_global_context()) =
121121
MultiLineString(
122122
createCollection(
@@ -309,6 +309,31 @@ const geomtypes = [
309309
GeometryCollection,
310310
]
311311

312+
const HasCoordSeq = Union{LineString, LinearRing, Point}
313+
"""
314+
315+
coordinates!(out::Vector{Float64}, geo::$HasCoordSeq, i::Integer)
316+
coordinates!(out::Vector{Float64}, geo::Point)
317+
318+
Copy the coordinates of the ith point of geo into `out`.
319+
"""
320+
function coordinates!(out, geo::HasCoordSeq, i::Integer, ctx::GEOSContext=get_context(geo))
321+
GC.@preserve out geo begin
322+
seq = GEOSGeom_getCoordSeq_r(ctx, geo)::Ptr
323+
getCoordinates!(out, seq, i, ctx)
324+
end
325+
end
326+
function coordinates!(out, geo::Point, ctx::GEOSContext=get_context(geo))
327+
coordinates!(out, geo, 1, ctx)
328+
end
329+
330+
function has_coord_seq(geo::AbstractGeometry)
331+
false
332+
end
333+
function has_coord_seq(::HasCoordSeq)
334+
true
335+
end
336+
312337
Base.@kwdef struct IsApprox
313338
atol::Float64=0.0
314339
rtol::Float64=sqrt(eps(Float64))
@@ -323,51 +348,78 @@ end
323348
function Base.isapprox(geo1::AbstractGeometry, geo2::AbstractGeometry; kw...)::Bool
324349
compare(IsApprox(;kw...), geo1, geo2)
325350
end
326-
function (cmp::IsApprox)(geo1::AbstractGeometry, geo2::AbstractGeometry)::Bool
327-
compare(cmp, geo1, geo2)
328-
end
329-
function compare(, geo1::AbstractGeometry, geo2::AbstractGeometry)::Bool
351+
function compare(cmp, geo1::AbstractGeometry, geo2::AbstractGeometry, ctx=get_context(geo1))::Bool
330352
(typeof(geo1) === typeof(geo2)) || return false
331-
ng1 = ngeom(geo1)
332-
ng2 = ngeom(geo2)
333-
ng1 == ng2 || return false
334-
for i in 1:ng1
335-
(getgeom(geo1, i) getgeom(geo2, i)) || return false
353+
if (geo1 === geo2) && (cmp === isequal)
354+
return true
336355
end
337-
return true
338-
end
339-
function compare(, pt1::Point, pt2::Point)::Bool
340-
is3d = GeoInterface.is3d(pt1)
341-
is3d === GeoInterface.is3d(pt2) || return false
342-
GeoInterface.getcoord(pt1,1) GeoInterface.getcoord(pt2,1) || return false
343-
GeoInterface.getcoord(pt1,2) GeoInterface.getcoord(pt2,2) || return false
344-
if is3d
345-
GeoInterface.getcoord(pt1,3) GeoInterface.getcoord(pt2,3) || return false
356+
if has_coord_seq(geo1)
357+
return compare_coord_seqs(cmp, geo1, geo2, ctx)
358+
else
359+
ng1 = ngeom(geo1)
360+
ng2 = ngeom(geo2)
361+
ng1 == ng2 || return false
362+
for i in 1:ng1
363+
compare(cmp, getgeom(geo1, i), getgeom(geo2, i), ctx) || return false
364+
end
346365
end
347366
return true
348367
end
349368

350-
function _norm(x,y,z)
351-
return sqrt(x^2 + y^2 + z^2)
369+
npoints(pt::Point) = 1
370+
npoints(geo::HasCoordSeq) = GeoInterface.ngeom(geo)
371+
372+
function compare_coord_seqs(cmp, geo1, geo2, ctx)
373+
ncoords1 = GeoInterface.ncoord(geo1)
374+
ncoords2 = GeoInterface.ncoord(geo2)
375+
ncoords1 == ncoords2 || return false
376+
ncoords1 == 0 && return true
377+
np1 = npoints(geo1)
378+
np2 = npoints(geo2)
379+
np1 == np2 || return false
380+
coords1 = Vector{Float64}(undef, ncoords1)
381+
coords2 = Vector{Float64}(undef, ncoords1)
382+
for i in 1:np1
383+
coordinates!(coords1, geo1, i, ctx)
384+
coordinates!(coords2, geo2, i, ctx)
385+
cmp(coords1, coords2) || return false
386+
end
387+
return true
352388
end
353389

354-
function compare(cmp::IsApprox, pt1::Point, pt2::Point)::Bool
355-
is3d = GeoInterface.is3d(pt1)
356-
is3d === GeoInterface.is3d(pt2) || return false
357-
x1 = GeoInterface.getcoord(pt1,1)
358-
x2 = GeoInterface.getcoord(pt2,1)
359-
y1 = GeoInterface.getcoord(pt1,2)
360-
y2 = GeoInterface.getcoord(pt2,2)
361-
if is3d
362-
z1 = GeoInterface.getcoord(pt1,3)
363-
z2 = GeoInterface.getcoord(pt2,3)
364-
else
365-
z1 = 0.0
366-
z2 = 0.0
390+
function compare_coord_seqs(cmp::IsApprox, geo1, geo2, ctx)
391+
ncoords1 = GeoInterface.ncoord(geo1)
392+
ncoords2 = GeoInterface.ncoord(geo2)
393+
ncoords1 == ncoords2 || return false
394+
ncoords1 == 0 && return true
395+
@assert ncoords1 in 2:3
396+
ncoords1 == ncoords2 || return false
397+
np1 = npoints(geo1)
398+
np2 = npoints(geo2)
399+
np1 == np2 || return false
400+
coords1 = Vector{Float64}(undef, ncoords1)
401+
coords2 = Vector{Float64}(undef, ncoords1)
402+
# isapprox of two vectors is calculated using their euclidean norms and the norm of their difference
403+
# we compute (the squares) of these norms in an allocation free way
404+
s1 = 0.0
405+
s2 = 0.0
406+
s12 = 0.0
407+
for i in 1:np1
408+
coordinates!(coords1, geo1, i, ctx)
409+
coordinates!(coords2, geo2, i, ctx)
410+
if ncoords1 == 2
411+
x1,y1 = coords1
412+
x2,y2 = coords2
413+
s12 += (x1 - x2)^2 + (y1 - y2)^2
414+
else
415+
x1,y1,z1 = coords1
416+
x2,y2,z2 = coords2
417+
s12 += (x1 - x2)^2 + (y1 - y2)^2 + (z1 - z2)^2
418+
end
419+
s1 += sum(abs2, coords1)
420+
s2 += sum(abs2, coords2)
367421
end
368-
lhs = _norm(x1 - x2, y1 - y2, z1 - z2)
369-
rhs = cmp.atol + cmp.rtol * max(_norm(x1,y1,z2), _norm(x2,y2,z2))
370-
return lhs <= rhs
422+
return sqrt(s12) <= cmp.atol + cmp.rtol * sqrt(max(s1, s2))
371423
end
372424

373425
typesalt(::Type{GeometryCollection} ) = 0xd1fd7c6403c36e5b
@@ -381,25 +433,31 @@ typesalt(::Type{Point} ) = 0x4b5c101d3843160e
381433
typesalt(::Type{Polygon} ) = 0xa5c895d62ef56723
382434

383435
function Base.hash(geo::AbstractGeometry, h::UInt)::UInt
384-
salt = typesalt(typeof(geo))
385-
h = hash(salt, h)
386-
for i in 1:ngeom(geo)
387-
g = getgeom(geo,i)
388-
h = hash(g, h)
436+
h = hash(typesalt(typeof(geo)), h)
437+
if has_coord_seq(geo)
438+
return hash_coord_seq(geo, h)
439+
else
440+
for i in 1:ngeom(geo)
441+
h = hash(getgeom(geo, i), h)
442+
end
389443
end
390444
return h
391445
end
392-
393-
function Base.hash(pt::Point, h::UInt)::UInt
394-
h = hash(getcoord(pt,1), h)
395-
h = hash(getcoord(pt,2), h)
396-
if GeoInterface.is3d(pt)
397-
h = hash(getcoord(pt,3), h)
446+
function hash_coord_seq(geo::HasCoordSeq, h::UInt)::UInt
447+
nc = GeoInterface.ncoord(geo)
448+
if nc == 0
449+
return h
450+
end
451+
buf = Vector{Float64}(undef, nc)
452+
ctx = get_context(geo)
453+
for i in 1:npoints(geo)
454+
coordinates!(buf, geo, i, ctx)
455+
h = hash(buf, h)
398456
end
399-
h = hash(getcoord(pt,2), h)
400457
return h
401458
end
402459

460+
403461
# teach ccall how to get the pointer to pass to libgeos
404462
# this way the Julia compiler will track the lifetimes for us
405463
Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::AbstractGeometry) = x.ptr

test/test_misc.jl

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@ end
4646
@test LibGEOS.intersects(p2, q1, ctx3)
4747
end
4848

49+
@testset "coordinates!" begin
50+
coordinates! = LibGEOS.coordinates!
51+
buf3 = zeros(3)
52+
buf2 = zeros(2)
53+
@test coordinates!(buf3, readgeom("POINT(1 2 3)")) == Float64[1,2,3] == buf3
54+
@test coordinates!(buf2, readgeom("POINT(1 2)")) == Float64[1,2] == buf2
55+
@test coordinates!(buf2, readgeom("POINT(1 2)"), 1) == Float64[1,2] == buf2
56+
@test coordinates!(buf2, readgeom("LINESTRING (130 240, 650 240)"), 1) == buf2 == [130, 240]
57+
@test coordinates!(buf2, readgeom("LINESTRING (130 240, 650 240)"), 2) == buf2 == [650, 240]
58+
end
59+
4960
@testset "hash eq" begin
5061
ctx1 = LibGEOS.GEOSContext()
5162
ctx2 = LibGEOS.GEOSContext()
@@ -66,6 +77,15 @@ end
6677
@test readgeom("LINESTRING (130 240, 650 240)") readgeom("LINESTRING (130 240, -650 240)") atol=1300
6778
@test readgeom("LINESTRING (130 240, 650 240)") readgeom("LINESTRING (130 240, 650 240.00000001)")
6879

80+
@test isapprox(readgeom("POLYGON((1 1,0 0, 1 2,2 2,2 4,1 1))"),
81+
readgeom("POLYGON((1 1,0 0.00000000001,1 2,2 2,2 4,1 1))"))
82+
83+
pt = readgeom("POINT(-1 NaN)")
84+
@test isequal(pt, pt)
85+
@test pt != pt
86+
@test !(isapprox(pt, pt))
87+
@test !(isapprox(pt, pt, atol=Inf))
88+
6989
pt = readgeom("POINT(0 NaN)")
7090
@test isequal(pt, pt)
7191
@test pt != pt
@@ -115,6 +135,40 @@ end
115135
end
116136
end
117137

138+
@testset "performance hash eq" begin
139+
pts1 = [[sin(x), cos(x), 1] for x in range(0, 2pi, length=1000)]
140+
pts1[end] = pts1[1]
141+
lr1 = LinearRing(pts1)
142+
pts2 = copy(pts1)
143+
pts2[453] = 2*pts2[453]
144+
lr2 = LinearRing(pts2)
145+
@test !(lr1 == lr2)
146+
@test !isequal(lr1, lr2)
147+
@test !isapprox(lr1, lr2)
148+
149+
@test 300 > @allocated lr1 == lr2
150+
@test 300 > @allocated isequal(lr1, lr2)
151+
@test 300 > @allocated isapprox(lr1, lr2)
152+
153+
hash(lr1) != hash(lr2)
154+
@test 300 > @allocated hash(lr1)
155+
156+
poly1 = Polygon(lr1)
157+
poly2 = Polygon(lr2)
158+
159+
poly2 = LinearRing(pts2)
160+
@test !(poly1 == poly2)
161+
@test !isequal(poly1, poly2)
162+
@test !isapprox(poly1, poly2)
163+
164+
@test 300 > @allocated poly1 == poly2
165+
@test 300 > @allocated isequal(poly1, poly2)
166+
@test 300 > @allocated isapprox(poly1, poly2)
167+
168+
@test hash(poly1) != hash(poly2)
169+
@test 300 > @allocated hash(poly1)
170+
end
171+
118172
@testset "show it like you build it" begin
119173
for geo in [
120174
readgeom("POINT(0 0)")

0 commit comments

Comments
 (0)