Skip to content

Commit 95c156d

Browse files
authored
chore: Add sedona-geo-traits-ext and sedona-geo-generic-alg to the workspace, eliminating geo, wkt and wkb forks (#203)
This is the final step of the forked dependency elimination plan: https://github.com/apache/sedona-db/pull/165/files. sedona-geo-generic-alg goes live and replaces the original wherobots/geo dependency. We also replace the forked wkb and wkt dependencies with their latest released versions. This patch depends on wherobots/geo-index#7 to resolve a geo version conflict. Remaining tasks: * geo-index will be the only remaining forked dependency, and we are [submitting patches](kylebarron/geo-index#141) to upstream the nearest-neighbour search APIs that sedona-db needs. * We need a released version of [georust/wkb](https://github.com/georust/wkb) before releasing 0.2.0.
1 parent 6d0b218 commit 95c156d

File tree

33 files changed

+1891
-266
lines changed

33 files changed

+1891
-266
lines changed

Cargo.lock

Lines changed: 160 additions & 89 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ members = [
2222
"c/sedona-s2geography",
2323
"c/sedona-tg",
2424
"r/sedonadb/src/rust",
25+
"rust/sedona-geo-traits-ext",
26+
"rust/sedona-geo-generic-alg",
2527
"rust/sedona-adbc",
2628
"rust/sedona-expr",
2729
"rust/sedona-functions",
@@ -65,6 +67,7 @@ arrow-json = { version = "55.1.0" }
6567
arrow-schema = { version = "55.1.0" }
6668
async-trait = { version = "0.1.87" }
6769
bytes = "1.10"
70+
byteorder = "1"
6871
chrono = { version = "0.4.38", default-features = false }
6972
comfy-table = { version = "7.0" }
7073
criterion = { version = "0.5", features = ["html_reports"] }
@@ -82,23 +85,21 @@ env_logger = "0.11"
8285
futures = { version = "0.3" }
8386
object_store = { version = "0.12.0", default-features = false }
8487
float_next_after = "1"
88+
num-traits = { version = "0.2", default-features = false, features = ["libm"] }
8589
mimalloc = { version = "0.1", default-features = false }
8690
libmimalloc-sys = { version = "0.1", default-features = false }
8791
once_cell = "1.20"
8892

89-
geos = { version = "10.0.0", features = ["geo"] }
93+
geos = { version = "10.0.0", features = ["geo", "v3_10_0"] }
9094

91-
# Use our own fork of georust/geo, which implements generic computational geometry algorithms for geo-traits
92-
geo-generic-alg = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-generic-alg" }
93-
geo-types = "0.7.16"
94-
geo-traits = "0.2.0"
95-
geo-traits-ext = "0.1.0"
96-
geo = { version = "0.30.0" }
95+
geo-types = "0.7.17"
96+
geo-traits = "0.3.0"
97+
geo = "0.31.0"
9798

9899
geo-index = { version = "0.3.1" }
99100

100-
wkb = { version = "0.8.0", features = ["geos"] }
101-
wkt = "0.13.0"
101+
wkb = "0.9.1"
102+
wkt = "0.14.0"
102103

103104
parking_lot = "0.12"
104105
parquet = { version = "55.1.0", default-features = false, features = [
@@ -129,9 +130,6 @@ datafusion-ffi = { git = "https://github.com/paleolimbot/datafusion.git", branch
129130
datafusion-physical-expr = { git = "https://github.com/paleolimbot/datafusion.git", branch = "local-49-with-patch", package = "datafusion-physical-expr" }
130131
datafusion-physical-plan = { git = "https://github.com/paleolimbot/datafusion.git", branch = "local-49-with-patch", package = "datafusion-physical-plan" }
131132

132-
geo-types = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-types" }
133-
geo-traits = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-traits" }
134-
geo-traits-ext = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-traits-ext" }
133+
# TODO: remove these patches once our changes to the geo-index and wkb crates are merged upstream and released
135134
geo-index = { git = "https://github.com/wherobots/geo-index.git", branch = "main" }
136-
wkb = { git = "https://github.com/wherobots/wkb.git", branch = "generic-alg" }
137-
wkt = { git = "https://github.com/wherobots/wkt.git", branch = "generic-alg" }
135+
wkb = { git = "https://github.com/georust/wkb.git", rev = "130eb0c2b343bc9299aeafba6d34c2a6e53f3b6a" }

c/sedona-geos/Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ criterion = { workspace = true }
3131
sedona = { path = "../../rust/sedona" }
3232
sedona-testing = { path = "../../rust/sedona-testing", features = ["criterion"] }
3333
rstest = { workspace = true }
34+
geo-types = { workspace = true }
3435

3536
[dependencies]
3637
arrow-schema = { workspace = true }
@@ -42,8 +43,14 @@ sedona-expr = { path = "../../rust/sedona-expr" }
4243
sedona-functions = { path = "../../rust/sedona-functions" }
4344
sedona-geometry = { path = "../../rust/sedona-geometry" }
4445
sedona-schema = { path = "../../rust/sedona-schema" }
45-
wkb = { workspace = true, features = ["geos"] }
46+
geo-traits = { workspace = true }
47+
wkb = { workspace = true }
48+
byteorder = { workspace = true }
4649

4750
[[bench]]
4851
harness = false
4952
name = "geos-functions"
53+
54+
[[bench]]
55+
harness = false
56+
name = "wkb_to_geos"
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
use criterion::{criterion_group, criterion_main};
18+
use geo_types::{LineString, Point};
19+
use sedona_geos::wkb_to_geos::GEOSWkbFactory;
20+
use wkb::Endianness;
21+
22+
fn generate_wkb_linestring(num_points: usize, endianness: Endianness) -> Vec<u8> {
23+
let mut points = Vec::new();
24+
for i in 0..num_points {
25+
points.push(Point::new(i as f64, i as f64));
26+
}
27+
let linestring = LineString::from(points);
28+
let mut buffer = Vec::new();
29+
wkb::writer::write_geometry(
30+
&mut buffer,
31+
&linestring,
32+
&wkb::writer::WriteOptions { endianness },
33+
)
34+
.unwrap();
35+
buffer
36+
}
37+
38+
fn bench_parse(c: &mut criterion::Criterion) {
39+
for num_points in [4, 10, 100, 500, 1000] {
40+
for endianness in [Endianness::BigEndian, Endianness::LittleEndian] {
41+
let wkb_buf = generate_wkb_linestring(num_points, endianness);
42+
let wkb = wkb::reader::read_wkb(&wkb_buf).unwrap();
43+
let endianness_name: &str = match endianness {
44+
Endianness::BigEndian => "big endian",
45+
Endianness::LittleEndian => "little endian",
46+
};
47+
48+
c.bench_function(
49+
&format!(
50+
"convert linestring containing {num_points} points using to_geos ({endianness_name})"
51+
),
52+
|b| {
53+
let factory = GEOSWkbFactory::new();
54+
b.iter(|| {
55+
let g = factory.create(&wkb).unwrap();
56+
criterion::black_box(g);
57+
});
58+
},
59+
);
60+
61+
c.bench_function(
62+
&format!(
63+
"convert linestring containing {num_points} points using geos wkb parser ({endianness_name})"
64+
),
65+
|b| {
66+
b.iter(|| {
67+
let g = geos::Geometry::new_from_wkb(wkb.buf()).unwrap();
68+
criterion::black_box(g);
69+
});
70+
},
71+
);
72+
}
73+
}
74+
}
75+
76+
criterion_group!(benches, bench_parse);
77+
criterion_main!(benches);

c/sedona-geos/src/executor.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@
1717
use datafusion_common::{DataFusionError, Result};
1818
use sedona_functions::executor::{GenericExecutor, GeometryFactory};
1919

20+
use crate::wkb_to_geos::GEOSWkbFactory;
21+
2022
/// A [GenericExecutor] that iterates over [geos::Geometry]
2123
pub type GeosExecutor<'a, 'b> = GenericExecutor<'a, 'b, GeosGeometryFactory, GeosGeometryFactory>;
2224

2325
/// [GeometryFactory] implementation for iterating over [geos::Geometry]
2426
#[derive(Default)]
2527
pub struct GeosGeometryFactory {
26-
inner: wkb::reader::to_geos::GEOSWkbFactory,
28+
inner: GEOSWkbFactory,
2729
}
2830

2931
impl GeometryFactory for GeosGeometryFactory {

c/sedona-geos/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,4 @@ mod st_convexhull;
2727
mod st_dwithin;
2828
mod st_length;
2929
mod st_perimeter;
30+
pub mod wkb_to_geos;

0 commit comments

Comments
 (0)