diff --git a/Cargo.lock b/Cargo.lock index d9a3c0a6..ce037d7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -537,6 +537,17 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -2332,6 +2343,19 @@ dependencies = [ "regex", ] +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "atty", + "humantime 1.3.0", + "log", + "regex", + "termcolor", +] + [[package]] name = "env_logger" version = "0.11.8" @@ -2578,9 +2602,9 @@ dependencies = [ [[package]] name = "geo" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4416397671d8997e9a3e7ad99714f4f00a22e9eaa9b966a5985d2194fc9e02e1" +checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" dependencies = [ "earcutr", "float_next_after", @@ -2594,48 +2618,21 @@ dependencies = [ "spade", ] -[[package]] -name = "geo-generic-alg" -version = "0.1.0" -source = "git+https://github.com/wherobots/geo.git?branch=generic-alg#66ff85949a82549b0d28fb2d4fae01e3ea19ca83" -dependencies = [ - "earcutr", - "float_next_after", - "geo-traits 0.2.0", - "geo-traits-ext", - "geo-types", - "geographiclib-rs", - "i_overlay", - "log", - "num-traits", - "robust", - "rstar", - "spade", -] - [[package]] name = "geo-index" version = "0.3.1" -source = "git+https://github.com/wherobots/geo-index.git?branch=main#6a03f0a2e3ba7ecfaacbf18019008449b8c93541" +source = 
"git+https://github.com/wherobots/geo-index.git?branch=main#3213c162b1dfdac9effdef3083afc17261c9f6fc" dependencies = [ "bytemuck", "float_next_after", "geo", - "geo-traits 0.3.0", + "geo-traits", "geo-types", "num-traits", "thiserror 1.0.69", "tinyvec", ] -[[package]] -name = "geo-traits" -version = "0.2.0" -source = "git+https://github.com/wherobots/geo.git?branch=generic-alg#66ff85949a82549b0d28fb2d4fae01e3ea19ca83" -dependencies = [ - "geo-types", -] - [[package]] name = "geo-traits" version = "0.3.0" @@ -2645,18 +2642,6 @@ dependencies = [ "geo-types", ] -[[package]] -name = "geo-traits-ext" -version = "0.1.0" -source = "git+https://github.com/wherobots/geo.git?branch=generic-alg#66ff85949a82549b0d28fb2d4fae01e3ea19ca83" -dependencies = [ - "approx", - "geo-traits 0.2.0", - "geo-types", - "num-traits", - "serde", -] - [[package]] name = "geo-types" version = "0.7.17" @@ -2825,6 +2810,15 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.5.2" @@ -2917,6 +2911,15 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "humantime" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + [[package]] name = "humantime" version = "2.3.0" @@ -2988,24 +2991,24 @@ dependencies = [ [[package]] name = "i_float" -version = "1.7.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "85df3a416829bb955fdc2416c7b73680c8dcea8d731f2c7aa23e1042fe1b8343" +checksum = "010025c2c532c8d82e42d0b8bb5184afa449fa6f06c709ea9adcb16c49ae405b" dependencies = [ - "serde", + "libm", ] [[package]] name = "i_key_sort" -version = "0.2.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "347c253b4748a1a28baf94c9ce133b6b166f08573157e05afe718812bc599fcd" +checksum = "9190f86706ca38ac8add223b2aed8b1330002b5cdbbce28fb58b10914d38fc27" [[package]] name = "i_overlay" -version = "2.0.5" +version = "4.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0542dfef184afdd42174a03dcc0625b6147fb73e1b974b1a08a2a42ac35cee49" +checksum = "0fcccbd4e4274e0f80697f5fbc6540fdac533cce02f2081b328e68629cce24f9" dependencies = [ "i_float", "i_key_sort", @@ -3016,19 +3019,18 @@ dependencies = [ [[package]] name = "i_shape" -version = "1.7.0" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a38f5a42678726718ff924f6d4a0e79b129776aeed298f71de4ceedbd091bce" +checksum = "1ea154b742f7d43dae2897fcd5ead86bc7b5eefcedd305a7ebf9f69d44d61082" dependencies = [ "i_float", - "serde", ] [[package]] name = "i_tree" -version = "0.8.3" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "155181bc97d770181cf9477da51218a19ee92a8e5be642e796661aee2b601139" +checksum = "35e6d558e6d4c7b82bc51d9c771e7a927862a161a7d87bf2b0541450e0e20915" [[package]] name = "iana-time-zone" @@ -3222,7 +3224,7 @@ version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ - "hermit-abi", + "hermit-abi 0.5.2", "libc", "windows-sys 0.59.0", ] @@ -3773,7 +3775,7 @@ dependencies = [ "futures", "http 1.3.1", "http-body-util", - "humantime", + "humantime 2.3.0", "hyper", "itertools 0.14.0", "md-5", @@ -4028,6 +4030,16 @@ dependencies = [ 
"zerocopy", ] +[[package]] +name = "pretty_env_logger" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d" +dependencies = [ + "env_logger 0.7.1", + "log", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -4170,6 +4182,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quick-xml" version = "0.38.3" @@ -4319,6 +4337,16 @@ dependencies = [ "getrandom 0.3.3", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + [[package]] name = "rayon" version = "1.11.0" @@ -4769,7 +4797,7 @@ dependencies = [ "datafusion-ffi", "dirs", "futures", - "geo-traits 0.2.0", + "geo-traits", "geo-types", "object_store", "parking_lot", @@ -4816,7 +4844,7 @@ dependencies = [ "async-trait", "clap", "datafusion", - "env_logger", + "env_logger 0.11.8", "futures", "libmimalloc-sys", "mimalloc", @@ -4845,7 +4873,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "geo-traits 0.2.0", + "geo-traits", "rstest", "sedona-common", "sedona-geometry", @@ -4866,7 +4894,7 @@ dependencies = [ "datafusion", "datafusion-common", "datafusion-expr", - "geo-traits 0.2.0", + "geo-traits", "rstest", "sedona-common", "sedona-expr", @@ -4876,7 +4904,7 @@ dependencies = [ "serde_json", "tokio", "wkb", - "wkt 0.13.0", + "wkt 0.14.0", ] [[package]] @@ -4889,17 +4917,54 @@ dependencies = [ "datafusion-common", "datafusion-expr", 
"geo", - "geo-generic-alg", - "geo-traits 0.2.0", + "geo-traits", "geo-types", "rstest", "sedona-expr", "sedona-functions", + "sedona-geo-generic-alg", "sedona-geometry", "sedona-schema", "sedona-testing", "wkb", - "wkt 0.13.0", + "wkt 0.14.0", +] + +[[package]] +name = "sedona-geo-generic-alg" +version = "0.2.0" +dependencies = [ + "approx", + "criterion", + "float_next_after", + "geo", + "geo-traits", + "geo-types", + "i_overlay", + "log", + "num-traits", + "pretty_env_logger", + "rand 0.8.5", + "rand_distr", + "robust", + "rstar", + "sedona-geo-traits-ext", + "sedona-testing", + "wkb", + "wkt 0.14.0", +] + +[[package]] +name = "sedona-geo-traits-ext" +version = "0.2.0" +dependencies = [ + "byteorder", + "geo-traits", + "geo-types", + "num-traits", + "rstest", + "wkb", + "wkt 0.14.0", ] [[package]] @@ -4927,7 +4992,7 @@ dependencies = [ name = "sedona-geometry" version = "0.2.0" dependencies = [ - "geo-traits 0.2.0", + "geo-traits", "geo-types", "lru", "rstest", @@ -4936,7 +5001,7 @@ dependencies = [ "serde_with", "thiserror 2.0.17", "wkb", - "wkt 0.13.0", + "wkt 0.14.0", ] [[package]] @@ -4957,7 +5022,7 @@ dependencies = [ "datafusion-physical-plan", "float_next_after", "futures", - "geo-traits 0.2.0", + "geo-traits", "object_store", "parquet", "rstest", @@ -4981,9 +5046,12 @@ version = "0.2.0" dependencies = [ "arrow-array", "arrow-schema", + "byteorder", "criterion", "datafusion-common", "datafusion-expr", + "geo-traits", + "geo-types", "geos", "rstest", "sedona", @@ -5006,7 +5074,7 @@ dependencies = [ "criterion", "datafusion-common", "datafusion-expr", - "geo-traits 0.2.0", + "geo-traits", "geo-types", "proj-sys", "rstest", @@ -5071,10 +5139,9 @@ dependencies = [ "datafusion-physical-plan", "float_next_after", "futures", - "geo-generic-alg", + "geo", "geo-index", - "geo-traits 0.2.0", - "geo-traits-ext", + "geo-traits", "geo-types", "geos", "once_cell", @@ -5085,6 +5152,8 @@ dependencies = [ "sedona-expr", "sedona-functions", "sedona-geo", + 
"sedona-geo-generic-alg", + "sedona-geo-traits-ext", "sedona-geometry", "sedona-geos", "sedona-schema", @@ -5092,7 +5161,7 @@ dependencies = [ "sedona-tg", "tokio", "wkb", - "wkt 0.13.0", + "wkt 0.14.0", ] [[package]] @@ -5107,7 +5176,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "geo", - "geo-traits 0.2.0", + "geo-traits", "geo-types", "parquet", "rand 0.8.5", @@ -5117,7 +5186,7 @@ dependencies = [ "sedona-geometry", "sedona-schema", "wkb", - "wkt 0.13.0", + "wkt 0.14.0", ] [[package]] @@ -5520,6 +5589,15 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -6380,14 +6458,11 @@ checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "wkb" -version = "0.8.0" -source = "git+https://github.com/wherobots/wkb.git?branch=generic-alg#5496c33919e9193edcde6ccf7dd51a9093782277" +version = "0.9.1" +source = "git+https://github.com/georust/wkb.git?rev=130eb0c2b343bc9299aeafba6d34c2a6e53f3b6a#130eb0c2b343bc9299aeafba6d34c2a6e53f3b6a" dependencies = [ "byteorder", - "geo-traits 0.2.0", - "geo-traits-ext", - "geo-types", - "geos", + "geo-traits", "num_enum", "thiserror 1.0.69", ] @@ -6406,10 +6481,11 @@ dependencies = [ [[package]] name = "wkt" -version = "0.13.0" -source = "git+https://github.com/wherobots/wkt.git?branch=generic-alg#ec26b050ec1718ee08e4d8a911e99f1039b60c8b" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" dependencies = [ - "geo-traits 0.2.0", + "geo-traits", "geo-types", "log", "num-traits", @@ -6584,8 +6660,3 @@ dependencies = [ "cc", "pkg-config", ] - -[[patch.unused]] -name = "geo-types" -version = 
"0.7.16" -source = "git+https://github.com/wherobots/geo.git?branch=generic-alg#66ff85949a82549b0d28fb2d4fae01e3ea19ca83" diff --git a/Cargo.toml b/Cargo.toml index 1c361112..f314e539 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,8 @@ members = [ "c/sedona-s2geography", "c/sedona-tg", "r/sedonadb/src/rust", + "rust/sedona-geo-traits-ext", + "rust/sedona-geo-generic-alg", "rust/sedona-adbc", "rust/sedona-expr", "rust/sedona-functions", @@ -65,6 +67,7 @@ arrow-json = { version = "55.1.0" } arrow-schema = { version = "55.1.0" } async-trait = { version = "0.1.87" } bytes = "1.10" +byteorder = "1" chrono = { version = "0.4.38", default-features = false } comfy-table = { version = "7.0" } criterion = { version = "0.5", features = ["html_reports"] } @@ -82,23 +85,21 @@ env_logger = "0.11" futures = { version = "0.3" } object_store = { version = "0.12.0", default-features = false } float_next_after = "1" +num-traits = { version = "0.2", default-features = false, features = ["libm"] } mimalloc = { version = "0.1", default-features = false } libmimalloc-sys = { version = "0.1", default-features = false } once_cell = "1.20" -geos = { version = "10.0.0", features = ["geo"] } +geos = { version = "10.0.0", features = ["geo", "v3_10_0"] } -# Use our own fork of georust/geo, which implements generic computational geometry algorithms for geo-traits -geo-generic-alg = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-generic-alg" } -geo-types = "0.7.16" -geo-traits = "0.2.0" -geo-traits-ext = "0.1.0" -geo = { version = "0.30.0" } +geo-types = "0.7.17" +geo-traits = "0.3.0" +geo = "0.31.0" geo-index = { version = "0.3.1" } -wkb = { version = "0.8.0", features = ["geos"] } -wkt = "0.13.0" +wkb = "0.9.1" +wkt = "0.14.0" parking_lot = "0.12" parquet = { version = "55.1.0", default-features = false, features = [ @@ -129,9 +130,6 @@ datafusion-ffi = { git = "https://github.com/paleolimbot/datafusion.git", branch datafusion-physical-expr = { git 
= "https://github.com/paleolimbot/datafusion.git", branch = "local-49-with-patch", package = "datafusion-physical-expr" } datafusion-physical-plan = { git = "https://github.com/paleolimbot/datafusion.git", branch = "local-49-with-patch", package = "datafusion-physical-plan" } -geo-types = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-types" } -geo-traits = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-traits" } -geo-traits-ext = { git = "https://github.com/wherobots/geo.git", branch = "generic-alg", package = "geo-traits-ext" } +# TODO: remove them once changes we made to geo-index and wkb crates are merged to upstream and released geo-index = { git = "https://github.com/wherobots/geo-index.git", branch = "main" } -wkb = { git = "https://github.com/wherobots/wkb.git", branch = "generic-alg" } -wkt = { git = "https://github.com/wherobots/wkt.git", branch = "generic-alg" } +wkb = { git = "https://github.com/georust/wkb.git", rev = "130eb0c2b343bc9299aeafba6d34c2a6e53f3b6a" } diff --git a/c/sedona-geos/Cargo.toml b/c/sedona-geos/Cargo.toml index f7de7ee9..d92f076f 100644 --- a/c/sedona-geos/Cargo.toml +++ b/c/sedona-geos/Cargo.toml @@ -31,6 +31,7 @@ criterion = { workspace = true } sedona = { path = "../../rust/sedona" } sedona-testing = { path = "../../rust/sedona-testing", features = ["criterion"] } rstest = { workspace = true } +geo-types = { workspace = true } [dependencies] arrow-schema = { workspace = true } @@ -42,8 +43,14 @@ sedona-expr = { path = "../../rust/sedona-expr" } sedona-functions = { path = "../../rust/sedona-functions" } sedona-geometry = { path = "../../rust/sedona-geometry" } sedona-schema = { path = "../../rust/sedona-schema" } -wkb = { workspace = true, features = ["geos"] } +geo-traits = { workspace = true } +wkb = { workspace = true } +byteorder = { workspace = true } [[bench]] harness = false name = "geos-functions" + +[[bench]] +harness = false +name = 
"wkb_to_geos" diff --git a/c/sedona-geos/benches/wkb_to_geos.rs b/c/sedona-geos/benches/wkb_to_geos.rs new file mode 100644 index 00000000..cd2b9dbb --- /dev/null +++ b/c/sedona-geos/benches/wkb_to_geos.rs @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+use criterion::{criterion_group, criterion_main}; +use geo_types::{LineString, Point}; +use sedona_geos::wkb_to_geos::GEOSWkbFactory; +use wkb::Endianness; + +fn generate_wkb_linestring(num_points: usize, endianness: Endianness) -> Vec { + let mut points = Vec::new(); + for i in 0..num_points { + points.push(Point::new(i as f64, i as f64)); + } + let linestring = LineString::from(points); + let mut buffer = Vec::new(); + wkb::writer::write_geometry( + &mut buffer, + &linestring, + &wkb::writer::WriteOptions { endianness }, + ) + .unwrap(); + buffer +} + +fn bench_parse(c: &mut criterion::Criterion) { + for num_points in [4, 10, 100, 500, 1000] { + for endianness in [Endianness::BigEndian, Endianness::LittleEndian] { + let wkb_buf = generate_wkb_linestring(num_points, endianness); + let wkb = wkb::reader::read_wkb(&wkb_buf).unwrap(); + let endianness_name: &str = match endianness { + Endianness::BigEndian => "big endian", + Endianness::LittleEndian => "little endian", + }; + + c.bench_function( + &format!( + "convert linestring containing {num_points} points using to_geos ({endianness_name})" + ), + |b| { + let factory = GEOSWkbFactory::new(); + b.iter(|| { + let g = factory.create(&wkb).unwrap(); + criterion::black_box(g); + }); + }, + ); + + c.bench_function( + &format!( + "convert linestring containing {num_points} points using geos wkb parser ({endianness_name})" + ), + |b| { + b.iter(|| { + let g = geos::Geometry::new_from_wkb(wkb.buf()).unwrap(); + criterion::black_box(g); + }); + }, + ); + } + } +} + +criterion_group!(benches, bench_parse); +criterion_main!(benches); diff --git a/c/sedona-geos/src/executor.rs b/c/sedona-geos/src/executor.rs index 3d806cc3..5e8a021d 100644 --- a/c/sedona-geos/src/executor.rs +++ b/c/sedona-geos/src/executor.rs @@ -17,13 +17,15 @@ use datafusion_common::{DataFusionError, Result}; use sedona_functions::executor::{GenericExecutor, GeometryFactory}; +use crate::wkb_to_geos::GEOSWkbFactory; + /// A [GenericExecutor] that iterates 
over [geos::Geometry] pub type GeosExecutor<'a, 'b> = GenericExecutor<'a, 'b, GeosGeometryFactory, GeosGeometryFactory>; /// [GeometryFactory] implementation for iterating over [geos::Geometry] #[derive(Default)] pub struct GeosGeometryFactory { - inner: wkb::reader::to_geos::GEOSWkbFactory, + inner: GEOSWkbFactory, } impl GeometryFactory for GeosGeometryFactory { diff --git a/c/sedona-geos/src/lib.rs b/c/sedona-geos/src/lib.rs index 667bc823..74d57b94 100644 --- a/c/sedona-geos/src/lib.rs +++ b/c/sedona-geos/src/lib.rs @@ -27,3 +27,4 @@ mod st_convexhull; mod st_dwithin; mod st_length; mod st_perimeter; +pub mod wkb_to_geos; diff --git a/c/sedona-geos/src/wkb_to_geos.rs b/c/sedona-geos/src/wkb_to_geos.rs new file mode 100644 index 00000000..9bd83507 --- /dev/null +++ b/c/sedona-geos/src/wkb_to_geos.rs @@ -0,0 +1,1335 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use std::cell::RefCell; + +use byteorder::{BigEndian, ByteOrder, LittleEndian}; +use geo_traits::*; +use geos::GResult; +use wkb::{reader::*, Endianness}; + +/// A factory for converting WKB to GEOS geometries. +/// +/// This factory uses a scratch buffer to store intermediate coordinate data. 
+/// The scratch buffer is reused for each conversion, which reduces memory allocation +/// overhead. +pub struct GEOSWkbFactory { + scratch: RefCell>, +} + +impl Default for GEOSWkbFactory { + fn default() -> Self { + Self::new() + } +} + +impl GEOSWkbFactory { + /// Create a new GEOSWkbFactory. + pub fn new() -> Self { + Self { + scratch: RefCell::new(Vec::new()), + } + } + + /// Create a GEOS geometry from a WKB. + pub fn create(&self, wkb: &Wkb) -> GResult { + let scratch = &mut self.scratch.borrow_mut(); + geometry_to_geos(scratch, wkb) + } +} + +fn geometry_to_geos(scratch: &mut Vec, wkb: &Wkb) -> GResult { + let geom = wkb.as_type(); + match geom { + geo_traits::GeometryType::Point(p) => point_to_geos(scratch, p), + geo_traits::GeometryType::LineString(ls) => line_string_to_geos(scratch, ls), + geo_traits::GeometryType::Polygon(poly) => polygon_to_geos(scratch, poly), + geo_traits::GeometryType::MultiPoint(mp) => multi_point_to_geos(scratch, mp), + geo_traits::GeometryType::MultiLineString(mls) => multi_line_string_to_geos(scratch, mls), + geo_traits::GeometryType::MultiPolygon(mpoly) => multi_polygon_to_geos(scratch, mpoly), + geo_traits::GeometryType::GeometryCollection(gc) => { + geometry_collection_to_geos(scratch, gc) + } + _ => Err(geos::Error::ConversionError( + "Unsupported geometry type".to_string(), + )), + } +} + +fn point_to_geos(scratch: &mut Vec, p: &Point) -> GResult { + if p.is_empty() { + geos::Geometry::create_empty_point() + } else { + let coord_seq = create_coord_sequence_from_raw_parts( + p.coord_slice(), + p.dimension(), + p.byte_order(), + 1, + scratch, + )?; + let point = geos::Geometry::create_point(coord_seq)?; + Ok(point) + } +} + +fn line_string_to_geos(scratch: &mut Vec, ls: &LineString) -> GResult { + let num_points = ls.num_coords(); + if num_points == 0 { + geos::Geometry::create_empty_line_string() + } else { + let coord_seq = create_coord_sequence_from_raw_parts( + ls.coords_slice(), + ls.dimension(), + ls.byte_order(), + 
num_points, + scratch, + )?; + geos::Geometry::create_line_string(coord_seq) + } +} + +fn polygon_to_geos(scratch: &mut Vec, poly: &Polygon) -> GResult { + // Create exterior ring + let exterior = if let Some(ring) = poly.exterior() { + let coord_seq = create_coord_sequence_from_raw_parts( + ring.coords_slice(), + ring.dimension(), + ring.byte_order(), + ring.num_coords(), + scratch, + )?; + geos::Geometry::create_linear_ring(coord_seq)? + } else { + return geos::Geometry::create_empty_polygon(); + }; + + // Create interior rings + let num_interiors = poly.num_interiors(); + let mut interior_rings = Vec::with_capacity(num_interiors); + for i in 0..num_interiors { + let ring = poly.interior(i).unwrap(); + let coord_seq = create_coord_sequence_from_raw_parts( + ring.coords_slice(), + ring.dimension(), + ring.byte_order(), + ring.num_coords(), + scratch, + )?; + let interior_ring = geos::Geometry::create_linear_ring(coord_seq)?; + interior_rings.push(interior_ring); + } + + geos::Geometry::create_polygon(exterior, interior_rings) +} + +fn multi_point_to_geos(scratch: &mut Vec, mp: &MultiPoint) -> GResult { + let num_points = mp.num_points(); + if num_points == 0 { + // Create an empty multi-point by creating a geometry collection with no geometries + geos::Geometry::create_empty_collection(geos::GeometryTypes::MultiPoint) + } else { + let mut points = Vec::with_capacity(num_points); + for i in 0..num_points { + let point = unsafe { mp.point_unchecked(i) }; + let geos_point = point_to_geos(scratch, &point)?; + points.push(geos_point); + } + geos::Geometry::create_multipoint(points) + } +} + +fn multi_line_string_to_geos( + scratch: &mut Vec, + mls: &MultiLineString, +) -> GResult { + let num_line_strings = mls.num_line_strings(); + if num_line_strings == 0 { + geos::Geometry::create_empty_collection(geos::GeometryTypes::MultiLineString) + } else { + let mut line_strings = Vec::with_capacity(num_line_strings); + for i in 0..num_line_strings { + let ls = unsafe { 
mls.line_string_unchecked(i) }; + let geos_line_string = line_string_to_geos(scratch, ls)?; + line_strings.push(geos_line_string); + } + geos::Geometry::create_multiline_string(line_strings) + } +} + +fn multi_polygon_to_geos(scratch: &mut Vec, mpoly: &MultiPolygon) -> GResult { + let num_polygons = mpoly.num_polygons(); + if num_polygons == 0 { + geos::Geometry::create_empty_collection(geos::GeometryTypes::MultiPolygon) + } else { + let mut polygons = Vec::with_capacity(num_polygons); + for i in 0..num_polygons { + let poly = unsafe { mpoly.polygon_unchecked(i) }; + let geos_polygon = polygon_to_geos(scratch, poly)?; + polygons.push(geos_polygon); + } + geos::Geometry::create_multipolygon(polygons) + } +} + +fn geometry_collection_to_geos( + scratch: &mut Vec, + gc: &GeometryCollection, +) -> GResult { + if gc.num_geometries() == 0 { + geos::Geometry::create_empty_collection(geos::GeometryTypes::GeometryCollection) + } else { + let num_geometries = gc.num_geometries(); + let mut geometries = Vec::with_capacity(num_geometries); + for i in 0..num_geometries { + let geom = gc.geometry(i).unwrap(); + let geos_geom = geometry_to_geos(scratch, geom)?; + geometries.push(geos_geom); + } + geos::Geometry::create_geometry_collection(geometries) + } +} + +const NATIVE_ENDIANNESS: Endianness = if cfg!(target_endian = "big") { + Endianness::BigEndian +} else { + Endianness::LittleEndian +}; + +fn create_coord_sequence_from_raw_parts( + buf: &[u8], + dim: Dimension, + byte_order: Endianness, + num_coords: usize, + scratch: &mut Vec, +) -> GResult { + let (has_z, has_m, dim_size) = match dim { + Dimension::Xy => (false, false, 2), + Dimension::Xyz => (true, false, 3), + Dimension::Xym => (false, true, 3), + Dimension::Xyzm => (true, true, 4), + }; + let num_ordinates = dim_size * num_coords; + + // If the byte order matches native endianness, we can potentially use zero-copy + if byte_order == NATIVE_ENDIANNESS { + let ptr = buf.as_ptr(); + + // On platforms with unaligned 
memory access support, we can construct the coord seq + // directly from the raw parts without copying to the scratch buffer. + #[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))] + { + let coords_f64 = + unsafe { &*core::ptr::slice_from_raw_parts(ptr as *const f64, num_ordinates) }; + geos::CoordSeq::new_from_buffer(coords_f64, num_coords, has_z, has_m) + } + + // On platforms without unaligned memory access support, we need to copy the data to the + // scratch buffer to make sure the data is aligned. + #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] + { + unsafe { + scratch.clear(); + scratch.reserve(num_ordinates); + scratch.set_len(num_ordinates); + std::ptr::copy_nonoverlapping( + ptr, + scratch.as_mut_ptr() as *mut u8, + num_ordinates * std::mem::size_of::(), + ); + geos::CoordSeq::new_from_buffer(scratch.as_slice(), num_coords, has_z, has_m) + } + } + } else { + // Need to convert byte order + match byte_order { + Endianness::BigEndian => { + save_f64_to_scratch::(scratch, buf, num_ordinates); + } + Endianness::LittleEndian => { + save_f64_to_scratch::(scratch, buf, num_ordinates); + } + } + geos::CoordSeq::new_from_buffer(scratch.as_slice(), num_coords, has_z, has_m) + } +} + +fn save_f64_to_scratch(scratch: &mut Vec, buf: &[u8], num_ordinates: usize) { + scratch.clear(); + scratch.reserve(num_ordinates); + // Safety: we have already reserved the capacity, so we can set the length safely. + // Justification: rewriting the loop to not use Vec::push makes it many times faster, + // since it eliminates several memory loads and stores for vector's length and capacity, + // and it enables the compiler to generate vectorized code. 
+ #[allow(clippy::uninit_vec)] + unsafe { + scratch.set_len(num_ordinates); + } + assert!(num_ordinates * 8 <= buf.len()); + for (i, tgt) in scratch.iter_mut().enumerate().take(num_ordinates) { + let offset = i * 8; + let value = B::read_f64(&buf[offset..]); + *tgt = value; + } +} + +#[cfg(test)] +mod test { + use super::*; + use geo_types::{ + line_string, point, polygon, Geometry, GeometryCollection, LineString, MultiLineString, + MultiPoint, MultiPolygon, Point, Polygon, + }; + use geos::Geom; + use wkb::{ + writer::{ + write_geometry_collection, write_line_string, write_multi_line_string, + write_multi_point, write_multi_polygon, write_point, write_polygon, WriteOptions, + }, + Endianness, + }; + + pub(super) fn point_2d() -> Point { + point!( + x: 0., y: 1. + ) + } + + pub(super) fn linestring_2d() -> LineString { + line_string![ + (x: 0., y: 1.), + (x: 1., y: 2.) + ] + } + + pub(super) fn polygon_2d() -> Polygon { + polygon![ + (x: -111., y: 45.), + (x: -111., y: 41.), + (x: -104., y: 41.), + (x: -104., y: 45.), + ] + } + + pub(super) fn polygon_2d_with_interior() -> Polygon { + polygon!( + exterior: [ + (x: -111., y: 45.), + (x: -111., y: 41.), + (x: -104., y: 41.), + (x: -104., y: 45.), + ], + interiors: [ + [ + (x: -110., y: 44.), + (x: -110., y: 42.), + (x: -105., y: 42.), + (x: -105., y: 44.), + ], + ], + ) + } + + pub(super) fn multi_point_2d() -> MultiPoint { + MultiPoint::new(vec![ + point!( + x: 0., y: 1. + ), + point!( + x: 1., y: 2. 
+ ), + ]) + } + + pub(super) fn multi_line_string_2d() -> MultiLineString { + MultiLineString::new(vec![ + line_string![ + (x: -111., y: 45.), + (x: -111., y: 41.), + (x: -104., y: 41.), + (x: -104., y: 45.), + ], + line_string![ + (x: -110., y: 44.), + (x: -110., y: 42.), + (x: -105., y: 42.), + (x: -105., y: 44.), + ], + ]) + } + + pub(super) fn multi_polygon_2d() -> MultiPolygon { + MultiPolygon::new(vec![ + polygon![ + (x: -111., y: 45.), + (x: -111., y: 41.), + (x: -104., y: 41.), + (x: -104., y: 45.), + ], + polygon!( + exterior: [ + (x: -111., y: 45.), + (x: -111., y: 41.), + (x: -104., y: 41.), + (x: -104., y: 45.), + ], + interiors: [ + [ + (x: -110., y: 44.), + (x: -110., y: 42.), + (x: -105., y: 42.), + (x: -105., y: 44.), + ], + ], + ), + ]) + } + + pub(super) fn geometry_collection_2d() -> GeometryCollection { + GeometryCollection::new_from(vec![ + Geometry::Point(point_2d()), + Geometry::LineString(linestring_2d()), + Geometry::Polygon(polygon_2d()), + Geometry::Polygon(polygon_2d_with_interior()), + Geometry::MultiPoint(multi_point_2d()), + Geometry::MultiLineString(multi_line_string_2d()), + Geometry::MultiPolygon(multi_polygon_2d()), + ]) + } + + fn test_geometry_conversion(geo_geom: &Geometry, endianness: Endianness) { + // Convert geo geometry to WKB + let mut buf = Vec::new(); + let write_options = WriteOptions { endianness }; + match geo_geom { + Geometry::Point(p) => write_point(&mut buf, p, &write_options).unwrap(), + Geometry::LineString(ls) => write_line_string(&mut buf, ls, &write_options).unwrap(), + Geometry::Polygon(p) => write_polygon(&mut buf, p, &write_options).unwrap(), + Geometry::MultiPoint(mp) => write_multi_point(&mut buf, mp, &write_options).unwrap(), + Geometry::MultiLineString(mls) => { + write_multi_line_string(&mut buf, mls, &write_options).unwrap() + } + Geometry::MultiPolygon(mp) => { + write_multi_polygon(&mut buf, mp, &write_options).unwrap() + } + Geometry::GeometryCollection(gc) => { + write_geometry_collection(&mut 
buf, gc, &write_options).unwrap() + } + Geometry::Line(_) => panic!("Line geometry not supported in tests"), + Geometry::Rect(_) => panic!("Rect geometry not supported in tests"), + Geometry::Triangle(_) => panic!("Triangle geometry not supported in tests"), + } + + // Read WKB back + let wkb = wkb::reader::read_wkb(&buf).unwrap(); + + // Convert to GEOS using our ToGeos converter + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + // Convert back to geo for comparison + let geo_from_geos: Geometry = geos_geom.try_into().unwrap(); + + // Compare the geometries + assert_eq!(*geo_geom, geo_from_geos); + } + + #[test] + fn test_point_conversion() { + let point = point_2d(); + let geo_geom = Geometry::Point(point); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_empty_point_conversion() { + // Create an empty point by writing NaN coordinates + let mut buf = Vec::new(); + buf.push(1); // Little endian + buf.extend_from_slice(&1u32.to_le_bytes()); // Point type + buf.extend_from_slice(&f64::NAN.to_le_bytes()); // x = NaN + buf.extend_from_slice(&f64::NAN.to_le_bytes()); // y = NaN + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + assert!(geos_geom.is_empty().unwrap()); + } + + #[test] + fn test_line_string_conversion() { + let line_string = linestring_2d(); + let geo_geom = Geometry::LineString(line_string); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_empty_line_string_conversion() { + let mut buf = Vec::new(); + write_line_string( + &mut buf, + &LineString::new(vec![]), + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + 
assert!(geos_geom.is_empty().unwrap()); + } + + #[test] + fn test_polygon_conversion() { + let polygon = polygon_2d(); + let geo_geom = Geometry::Polygon(polygon); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_polygon_with_interior_conversion() { + let polygon = polygon_2d_with_interior(); + let geo_geom = Geometry::Polygon(polygon); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_multi_point_conversion() { + let multi_point = multi_point_2d(); + let geo_geom = Geometry::MultiPoint(multi_point); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_empty_multi_point_conversion() { + let mut buf = Vec::new(); + write_multi_point( + &mut buf, + &MultiPoint::new(vec![]), + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + assert!(geos_geom.is_empty().unwrap()); + } + + #[test] + fn test_multi_line_string_conversion() { + let multi_line_string = multi_line_string_2d(); + let geo_geom = Geometry::MultiLineString(multi_line_string); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_empty_multi_line_string_conversion() { + let mut buf = Vec::new(); + write_multi_line_string( + &mut buf, + &MultiLineString::new(vec![]), + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + assert!(geos_geom.is_empty().unwrap()); + } + + #[test] + fn test_multi_polygon_conversion() { + let multi_polygon = 
multi_polygon_2d(); + let geo_geom = Geometry::MultiPolygon(multi_polygon); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_empty_multi_polygon_conversion() { + let mut buf = Vec::new(); + write_multi_polygon( + &mut buf, + &MultiPolygon::new(vec![]), + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + assert!(geos_geom.is_empty().unwrap()); + } + + #[test] + fn test_geometry_collection_conversion() { + let geometry_collection = geometry_collection_2d(); + let geo_geom = Geometry::GeometryCollection(geometry_collection); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_empty_geometry_collection_conversion() { + let mut buf = Vec::new(); + write_geometry_collection( + &mut buf, + &GeometryCollection::new_from(vec![]), + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + assert!(geos_geom.is_empty().unwrap()); + } + + #[test] + fn test_nested_geometry_collection() { + // Create a geometry collection containing other geometry collections + let inner_gc1 = GeometryCollection::new_from(vec![ + Geometry::Point(point_2d()), + Geometry::LineString(linestring_2d()), + ]); + + let inner_gc2 = GeometryCollection::new_from(vec![ + Geometry::Polygon(polygon_2d()), + Geometry::MultiPoint(multi_point_2d()), + ]); + + let outer_gc = GeometryCollection::new_from(vec![ + Geometry::GeometryCollection(inner_gc1), + Geometry::GeometryCollection(inner_gc2), + Geometry::MultiLineString(multi_line_string_2d()), + ]); + + let geo_geom = Geometry::GeometryCollection(outer_gc); + + test_geometry_conversion(&geo_geom, 
Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_coordinate_precision() { + // Test with high precision coordinates + let high_precision_point = Point::new(123.456789012345, -98.765432109876); + let geo_geom = Geometry::Point(high_precision_point); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_large_coordinates() { + // Test with very large coordinate values + let large_point = Point::new(1e10, -1e10); + let geo_geom = Geometry::Point(large_point); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_negative_coordinates() { + // Test with negative coordinates + let negative_point = Point::new(-180.0, -90.0); + let geo_geom = Geometry::Point(negative_point); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_zero_coordinates() { + // Test with zero coordinates + let zero_point = Point::new(0.0, 0.0); + let geo_geom = Geometry::Point(zero_point); + + test_geometry_conversion(&geo_geom, Endianness::LittleEndian); + test_geometry_conversion(&geo_geom, Endianness::BigEndian); + } + + #[test] + fn test_endianness_handling() { + let factory = GEOSWkbFactory::new(); + // Test that both endianness variants work correctly + let point = point_2d(); + let geo_geom = Geometry::Point(point); + + // Test little endian + let mut buf_le = Vec::new(); + write_point( + &mut buf_le, + &point_2d(), + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); + let wkb_le = read_wkb(&buf_le).unwrap(); + let geos_geom_le = factory.create(&wkb_le).unwrap(); + let geo_from_geos_le: Geometry = geos_geom_le.try_into().unwrap(); + + // Test big endian + let mut buf_be = Vec::new(); + write_point( + 
&mut buf_be, + &point_2d(), + &WriteOptions { + endianness: Endianness::BigEndian, + }, + ) + .unwrap(); + let wkb_be = read_wkb(&buf_be).unwrap(); + let geos_geom_be = factory.create(&wkb_be).unwrap(); + let geo_from_geos_be: Geometry = geos_geom_be.try_into().unwrap(); + + // Both should produce the same result + assert_eq!(geo_from_geos_le, geo_from_geos_be); + assert_eq!(geo_geom, geo_from_geos_le); + } + + #[test] + fn test_xyz_dimension_handling() { + // Test XYZ dimension handling by manually creating WKB with XYZ coordinates + let mut buf = Vec::new(); + + // Write WKB header for LineString XYZ (type 1002) + buf.push(1); // Little endian + buf.extend_from_slice(&1002u32.to_le_bytes()); // LineString XYZ + buf.extend_from_slice(&2u32.to_le_bytes()); // 2 points + + // Write XYZ coordinates: (0.0, 1.0, 10.0), (1.0, 2.0, 20.0) + buf.extend_from_slice(&0.0f64.to_le_bytes()); + buf.extend_from_slice(&1.0f64.to_le_bytes()); + buf.extend_from_slice(&10.0f64.to_le_bytes()); + buf.extend_from_slice(&1.0f64.to_le_bytes()); + buf.extend_from_slice(&2.0f64.to_le_bytes()); + buf.extend_from_slice(&20.0f64.to_le_bytes()); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + // Verify the geometry was created successfully + assert!(!geos_geom.is_empty().unwrap()); + + // Verify coordinates by checking the WKT representation + let wkt = geos_geom.to_wkt().unwrap(); + // Expected WKT for LineString with XYZ coordinates (0.0, 1.0, 10.0), (1.0, 2.0, 20.0) + let expected_wkt = "LINESTRING Z (0 1 10, 1 2 20)"; + assert_eq!(wkt, expected_wkt); + } + + #[test] + fn test_xym_dimension_handling() { + // Test XYM dimension handling by manually creating WKB with XYM coordinates + let mut buf = Vec::new(); + + // Write WKB header for LineString XYM (type 2002) + buf.push(1); // Little endian + buf.extend_from_slice(&2002u32.to_le_bytes()); // LineString XYM + buf.extend_from_slice(&2u32.to_le_bytes()); // 2 points + + // Write XYM 
coordinates: (0.0, 1.0, 100.0), (1.0, 2.0, 200.0) + buf.extend_from_slice(&0.0f64.to_le_bytes()); + buf.extend_from_slice(&1.0f64.to_le_bytes()); + buf.extend_from_slice(&100.0f64.to_le_bytes()); + buf.extend_from_slice(&1.0f64.to_le_bytes()); + buf.extend_from_slice(&2.0f64.to_le_bytes()); + buf.extend_from_slice(&200.0f64.to_le_bytes()); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + // Verify the geometry was created successfully + assert!(!geos_geom.is_empty().unwrap()); + + // Verify coordinates by checking the WKT representation + let wkt = geos_geom.to_wkt().unwrap(); + // Expected WKT for LineString with XYM coordinates (0.0, 1.0, 100.0), (1.0, 2.0, 200.0) + let expected_wkt = "LINESTRING M (0 1 100, 1 2 200)"; + assert_eq!(wkt, expected_wkt); + } + + #[test] + fn test_xyzm_dimension_handling() { + // Test XYZM dimension handling by manually creating WKB with XYZM coordinates + let mut buf = Vec::new(); + + // Write WKB header for LineString XYZM (type 3002) + buf.push(1); // Little endian + buf.extend_from_slice(&3002u32.to_le_bytes()); // LineString XYZM + buf.extend_from_slice(&2u32.to_le_bytes()); // 2 points + + // Write XYZM coordinates: (0.0, 1.0, 10.0, 100.0), (1.0, 2.0, 20.0, 200.0) + buf.extend_from_slice(&0.0f64.to_le_bytes()); + buf.extend_from_slice(&1.0f64.to_le_bytes()); + buf.extend_from_slice(&10.0f64.to_le_bytes()); + buf.extend_from_slice(&100.0f64.to_le_bytes()); + buf.extend_from_slice(&1.0f64.to_le_bytes()); + buf.extend_from_slice(&2.0f64.to_le_bytes()); + buf.extend_from_slice(&20.0f64.to_le_bytes()); + buf.extend_from_slice(&200.0f64.to_le_bytes()); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + // Verify the geometry was created successfully + assert!(!geos_geom.is_empty().unwrap()); + + // Verify coordinates by checking the WKT representation + let wkt = geos_geom.to_wkt().unwrap(); + // Expected WKT for LineString 
with XYZM coordinates (0.0, 1.0, 10.0, 100.0), (1.0, 2.0, 20.0, 200.0) + let expected_wkt = "LINESTRING ZM (0 1 10 100, 1 2 20 200)"; + assert_eq!(wkt, expected_wkt); + } + + #[test] + fn test_big_endian_xyz_dimension_handling() { + // Test XYZ dimension handling with big endian byte order + let mut buf = Vec::new(); + + // Write WKB header for LineString XYZ (type 1002) in big endian + buf.push(0); // Big endian + buf.extend_from_slice(&1002u32.to_be_bytes()); // LineString XYZ + buf.extend_from_slice(&2u32.to_be_bytes()); // 2 points + + // Write XYZ coordinates in big endian: (0.0, 1.0, 10.0), (1.0, 2.0, 20.0) + buf.extend_from_slice(&0.0f64.to_be_bytes()); + buf.extend_from_slice(&1.0f64.to_be_bytes()); + buf.extend_from_slice(&10.0f64.to_be_bytes()); + buf.extend_from_slice(&1.0f64.to_be_bytes()); + buf.extend_from_slice(&2.0f64.to_be_bytes()); + buf.extend_from_slice(&20.0f64.to_be_bytes()); + + let wkb = read_wkb(&buf).unwrap(); + let geos_geom = GEOSWkbFactory::new().create(&wkb).unwrap(); + + // Verify the geometry was created successfully + assert!(!geos_geom.is_empty().unwrap()); + + // Verify coordinates by checking the WKT representation + let wkt = geos_geom.to_wkt().unwrap(); + // Expected WKT for LineString with XYZ coordinates (0.0, 1.0, 10.0), (1.0, 2.0, 20.0) + let expected_wkt = "LINESTRING Z (0 1 10, 1 2 20)"; + assert_eq!(wkt, expected_wkt); + } + + /// Represents a single WKB test case, holding the coordinate dimension, + /// the raw WKB bytes, and the expected WKT string. + /// This is the Rust counterpart of the C++ `WKBTestCase` struct, with a WKT field added. 
+ #[derive(Debug, PartialEq, Clone)] + pub struct WkbTestCase { + pub dimension: Dimension, + pub wkb_bytes: Vec, + pub wkt_string: String, // WKT expected for `wkb_bytes` + } + + // Table of WKB inputs, each paired with the WKT it is expected to produce + pub fn get_wkb_test_cases() -> Vec { + vec![ + // POINT EMPTY + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f, + ], + wkt_string: "POINT EMPTY".to_string(), + }, + // POINT (30 10) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + ], + wkt_string: "POINT (30 10)".to_string(), + }, + // POINT Z (30 10 40) + WkbTestCase { + dimension: Dimension::Xyz, + wkb_bytes: vec![ + 0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, + ], + wkt_string: "POINT Z (30 10 40)".to_string(), + }, + // POINT M (30 10 300) + WkbTestCase { + dimension: Dimension::Xym, + wkb_bytes: vec![ + 0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x72, 0x40, + ], + wkt_string: "POINT M (30 10 300)".to_string(), + }, + // POINT ZM (30 10 40 300) + WkbTestCase { + dimension: Dimension::Xyzm, + wkb_bytes: vec![ + 0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: "POINT ZM (30 10 40 300)".to_string(), + }, + // POINT (30 10) (big endian) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, + 0x40, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ], + wkt_string: "POINT (30 10)".to_string(), // WKT is endian-agnostic + }, + // LINESTRING EMPTY + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + wkt_string: "LINESTRING EMPTY".to_string(), + }, + // LINESTRING (30 10, 10 30, 40 40) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x40, + ], + wkt_string: "LINESTRING (30 10, 10 30, 40 40)".to_string(), + }, + // LINESTRING Z (30 10 40, 10 30 40, 40 40 80) + WkbTestCase { + dimension: Dimension::Xyz, + wkb_bytes: vec![ + 0x01, 0xea, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x54, 0x40 + ], + wkt_string: "LINESTRING Z (30 10 40, 10 30 40, 40 40 80)".to_string(), + }, + // LINESTRING M (30 10 300, 10 30 300, 40 40 1600) + WkbTestCase { + dimension: Dimension::Xym, + wkb_bytes: vec![ + 0x01, 0xd2, 0x07, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x99, 0x40, + ], + wkt_string: "LINESTRING M (30 10 300, 10 30 300, 40 40 1600)".to_string(), + }, + // LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600) + WkbTestCase { + dimension: Dimension::Xyzm, + wkb_bytes: vec![ + 0x01, 0xba, 0x0b, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, + ], + wkt_string: "LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600)".to_string(), + }, + // LINESTRING (30 10, 10 30, 40 40) (big endian) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x40, 0x3e, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, + 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x3e, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x40, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x44, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, + ], + wkt_string: "LINESTRING (30 10, 10 30, 40 40)".to_string(), + }, + // POLYGON EMPTY + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![0x01, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + wkt_string: "POLYGON EMPTY".to_string(), + }, + // POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x03, 0x00, 0x00, 0x00, 
0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, + ], + wkt_string: "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))".to_string(), + }, + // POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)) + WkbTestCase { + dimension: Dimension::Xyz, + wkb_bytes: vec![ + 0x01, 0xeb, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, + ], + wkt_string: "POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))".to_string(), + }, + // POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)) + WkbTestCase { + dimension: Dimension::Xym, + wkb_bytes: vec![ + 0x01, 0xd3, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 
0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x72, 0x40, + ], + wkt_string: "POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))".to_string(), + }, + // POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 + // 10 40 300)) + WkbTestCase { + dimension: Dimension::Xyzm, + wkb_bytes: vec![ + 0x01, 0xbb, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: 
"POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300))".to_string(), + }, + // MULTIPOINT EMPTY + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![0x01, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + wkt_string: "MULTIPOINT EMPTY".to_string(), + }, + // MULTIPOINT ((30 10)) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, + ], + wkt_string: "MULTIPOINT ((30 10))".to_string(), + }, + // MULTIPOINT Z ((30 10 40)) + WkbTestCase { + dimension: Dimension::Xyz, + wkb_bytes: vec![ + 0x01, 0xec, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xe9, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + ], + wkt_string: "MULTIPOINT Z ((30 10 40))".to_string(), + }, + // MULTIPOINT M ((30 10 300)) + WkbTestCase { + dimension: Dimension::Xym, + wkb_bytes: vec![ + 0x01, 0xd4, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd1, 0x07, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: "MULTIPOINT M ((30 10 300))".to_string(), + }, + // MULTIPOINT ZM ((30 10 40 300)) + WkbTestCase { + dimension: Dimension::Xyzm, + wkb_bytes: vec![ + 0x01, 0xbc, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xb9, 0x0b, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: "MULTIPOINT ZM ((30 10 40 300))".to_string(), + }, + // MULTILINESTRING EMPTY + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![0x01, 0x05, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00], + wkt_string: "MULTILINESTRING EMPTY".to_string(), + }, + // MULTILINESTRING ((30 10, 10 30, 40 40)) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, + ], + wkt_string: "MULTILINESTRING ((30 10, 10 30, 40 40))".to_string(), + }, + // MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80)) + WkbTestCase { + dimension: Dimension::Xyz, + wkb_bytes: vec![ + 0x01, 0xed, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xea, 0x03, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, + ], + wkt_string: "MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80))".to_string(), + }, + // MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600)) + WkbTestCase { + dimension: Dimension::Xym, + wkb_bytes: vec![ + 0x01, 0xd5, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd2, 0x07, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, + ], + wkt_string: "MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600))".to_string(), + }, + // MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600)) + WkbTestCase { + dimension: Dimension::Xyzm, + wkb_bytes: vec![ + 0x01, 0xbd, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xba, 0x0b, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40 + ], + wkt_string: "MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600))".to_string(), + }, + // MULTIPOLYGON EMPTY + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + wkt_string: "MULTIPOLYGON EMPTY".to_string(), + }, + // MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 
0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + ], + wkt_string: "MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))".to_string(), + }, + // MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))) + WkbTestCase { + dimension: Dimension::Xyz, + wkb_bytes: vec![ + 0x01, 0xee, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xeb, 0x03, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + ], + wkt_string: "MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)))".to_string(), + }, + // MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))) + WkbTestCase { + dimension: Dimension::Xym, + wkb_bytes: vec![ + 0x01, 0xd6, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd3, 0x07, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: "MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)))".to_string(), + }, + // MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 + // 10 40 300))) + WkbTestCase { + dimension: Dimension::Xyzm, + wkb_bytes: vec![ + 0x01, 0xbe, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xbb, 0x0b, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, + 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: "MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300)))".to_string(), + }, + // GEOMETRYCOLLECTION EMPTY + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![0x01, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00], + wkt_string: "GEOMETRYCOLLECTION EMPTY".to_string(), + }, + // GEOMETRYCOLLECTION (POINT (30 10)) + WkbTestCase { + dimension: Dimension::Xy, + wkb_bytes: vec![ + 0x01, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, + ], + wkt_string: "GEOMETRYCOLLECTION (POINT (30 10))".to_string(), + }, + // GEOMETRYCOLLECTION Z (POINT Z (30 10 40)) + WkbTestCase { + dimension: Dimension::Xyz, + wkb_bytes: vec![ + 0x01, 0xef, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xe9, 0x03, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + ], + wkt_string: "GEOMETRYCOLLECTION Z (POINT Z (30 10 40))".to_string(), + }, + // GEOMETRYCOLLECTION M (POINT M (30 10 300)) + WkbTestCase { + dimension: Dimension::Xym, + wkb_bytes: vec![ + 0x01, 0xd7, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd1, 0x07, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: "GEOMETRYCOLLECTION M (POINT M (30 10 300))".to_string(), + }, + // GEOMETRYCOLLECTION ZM (POINT ZM (30 10 40 300)) + WkbTestCase { + dimension: Dimension::Xyzm, + wkb_bytes: vec![ + 0x01, 0xbf, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xb9, 0x0b, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + ], + wkt_string: "GEOMETRYCOLLECTION ZM (POINT ZM (30 10 40 300))".to_string(), + }, + ] + } + + #[test] + fn test_using_comprehensive_cases() { + let factory = GEOSWkbFactory::new(); + let test_cases = get_wkb_test_cases(); + for test_case in test_cases { + let wkb = 
read_wkb(&test_case.wkb_bytes).unwrap(); + let geos_geom = factory.create(&wkb).unwrap(); + let wkt_from_geos = geos_geom.to_wkt().unwrap(); + assert_eq!( + wkt_from_geos, test_case.wkt_string, + "Failed for test case {}", + test_case.wkt_string + ); + } + } +} diff --git a/c/sedona-tg/benches/parse-wkb.rs b/c/sedona-tg/benches/parse-wkb.rs index 9632afec..79a920d9 100644 --- a/c/sedona-tg/benches/parse-wkb.rs +++ b/c/sedona-tg/benches/parse-wkb.rs @@ -27,14 +27,18 @@ fn criterion_benchmark(c: &mut Criterion) { wkb::writer::write_geometry( &mut large_geom_wkb_big_endian, &large_geom, - wkb::Endianness::BigEndian, + &wkb::writer::WriteOptions { + endianness: wkb::Endianness::BigEndian, + }, ) .unwrap(); let mut large_geom_wkb_little_endian = Vec::new(); wkb::writer::write_geometry( &mut large_geom_wkb_little_endian, &large_geom, - wkb::Endianness::LittleEndian, + &wkb::writer::WriteOptions { + endianness: wkb::Endianness::LittleEndian, + }, ) .unwrap(); diff --git a/rust/sedona-functions/src/st_geomfromwkt.rs b/rust/sedona-functions/src/st_geomfromwkt.rs index b35d628b..558fbec3 100644 --- a/rust/sedona-functions/src/st_geomfromwkt.rs +++ b/rust/sedona-functions/src/st_geomfromwkt.rs @@ -29,7 +29,8 @@ use sedona_schema::{ datatypes::{SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY}, matchers::ArgMatcher, }; -use wkb::writer::write_geometry; +use wkb::writer::{write_geometry, WriteOptions}; +use wkb::Endianness; use wkt::Wkt; use crate::executor::WkbExecutor; @@ -128,8 +129,14 @@ fn invoke_scalar(wkt_bytes: &str, builder: &mut BinaryBuilder) -> Result<()> { let geometry: Wkt = Wkt::from_str(wkt_bytes) .map_err(|err| DataFusionError::Internal(format!("WKT parse error: {err}")))?; - write_geometry(builder, &geometry, wkb::Endianness::LittleEndian) - .map_err(|err| DataFusionError::Internal(format!("WKB write error: {err}"))) + write_geometry( + builder, + &geometry, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .map_err(|err| 
DataFusionError::Internal(format!("WKB write error: {err}"))) } #[cfg(test)] diff --git a/rust/sedona-geo-generic-alg/Cargo.toml b/rust/sedona-geo-generic-alg/Cargo.toml index 666d42b0..2cbcd36e 100644 --- a/rust/sedona-geo-generic-alg/Cargo.toml +++ b/rust/sedona-geo-generic-alg/Cargo.toml @@ -16,40 +16,35 @@ # under the License. [package] name = "sedona-geo-generic-alg" -version = "0.2.0" -authors = ["Apache Sedona "] -license = "Apache-2.0" -homepage = "https://github.com/apache/sedona-db" -repository = "https://github.com/apache/sedona-db" -description = "geo algorithms refactored to work with sedona-geo-traits-ext" -readme = "README.md" -edition = "2021" - -[workspace] +version.workspace = true +homepage.workspace = true +repository.workspace = true +description.workspace = true +readme.workspace = true +edition.workspace = true +rust-version.workspace = true [dependencies] -float_next_after = "1" -geo-traits = { version = "0.3.0" } -geo-types = { version = "0.7.17", features = ["approx", "use-rstar_0_12"] } +float_next_after = { workspace = true } +geo-traits = { workspace = true } +geo-types = { workspace = true, features = ["approx", "use-rstar_0_12"] } sedona-geo-traits-ext = { path = "../sedona-geo-traits-ext" } log = "0.4.11" -num-traits = { version = "0.2", default-features = false, features = ["libm"] } +num-traits = { workspace = true } robust = "1.1.0" rstar = "0.12.0" i_overlay = { version = "4.0.0, < 4.1.0", default-features = false } [dev-dependencies] -approx = "0.5" -criterion = { version = "0.5", features = ["html_reports"] } +sedona-testing = { path = "../sedona-testing" } +approx = { workspace = true } +criterion = { workspace = true } pretty_env_logger = "0.4" -rand = "0.8" +rand = { workspace = true } rand_distr = "0.4.3" -geo = "0.31.0" -wkb = "0.9.1" -wkt = "0.14.0" - -[patch.crates-io] -wkb = { git = "https://github.com/georust/wkb.git", rev = "130eb0c2b343bc9299aeafba6d34c2a6e53f3b6a" } +geo = { workspace = true } +wkb = { workspace = 
true } +wkt = { workspace = true } [[bench]] name = "area" diff --git a/rust/sedona-geo-traits-ext/Cargo.toml b/rust/sedona-geo-traits-ext/Cargo.toml index 9a2a4651..fb9a4950 100644 --- a/rust/sedona-geo-traits-ext/Cargo.toml +++ b/rust/sedona-geo-traits-ext/Cargo.toml @@ -16,27 +16,21 @@ # under the License. [package] name = "sedona-geo-traits-ext" -version = "0.2.0" -authors = ["Apache Sedona "] -license = "Apache-2.0" -homepage = "https://github.com/apache/sedona-db" -repository = "https://github.com/apache/sedona-db" -description = "geo-traits extended for implementing generic algorithms" -readme = "README.md" -edition = "2021" - -[workspace] +version.workspace = true +homepage.workspace = true +repository.workspace = true +description.workspace = true +readme.workspace = true +edition.workspace = true +rust-version.workspace = true [dependencies] -geo-traits = "0.3.0" -geo-types = "0.7.17" -num-traits = { version = "0.2", default-features = false, features = ["libm"] } -wkb = "0.9.1" -byteorder = "1" +geo-traits = { workspace = true } +geo-types = { workspace = true } +num-traits = { workspace = true } +wkb = { workspace = true } +byteorder ={ workspace = true } [dev-dependencies] -wkt = "0.14.0" -rstest = "0.24.0" - -[patch.crates-io] -wkb = { git = "https://github.com/georust/wkb.git", rev = "130eb0c2b343bc9299aeafba6d34c2a6e53f3b6a" } +wkt = { workspace = true } +rstest = { workspace = true } diff --git a/rust/sedona-geo/Cargo.toml b/rust/sedona-geo/Cargo.toml index f9c947a5..df439182 100644 --- a/rust/sedona-geo/Cargo.toml +++ b/rust/sedona-geo/Cargo.toml @@ -32,7 +32,7 @@ criterion = { workspace = true } rstest = { workspace = true } sedona-geometry = { path = "../sedona-geometry" } sedona-schema = { path = "../sedona-schema" } -sedona-testing = { path = "../sedona-testing", features = ["criterion"] } +sedona-testing = { path = "../sedona-testing", features = ["criterion", "geo"] } wkt = { workspace = true } [dependencies] @@ -40,7 +40,7 @@ arrow-schema = 
{ workspace = true } arrow-array = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } -geo-generic-alg = { workspace = true } +sedona-geo-generic-alg = { path = "../sedona-geo-generic-alg" } geo-traits = { workspace = true, features = ["geo-types"] } geo-types = { workspace = true } geo = { workspace = true } diff --git a/rust/sedona-geo/src/centroid.rs b/rust/sedona-geo/src/centroid.rs index e7ab25f5..ee74df13 100644 --- a/rust/sedona-geo/src/centroid.rs +++ b/rust/sedona-geo/src/centroid.rs @@ -17,11 +17,11 @@ //! Centroid extraction functionality for WKB geometries use datafusion_common::{error::DataFusionError, Result}; -use geo_generic_alg::Centroid; -use geo_generic_alg::HasDimensions; use geo_traits::CoordTrait; use geo_traits::GeometryTrait; use geo_traits::PointTrait; +use sedona_geo_generic_alg::Centroid; +use sedona_geo_generic_alg::HasDimensions; use crate::to_geo::item_to_geometry; diff --git a/rust/sedona-geo/src/st_area.rs b/rust/sedona-geo/src/st_area.rs index 8a274d56..5efd5f07 100644 --- a/rust/sedona-geo/src/st_area.rs +++ b/rust/sedona-geo/src/st_area.rs @@ -20,9 +20,9 @@ use arrow_array::builder::Float64Builder; use arrow_schema::DataType; use datafusion_common::error::Result; use datafusion_expr::ColumnarValue; -use geo_generic_alg::Area; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; +use sedona_geo_generic_alg::Area; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; use wkb::reader::Wkb; diff --git a/rust/sedona-geo/src/st_centroid.rs b/rust/sedona-geo/src/st_centroid.rs index 31c3dd70..bc6b8188 100644 --- a/rust/sedona-geo/src/st_centroid.rs +++ b/rust/sedona-geo/src/st_centroid.rs @@ -20,9 +20,9 @@ use std::sync::Arc; use arrow_array::builder::BinaryBuilder; use datafusion_common::{error::Result, exec_err}; use datafusion_expr::ColumnarValue; -use geo_generic_alg::Centroid; use 
sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; +use sedona_geo_generic_alg::Centroid; use sedona_geometry::is_empty::is_geometry_empty; use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, diff --git a/rust/sedona-geo/src/st_distance.rs b/rust/sedona-geo/src/st_distance.rs index 4900690a..e2f48a15 100644 --- a/rust/sedona-geo/src/st_distance.rs +++ b/rust/sedona-geo/src/st_distance.rs @@ -20,9 +20,9 @@ use arrow_array::builder::Float64Builder; use arrow_schema::DataType; use datafusion_common::error::Result; use datafusion_expr::ColumnarValue; -use geo_generic_alg::line_measures::DistanceExt; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; +use sedona_geo_generic_alg::line_measures::DistanceExt; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; use wkb::reader::Wkb; diff --git a/rust/sedona-geo/src/st_dwithin.rs b/rust/sedona-geo/src/st_dwithin.rs index 2ba3f5de..25e8bf26 100644 --- a/rust/sedona-geo/src/st_dwithin.rs +++ b/rust/sedona-geo/src/st_dwithin.rs @@ -20,9 +20,9 @@ use arrow_array::builder::BooleanBuilder; use arrow_schema::DataType; use datafusion_common::{cast::as_float64_array, error::Result}; use datafusion_expr::ColumnarValue; -use geo_generic_alg::line_measures::DistanceExt; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; +use sedona_geo_generic_alg::line_measures::DistanceExt; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; use wkb::reader::Wkb; diff --git a/rust/sedona-geo/src/st_intersection_aggr.rs b/rust/sedona-geo/src/st_intersection_aggr.rs index 446290f8..b9cad44b 100644 --- a/rust/sedona-geo/src/st_intersection_aggr.rs +++ b/rust/sedona-geo/src/st_intersection_aggr.rs @@ -31,9 +31,9 @@ use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, matchers::ArgMatcher, }; -use wkb::reader::Wkb; use 
wkb::writer::write_geometry; use wkb::Endianness; +use wkb::{reader::Wkb, writer::WriteOptions}; /// ST_Intersection_Aggr() implementation pub fn st_intersection_aggr_impl() -> SedonaAccumulatorRef { @@ -133,7 +133,13 @@ impl IntersectionAccumulator { fn geometry_to_wkb(&self, geom: &geo::Geometry) -> Option> { let mut wkb_bytes = Vec::new(); - match write_geometry(&mut wkb_bytes, geom, Endianness::LittleEndian) { + match write_geometry( + &mut wkb_bytes, + geom, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) { Ok(_) => Some(wkb_bytes), Err(_) => None, } @@ -223,7 +229,9 @@ mod test { use rstest::rstest; use sedona_functions::st_intersection_aggr::st_intersection_aggr_udf; use sedona_schema::datatypes::WKB_VIEW_GEOMETRY; - use sedona_testing::{compare::assert_scalar_equal_wkb_geometry, testers::AggregateUdfTester}; + use sedona_testing::{ + compare::assert_scalar_equal_wkb_geometry_topologically, testers::AggregateUdfTester, + }; #[rstest] fn polygon_polygon_cases(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { @@ -238,16 +246,19 @@ mod test { vec![Some("POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))")], vec![Some("POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))")], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(batches).unwrap(), Some("MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)))"), ); // Empty input - assert_scalar_equal_wkb_geometry(&tester.aggregate_wkt(vec![]).unwrap(), None); + assert_scalar_equal_wkb_geometry_topologically( + &tester.aggregate_wkt(vec![]).unwrap(), + None, + ); // Single polygon input - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester .aggregate_wkt(vec![vec![Some("POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))")]]) .unwrap(), @@ -259,14 +270,17 @@ mod test { vec![Some("POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))")], vec![Some("POLYGON((2 2, 3 2, 3 3, 2 3, 2 2))")], ]; - 
assert_scalar_equal_wkb_geometry(&tester.aggregate_wkt(non_intersecting).unwrap(), None); + assert_scalar_equal_wkb_geometry_topologically( + &tester.aggregate_wkt(non_intersecting).unwrap(), + None, + ); // Input with nulls let nulls_input = vec![ vec![Some("POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))"), None], vec![Some("POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))"), None], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(nulls_input).unwrap(), Some("MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)))"), ); @@ -276,7 +290,7 @@ mod test { vec![Some("POLYGON((0 0, 3 0, 3 3, 0 3, 0 0))")], vec![Some("POLYGON((1 1, 2 1, 2 2, 1 2, 1 1))")], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(contained).unwrap(), Some("MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)))"), ); @@ -298,7 +312,7 @@ mod test { "MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)), ((4 4, 5 4, 5 5, 4 5, 4 4)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(poly_and_multi).unwrap(), Some("MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)))"), ); @@ -310,7 +324,7 @@ mod test { "MULTIPOLYGON(((2 2, 3 2, 3 3, 2 3, 2 2)), ((4 4, 5 4, 5 5, 4 5, 4 4)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(poly_and_nonoverlap_multi).unwrap(), None, ); @@ -324,7 +338,7 @@ mod test { "MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)), ((11 11, 12 11, 12 12, 11 12, 11 11)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(multi_and_multi).unwrap(), Some("MULTIPOLYGON(((1 1,2 1,2 2,1 2,1 1)),((11 11,12 11,12 12,11 12,11 11)))"), ); @@ -348,7 +362,7 @@ mod test { "MULTIPOLYGON(((2 2, 5 2, 5 5, 2 5, 2 2)), ((9 9, 12 9, 12 12, 9 12, 9 9)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( 
&tester.aggregate_wkt(multi_multi_case1).unwrap(), Some("MULTIPOLYGON(((2 2, 3 2, 3 3, 2 3, 2 2)))"), ); @@ -362,7 +376,10 @@ mod test { "MULTIPOLYGON(((2 2, 3 2, 3 3, 2 3, 2 2)), ((7 7, 8 7, 8 8, 7 8, 7 7)))", )], ]; - assert_scalar_equal_wkb_geometry(&tester.aggregate_wkt(multi_multi_case2).unwrap(), None); + assert_scalar_equal_wkb_geometry_topologically( + &tester.aggregate_wkt(multi_multi_case2).unwrap(), + None, + ); // Test case 3: Three MultiPolygons intersection let multi_multi_case3 = vec![ @@ -376,7 +393,7 @@ mod test { "MULTIPOLYGON(((3 3, 5 3, 5 5, 3 5, 3 3)), ((13 13, 15 13, 15 15, 13 15, 13 13)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(multi_multi_case3).unwrap(), Some("MULTIPOLYGON(((3 3,4 3,4 4,3 4,3 3)),((13 13,14 13,14 14,13 14,13 13)))"), ); diff --git a/rust/sedona-geo/src/st_intersects.rs b/rust/sedona-geo/src/st_intersects.rs index 9fac6ed8..8c624fd1 100644 --- a/rust/sedona-geo/src/st_intersects.rs +++ b/rust/sedona-geo/src/st_intersects.rs @@ -20,9 +20,9 @@ use arrow_array::builder::BooleanBuilder; use arrow_schema::DataType; use datafusion_common::error::Result; use datafusion_expr::ColumnarValue; -use geo_generic_alg::Intersects; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; +use sedona_geo_generic_alg::Intersects; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; use wkb::reader::Wkb; diff --git a/rust/sedona-geo/src/st_length.rs b/rust/sedona-geo/src/st_length.rs index 0ab4461e..21b2be1d 100644 --- a/rust/sedona-geo/src/st_length.rs +++ b/rust/sedona-geo/src/st_length.rs @@ -21,9 +21,9 @@ use arrow_array::builder::Float64Builder; use arrow_schema::DataType; use datafusion_common::error::Result; use datafusion_expr::ColumnarValue; -use geo_generic_alg::algorithm::{line_measures::Euclidean, LengthMeasurableExt}; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; 
use sedona_functions::executor::WkbExecutor; +use sedona_geo_generic_alg::algorithm::{line_measures::Euclidean, LengthMeasurableExt}; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; use wkb::reader::Wkb; diff --git a/rust/sedona-geo/src/st_perimeter.rs b/rust/sedona-geo/src/st_perimeter.rs index ad5ffbca..11111b66 100644 --- a/rust/sedona-geo/src/st_perimeter.rs +++ b/rust/sedona-geo/src/st_perimeter.rs @@ -21,9 +21,9 @@ use arrow_array::builder::Float64Builder; use arrow_schema::DataType; use datafusion_common::error::Result; use datafusion_expr::ColumnarValue; -use geo_generic_alg::algorithm::{line_measures::Euclidean, LengthMeasurableExt}; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; +use sedona_geo_generic_alg::algorithm::{line_measures::Euclidean, LengthMeasurableExt}; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; use wkb::reader::Wkb; diff --git a/rust/sedona-geo/src/st_union_aggr.rs b/rust/sedona-geo/src/st_union_aggr.rs index 1260de58..2462e48c 100644 --- a/rust/sedona-geo/src/st_union_aggr.rs +++ b/rust/sedona-geo/src/st_union_aggr.rs @@ -31,9 +31,9 @@ use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, matchers::ArgMatcher, }; -use wkb::reader::Wkb; use wkb::writer::write_geometry; use wkb::Endianness; +use wkb::{reader::Wkb, writer::WriteOptions}; /// ST_Union_Aggr() implementation pub fn st_union_aggr_impl() -> SedonaAccumulatorRef { @@ -127,7 +127,13 @@ impl UnionAccumulator { fn geometry_to_wkb(&self, geom: &geo::Geometry) -> Option> { let mut wkb_bytes = Vec::new(); - match write_geometry(&mut wkb_bytes, geom, Endianness::LittleEndian) { + match write_geometry( + &mut wkb_bytes, + geom, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) { Ok(_) => Some(wkb_bytes), Err(_) => None, } @@ -217,7 +223,9 @@ mod test { use rstest::rstest; use sedona_functions::st_union_aggr::st_union_aggr_udf; use 
sedona_schema::datatypes::WKB_VIEW_GEOMETRY; - use sedona_testing::{compare::assert_scalar_equal_wkb_geometry, testers::AggregateUdfTester}; + use sedona_testing::{ + compare::assert_scalar_equal_wkb_geometry_topologically, testers::AggregateUdfTester, + }; #[rstest] fn polygon_polygon_cases(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { @@ -233,16 +241,16 @@ mod test { vec![Some("POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))")], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(batches).unwrap(), Some("MULTIPOLYGON(((0 0, 2 0, 2 1, 3 1, 3 3, 1 3, 1 2, 0 2, 0 0)))"), ); // Empty input - assert_scalar_equal_wkb_geometry(&tester.aggregate(&vec![]).unwrap(), None); + assert_scalar_equal_wkb_geometry_topologically(&tester.aggregate(&vec![]).unwrap(), None); // Single polygon input - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester .aggregate_wkt(vec![vec![Some("POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))")]]) .unwrap(), @@ -254,7 +262,7 @@ mod test { vec![Some("POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))")], vec![Some("POLYGON((2 2, 3 2, 3 3, 2 3, 2 2))")], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(non_intersecting).unwrap(), Some("MULTIPOLYGON(((0 0, 1 0, 1 1, 0 1, 0 0)),((2 2, 3 2, 3 3, 2 3, 2 2)))"), ); @@ -264,7 +272,7 @@ mod test { vec![Some("POLYGON((0 0, 2 0, 2 2, 0 2, 0 0))"), None], vec![Some("POLYGON((1 1, 3 1, 3 3, 1 3, 1 1))"), None], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(nulls_input).unwrap(), Some("MULTIPOLYGON(((0 0, 2 0, 2 1, 3 1, 3 3, 1 3, 1 2, 0 2, 0 0)))"), ); @@ -274,7 +282,7 @@ mod test { vec![Some("POLYGON((0 0, 3 0, 3 3, 0 3, 0 0))")], vec![Some("POLYGON((1 1, 2 1, 2 2, 1 2, 1 1))")], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( 
&tester.aggregate_wkt(contained).unwrap(), Some("MULTIPOLYGON(((0 0, 3 0, 3 3, 0 3, 0 0)))"), ); @@ -296,7 +304,7 @@ mod test { "MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)), ((4 4, 5 4, 5 5, 4 5, 4 4)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(poly_and_multi).unwrap(), Some("MULTIPOLYGON(((0 0, 3 0, 3 3, 0 3, 0 0)),((4 4, 5 4, 5 5, 4 5, 4 4)))"), ); @@ -308,7 +316,7 @@ mod test { "MULTIPOLYGON(((2 2, 3 2, 3 3, 2 3, 2 2)), ((4 4, 5 4, 5 5, 4 5, 4 4)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(poly_and_nonoverlap_multi).unwrap(), Some("MULTIPOLYGON(((0 0, 1 0, 1 1, 0 1, 0 0)),((2 2, 3 2, 3 3, 2 3, 2 2)),((4 4, 5 4, 5 5, 4 5, 4 4)))"), ); @@ -322,7 +330,7 @@ mod test { "MULTIPOLYGON(((1 1, 2 1, 2 2, 1 2, 1 1)), ((11 11, 13 11, 13 13, 11 13, 11 11)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(multi_and_multi).unwrap(), Some("MULTIPOLYGON(((0 0, 3 0, 3 3, 0 3, 0 0)),((10 10, 12 10, 12 11, 13 11, 13 13, 11 13, 11 12, 10 12, 10 10)))"), ); @@ -346,7 +354,7 @@ mod test { "MULTIPOLYGON(((2 2, 5 2, 5 5, 2 5, 2 2)), ((7 7, 10 7, 10 10, 7 10, 7 7)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(multi_multi_case1).unwrap(), Some("MULTIPOLYGON(((0 0, 3 0, 3 2, 5 2, 5 5, 2 5, 2 3, 0 3, 0 0)),((5 5, 8 5, 8 7, 10 7, 10 10, 7 10, 7 8, 5 8, 5 5)))"), ); @@ -360,7 +368,7 @@ mod test { "MULTIPOLYGON(((2 2, 3 2, 3 3, 2 3, 2 2)), ((7 7, 8 7, 8 8, 7 8, 7 7)))", )], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(multi_multi_case2).unwrap(), Some("MULTIPOLYGON(((0 0,1 0,1 1,0 1,0 0)),((2 2,3 2,3 3,2 3,2 2)),((5 5,6 5,6 6,5 6,5 5)),((7 7,8 7,8 8,7 8,7 7)))"), ); @@ -371,7 +379,7 @@ mod test { vec![Some("MULTIPOLYGON(((3 3, 7 3, 7 7, 3 7, 3 
3)), ((13 13, 17 13, 17 17, 13 17, 13 13)))")], vec![Some("MULTIPOLYGON(((6 6, 10 6, 10 10, 6 10, 6 6)), ((16 16, 20 16, 20 20, 16 20, 16 16)))")], ]; - assert_scalar_equal_wkb_geometry( + assert_scalar_equal_wkb_geometry_topologically( &tester.aggregate_wkt(multi_multi_case3).unwrap(), Some("MULTIPOLYGON(((0 0, 4 0, 4 3, 7 3, 7 6, 10 6, 10 10, 6 10, 6 7, 3 7, 3 4, 0 4, 0 0)),((10 10, 14 10, 14 13, 17 13, 17 16, 20 16, 20 20, 16 20, 16 17, 13 17, 13 14, 10 14, 10 10)))"), ); diff --git a/rust/sedona-geo/src/to_geo.rs b/rust/sedona-geo/src/to_geo.rs index 56a92178..a4401125 100644 --- a/rust/sedona-geo/src/to_geo.rs +++ b/rust/sedona-geo/src/to_geo.rs @@ -69,8 +69,10 @@ pub fn item_to_geometry(geo: impl GeometryTrait) -> Result { } // GeometryCollection causes issues because it has a recursive definition and won't work -// with cargo run --release. Thus, we need our own version of this that limits the -// recursion supported in a GeometryCollection. +// with cargo run --release. Thus, we need our own version of this that works around this +// problem by processing GeometryCollection using a free function instead of relying +// on trait resolver. +// See also https://github.com/geoarrow/geoarrow-rs/pull/956. 
fn to_geometry(item: impl GeometryTrait) -> Option { match item.as_type() { Point(geom) => geom.try_to_point().map(Geometry::Point), @@ -79,35 +81,38 @@ fn to_geometry(item: impl GeometryTrait) -> Option { MultiPoint(geom) => geom.try_to_multi_point().map(Geometry::MultiPoint), MultiLineString(geom) => Some(Geometry::MultiLineString(geom.to_multi_line_string())), MultiPolygon(geom) => Some(Geometry::MultiPolygon(geom.to_multi_polygon())), - GeometryCollection(geom) => { - let geometries = geom - .geometries() - .filter_map(|child| match child.as_type() { - Point(geom) => geom.try_to_point().map(Geometry::Point), - LineString(geom) => Some(Geometry::LineString(geom.to_line_string())), - Polygon(geom) => Some(Geometry::Polygon(geom.to_polygon())), - MultiPoint(geom) => geom.try_to_multi_point().map(Geometry::MultiPoint), - MultiLineString(geom) => { - Some(Geometry::MultiLineString(geom.to_multi_line_string())) - } - MultiPolygon(geom) => Some(Geometry::MultiPolygon(geom.to_multi_polygon())), - _ => None, - }) - .collect::>(); - - // If any child conversions failed, also return None - if geometries.len() != geom.num_geometries() { - return None; - } - - Some(Geometry::GeometryCollection(geo_types::GeometryCollection( - geometries, - ))) - } + GeometryCollection(geom) => geometry_collection_to_geometry(geom), _ => None, } } +fn geometry_collection_to_geometry>( + geom: &GC, +) -> Option { + let geometries = geom + .geometries() + .filter_map(|child| match child.as_type() { + Point(geom) => geom.try_to_point().map(Geometry::Point), + LineString(geom) => Some(Geometry::LineString(geom.to_line_string())), + Polygon(geom) => Some(Geometry::Polygon(geom.to_polygon())), + MultiPoint(geom) => geom.try_to_multi_point().map(Geometry::MultiPoint), + MultiLineString(geom) => Some(Geometry::MultiLineString(geom.to_multi_line_string())), + MultiPolygon(geom) => Some(Geometry::MultiPolygon(geom.to_multi_polygon())), + GeometryCollection(geom) => 
geometry_collection_to_geometry(geom), + _ => None, + }) + .collect::>(); + + // If any child conversions failed, also return None + if geometries.len() != geom.num_geometries() { + return None; + } + + Some(Geometry::GeometryCollection(geo_types::GeometryCollection( + geometries, + ))) +} + #[cfg(test)] mod tests { use datafusion_expr::ColumnarValue; @@ -126,11 +131,6 @@ mod tests { let err = item_to_geometry(unsupported).unwrap_err(); assert!(err.message().starts_with("geo kernel implementation")); - let unsupported = - Wkt::from_str("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION(POINT (1 2)))").unwrap(); - let err = item_to_geometry(unsupported).unwrap_err(); - assert!(err.message().starts_with("geo kernel implementation")); - let unsupported = Wkt::from_str("GEOMETRYCOLLECTION (POINT EMPTY)").unwrap(); let err = item_to_geometry(unsupported).unwrap_err(); assert!(err.message().starts_with("geo kernel implementation")); @@ -145,7 +145,8 @@ mod tests { "MULTIPOINT (1 2, 3 4)", "MULTILINESTRING ((1 2, 3 4))", "MULTIPOLYGON (((0 0, 1 0, 0 1, 0 0)))", - "GEOMETRYCOLLECTION(POINT (1 2))" + "GEOMETRYCOLLECTION(POINT (1 2))", + "GEOMETRYCOLLECTION (GEOMETRYCOLLECTION(POINT (1 2)))" )] wkt_value: &str, ) { @@ -163,6 +164,7 @@ mod tests { Some("MULTILINESTRING ((1 2, 3 4))"), Some("MULTIPOLYGON (((0 0, 1 0, 0 1, 0 0)))"), Some("GEOMETRYCOLLECTION(POINT (1 2))"), + Some("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION(POINT (1 2)))"), None, ]; let args = vec![ColumnarValue::Array(create_array_storage( diff --git a/rust/sedona-geometry/src/bounds.rs b/rust/sedona-geometry/src/bounds.rs index 0f50040b..e918d448 100644 --- a/rust/sedona-geometry/src/bounds.rs +++ b/rust/sedona-geometry/src/bounds.rs @@ -223,6 +223,7 @@ mod test { use super::*; use rstest::rstest; use std::{iter::zip, str::FromStr}; + use wkb::{writer::WriteOptions, Endianness}; use wkt::Wkt; pub fn wkt_bounds_xy(wkt_value: &str) -> Result { @@ -441,7 +442,14 @@ mod test { fn test_wkb_bounds_xy() { let wkt: Wkt = 
Wkt::from_str("POINT (0 1)").unwrap(); let mut out = Vec::new(); - wkb::writer::write_geometry(&mut out, &wkt, wkb::Endianness::LittleEndian).unwrap(); + wkb::writer::write_geometry( + &mut out, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!( wkb_bounds_xy(&out).unwrap(), BoundingBox::xy((0, 0), (1, 1)) diff --git a/rust/sedona-geometry/src/is_empty.rs b/rust/sedona-geometry/src/is_empty.rs index ad3f7d50..444b3d9f 100644 --- a/rust/sedona-geometry/src/is_empty.rs +++ b/rust/sedona-geometry/src/is_empty.rs @@ -51,13 +51,21 @@ mod tests { use super::*; use std::str::FromStr; use wkb::reader::read_wkb; - use wkb::writer::write_geometry; + use wkb::writer::{write_geometry, WriteOptions}; + use wkb::Endianness; use wkt::Wkt; fn create_wkb_bytes_from_wkt(wkt_str: &str) -> Vec { let wkt: Wkt = Wkt::from_str(wkt_str).unwrap(); let mut wkb_bytes = vec![]; - write_geometry(&mut wkb_bytes, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb_bytes, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); wkb_bytes } diff --git a/rust/sedona-geometry/src/wkb_factory.rs b/rust/sedona-geometry/src/wkb_factory.rs index efa9f09f..000788f0 100644 --- a/rust/sedona-geometry/src/wkb_factory.rs +++ b/rust/sedona-geometry/src/wkb_factory.rs @@ -469,7 +469,8 @@ fn count_to_u32(count: usize) -> Result { mod test { use std::str::FromStr; use wkb::reader::read_wkb; - use wkb::writer::write_geometry; + use wkb::writer::{write_geometry, WriteOptions}; + use wkb::Endianness; use wkt::Wkt; use super::*; @@ -478,7 +479,14 @@ mod test { fn test_wkb_point() { let wkt: Wkt = Wkt::from_str("POINT (0 1)").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!(wkb_point((0.0, 1.0)).unwrap(), wkb); } @@ -533,12 +541,26 
@@ mod test { fn test_wkb_linestring() { let wkt: Wkt = Wkt::from_str("LINESTRING EMPTY").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!(wkb_linestring([].into_iter()).unwrap(), wkb); let wkt: Wkt = Wkt::from_str("LINESTRING (0 1, 2 3)").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!( wkb_linestring([(0.0, 1.0), (2.0, 3.0)].into_iter()).unwrap(), wkb @@ -584,12 +606,26 @@ mod test { fn test_wkb_multilinestring() { let wkt: Wkt = Wkt::from_str("MULTILINESTRING EMPTY").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!(wkb_multilinestring([].into_iter()).unwrap(), wkb); let wkt: Wkt = Wkt::from_str("MULTILINESTRING ((0 0, 1 1, 2 2), (3 3, 4 4))").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); let linestrings = vec![ vec![(0.0, 0.0), (1.0, 1.0), (2.0, 2.0)], @@ -603,12 +639,26 @@ mod test { fn test_wkb_polygon() { let wkt: Wkt = Wkt::from_str("POLYGON EMPTY").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!(wkb_polygon([].into_iter()).unwrap(), wkb); let wkt: Wkt = Wkt::from_str("POLYGON ((0 0, 1 0, 0 1, 0 0))").unwrap(); let mut wkb = vec![]; - 
write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!( wkb_polygon([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0)].into_iter()).unwrap(), wkb @@ -697,13 +747,27 @@ mod test { fn test_wkb_multipolygon() { let wkt: Wkt = Wkt::from_str("MULTIPOLYGON EMPTY").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!(wkb_multipolygon([].into_iter()).unwrap(), wkb); let wkt: Wkt = Wkt::from_str("MULTIPOLYGON (((0 0, 1 0, 0 1, 0 0)), ((2 2, 3 2, 2 3, 2 2)))").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); let polygons = vec![ vec![(0.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 0.0)], @@ -717,12 +781,26 @@ mod test { fn test_wkb_multipoint() { let wkt: Wkt = Wkt::from_str("MULTIPOINT EMPTY").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); assert_eq!(wkb_multipoint([].into_iter()).unwrap(), wkb); let wkt: Wkt = Wkt::from_str("MULTIPOINT ((0 0), (1 1))").unwrap(); let mut wkb = vec![]; - write_geometry(&mut wkb, &wkt, wkb::Endianness::LittleEndian).unwrap(); + write_geometry( + &mut wkb, + &wkt, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); let points = vec![(0.0, 0.0), (1.0, 1.0)]; assert_eq!(wkb_multipoint(points.into_iter()).unwrap(), wkb); diff --git a/rust/sedona-spatial-join/Cargo.toml b/rust/sedona-spatial-join/Cargo.toml index 4720b863..d1037145 100644 --- 
a/rust/sedona-spatial-join/Cargo.toml +++ b/rust/sedona-spatial-join/Cargo.toml @@ -44,9 +44,10 @@ datafusion-common-runtime = { workspace = true } futures = { workspace = true } once_cell = { workspace = true } parking_lot = { workspace = true } -geo-generic-alg = { workspace = true } +geo = { workspace = true } +sedona-geo-generic-alg = { path = "../sedona-geo-generic-alg" } geo-traits = { workspace = true, features = ["geo-types"] } -geo-traits-ext = { workspace = true } +sedona-geo-traits-ext = { path = "../sedona-geo-traits-ext" } geo-types = { workspace = true } sedona-common = { path = "../sedona-common" } sedona-expr = { path = "../sedona-expr" } @@ -55,6 +56,7 @@ sedona-geo = { path = "../sedona-geo" } sedona-geometry = { path = "../sedona-geometry" } sedona-schema = { path = "../sedona-schema" } sedona-tg = { path = "../../c/sedona-tg" } +sedona-geos = { path = "../../c/sedona-geos" } wkb = { workspace = true } geo-index = { workspace = true } geos = { workspace = true } @@ -66,5 +68,4 @@ rstest = { workspace = true } sedona-testing = { path = "../sedona-testing" } wkt = { workspace = true } tokio = { workspace = true, features = ["macros"] } -sedona-geos = { path = "../../c/sedona-geos" } rand = { workspace = true } diff --git a/rust/sedona-spatial-join/src/index.rs b/rust/sedona-spatial-join/src/index.rs index 9c5f4475..4e95527b 100644 --- a/rust/sedona-spatial-join/src/index.rs +++ b/rust/sedona-spatial-join/src/index.rs @@ -587,7 +587,7 @@ impl SpatialIndex { let max_distance = distances_with_indices[k_idx].0; // For tie-breakers, create spatial envelope around probe centroid and use rtree.search() - use geo_generic_alg::algorithm::Centroid; + use sedona_geo_generic_alg::algorithm::Centroid; let probe_centroid = probe_geom.centroid().unwrap_or(Point::new(0.0, 0.0)); let probe_x = probe_centroid.x() as f32; let probe_y = probe_centroid.y() as f32; diff --git a/rust/sedona-spatial-join/src/operand_evaluator.rs 
b/rust/sedona-spatial-join/src/operand_evaluator.rs index 56dca647..114d4309 100644 --- a/rust/sedona-spatial-join/src/operand_evaluator.rs +++ b/rust/sedona-spatial-join/src/operand_evaluator.rs @@ -25,10 +25,10 @@ use datafusion_common::{ use datafusion_expr::ColumnarValue; use datafusion_physical_expr::PhysicalExpr; use float_next_after::NextAfter; -use geo_generic_alg::BoundingRect; use geo_index::rtree::util::f64_box_to_f32; use geo_types::{coord, Rect}; use sedona_functions::executor::IterGeo; +use sedona_geo_generic_alg::BoundingRect; use sedona_schema::datatypes::SedonaType; use wkb::reader::Wkb; diff --git a/rust/sedona-spatial-join/src/refine/geo.rs b/rust/sedona-spatial-join/src/refine/geo.rs index 3b555e74..5d13b5e4 100644 --- a/rust/sedona-spatial-join/src/refine/geo.rs +++ b/rust/sedona-spatial-join/src/refine/geo.rs @@ -17,12 +17,11 @@ use std::sync::{Arc, OnceLock}; use datafusion_common::Result; -use geo_generic_alg::{ - line_measures::DistanceExt, Contains, Distance, Euclidean, Intersects, Relate, Within, -}; +use geo::{Contains, Relate, Within}; use sedona_common::{sedona_internal_err, ExecutionMode, SpatialJoinOptions}; use sedona_expr::statistics::GeoStatistics; use sedona_geo::to_geo::item_to_geometry; +use sedona_geo_generic_alg::{line_measures::DistanceExt, Intersects}; use wkb::reader::Wkb; use crate::{ @@ -136,7 +135,7 @@ impl GeoRefiner { Ok(geom) => geom, Err(_) => return Ok(Vec::new()), }; - let probe_geom = geo_generic_alg::PreparedGeometry::from(probe_geom); + let probe_geom = geo::PreparedGeometry::from(probe_geom); for index_result in index_query_results { if self.evaluator.evaluate_prepare_probe( @@ -204,7 +203,7 @@ trait GeoPredicateEvaluator: Send + Sync { fn evaluate_prepare_probe( &self, build: &Wkb, - probe: &geo_generic_alg::PreparedGeometry<'static, geo_types::Geometry>, + probe: &geo::PreparedGeometry<'static, geo_types::Geometry>, distance: Option, ) -> Result; } @@ -237,7 +236,7 @@ impl GeoPredicateEvaluator for 
GeoIntersects { fn evaluate_prepare_probe( &self, build: &Wkb, - probe: &geo_generic_alg::PreparedGeometry<'static, geo_types::Geometry>, + probe: &geo::PreparedGeometry<'static, geo_types::Geometry>, _distance: Option, ) -> Result { let build_geom = match item_to_geometry(build) { @@ -266,7 +265,7 @@ impl GeoPredicateEvaluator for GeoContains { fn evaluate_prepare_probe( &self, build: &Wkb, - probe: &geo_generic_alg::PreparedGeometry<'static, geo_types::Geometry>, + probe: &geo::PreparedGeometry<'static, geo_types::Geometry>, _distance: Option, ) -> Result { let build_geom = match item_to_geometry(build) { @@ -295,7 +294,7 @@ impl GeoPredicateEvaluator for GeoWithin { fn evaluate_prepare_probe( &self, build: &Wkb, - probe: &geo_generic_alg::PreparedGeometry<'static, geo_types::Geometry>, + probe: &geo::PreparedGeometry<'static, geo_types::Geometry>, _distance: Option, ) -> Result { let build_geom = match item_to_geometry(build) { @@ -320,18 +319,13 @@ impl GeoPredicateEvaluator for GeoDistance { fn evaluate_prepare_probe( &self, build: &Wkb, - probe: &geo_generic_alg::PreparedGeometry<'static, geo_types::Geometry>, + probe: &geo::PreparedGeometry<'static, geo_types::Geometry>, distance: Option, ) -> Result { let Some(distance) = distance else { return Ok(false); }; - let build_geom = match item_to_geometry(build) { - Ok(geom) => geom, - Err(_) => return Ok(false), - }; - let euc = Euclidean; - let dist = euc.distance(&build_geom, probe.geometry()); + let dist = build.distance_ext(probe.geometry()); Ok(dist <= distance) } } @@ -358,7 +352,7 @@ macro_rules! 
impl_relate_evaluator { fn evaluate_prepare_probe( &self, build: &Wkb, - probe: &geo_generic_alg::PreparedGeometry<'static, geo_types::Geometry>, + probe: &geo::PreparedGeometry<'static, geo_types::Geometry>, _distance: Option, ) -> Result { let build_geom = match item_to_geometry(build) { diff --git a/rust/sedona-spatial-join/src/refine/geos.rs b/rust/sedona-spatial-join/src/refine/geos.rs index b6570b70..9b4bc298 100644 --- a/rust/sedona-spatial-join/src/refine/geos.rs +++ b/rust/sedona-spatial-join/src/refine/geos.rs @@ -24,7 +24,8 @@ use geos::{Geom, PreparedGeometry}; use parking_lot::Mutex; use sedona_common::{sedona_internal_err, ExecutionMode, SpatialJoinOptions}; use sedona_expr::statistics::GeoStatistics; -use wkb::reader::{to_geos::GEOSWkbFactory, Wkb}; +use sedona_geos::wkb_to_geos::GEOSWkbFactory; +use wkb::reader::Wkb; use crate::{ index::IndexQueryResult, diff --git a/rust/sedona-testing/src/create.rs b/rust/sedona-testing/src/create.rs index fd5410ac..9d785d69 100644 --- a/rust/sedona-testing/src/create.rs +++ b/rust/sedona-testing/src/create.rs @@ -20,6 +20,7 @@ use arrow_array::{ArrayRef, BinaryArray, BinaryViewArray}; use datafusion_common::ScalarValue; use datafusion_expr::ColumnarValue; use sedona_schema::datatypes::SedonaType; +use wkb::{writer::WriteOptions, Endianness}; use wkt::Wkt; /// Create a [`ColumnarValue`] array from a sequence of WKT literals @@ -86,7 +87,14 @@ where pub fn make_wkb(wkt_value: &str) -> Vec { let geom = Wkt::::from_str(wkt_value).unwrap(); let mut out: Vec = vec![]; - wkb::writer::write_geometry(&mut out, &geom, Default::default()).unwrap(); + wkb::writer::write_geometry( + &mut out, + &geom, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); out } diff --git a/rust/sedona-testing/src/datagen.rs b/rust/sedona-testing/src/datagen.rs index 4fb73c21..44818603 100644 --- a/rust/sedona-testing/src/datagen.rs +++ b/rust/sedona-testing/src/datagen.rs @@ -40,6 +40,8 @@ use 
sedona_geometry::types::GeometryTypeId; use sedona_schema::datatypes::{SedonaType, WKB_GEOMETRY}; use std::f64::consts::PI; use std::sync::Arc; +use wkb::writer::WriteOptions; +use wkb::Endianness; /// Builder for generating test data partitions with random geometries. /// @@ -502,7 +504,14 @@ fn generate_random_wkb(rng: &mut R, options: &RandomGeometryOption // Convert geometry to WKB let mut out: Vec = vec![]; - wkb::writer::write_geometry(&mut out, &geometry, Default::default()).unwrap(); + wkb::writer::write_geometry( + &mut out, + &geometry, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .unwrap(); out }