Skip to content

Commit eb47819

Browse files
authored
feat(c/sedona-geos): Implement ST_UnaryUnion (#234)
1 parent 9e3365f commit eb47819

File tree

5 files changed

+181
-1
lines changed

5 files changed

+181
-1
lines changed

c/sedona-geos/benches/geos-functions.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ fn criterion_benchmark(c: &mut Criterion) {
270270
ArrayScalar(Polygon(10), Polygon(500)),
271271
);
272272

273+
benchmark::scalar(c, &f, "geos", "st_unaryunion", Polygon(10));
274+
benchmark::scalar(c, &f, "geos", "st_unaryunion", Polygon(500));
275+
273276
benchmark::scalar(
274277
c,
275278
&f,

c/sedona-geos/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@ mod st_isvalid;
3030
mod st_isvalidreason;
3131
mod st_length;
3232
mod st_perimeter;
33+
mod st_unaryunion;
3334
pub mod wkb_to_geos;

c/sedona-geos/src/register.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::{
2121
st_centroid::st_centroid_impl, st_convexhull::st_convex_hull_impl, st_dwithin::st_dwithin_impl,
2222
st_isring::st_is_ring_impl, st_isvalid::st_is_valid_impl,
2323
st_isvalidreason::st_is_valid_reason_impl, st_length::st_length_impl,
24-
st_perimeter::st_perimeter_impl,
24+
st_perimeter::st_perimeter_impl, st_unaryunion::st_unary_union_impl,
2525
};
2626

2727
use crate::binary_predicates::{
@@ -58,6 +58,7 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
5858
("st_perimeter", st_perimeter_impl()),
5959
("st_symdifference", st_sym_difference_impl()),
6060
("st_touches", st_touches_impl()),
61+
("st_unaryunion", st_unary_union_impl()),
6162
("st_union", st_union_impl()),
6263
("st_within", st_within_impl()),
6364
("st_crosses", st_crosses_impl()),

c/sedona-geos/src/st_unaryunion.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::sync::Arc;
19+
20+
use arrow_array::builder::BinaryBuilder;
21+
use datafusion_common::{DataFusionError, Result};
22+
use datafusion_expr::ColumnarValue;
23+
use geos::Geom;
24+
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
25+
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
26+
use sedona_schema::datatypes::SedonaType;
27+
use sedona_schema::{datatypes::WKB_GEOMETRY, matchers::ArgMatcher};
28+
29+
use crate::executor::GeosExecutor;
30+
31+
/// ST_UnaryUnion() implementation using the geos crate
32+
pub fn st_unary_union_impl() -> ScalarKernelRef {
33+
Arc::new(STUnaryUnion {})
34+
}
35+
36+
#[derive(Debug)]
37+
struct STUnaryUnion {}
38+
39+
impl SedonaScalarKernel for STUnaryUnion {
40+
fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
41+
let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY);
42+
matcher.match_args(args)
43+
}
44+
45+
fn invoke_batch(
46+
&self,
47+
arg_types: &[SedonaType],
48+
args: &[ColumnarValue],
49+
) -> Result<ColumnarValue> {
50+
let executor = GeosExecutor::new(arg_types, args);
51+
let mut builder = BinaryBuilder::with_capacity(
52+
executor.num_iterations(),
53+
WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
54+
);
55+
executor.execute_wkb_void(|maybe_wkb| {
56+
match maybe_wkb {
57+
Some(wkb) => {
58+
let result_wkb = invoke_scalar(&wkb)?;
59+
builder.append_value(&result_wkb);
60+
}
61+
_ => builder.append_null(),
62+
}
63+
64+
Ok(())
65+
})?;
66+
67+
executor.finish(Arc::new(builder.finish()))
68+
}
69+
}
70+
71+
fn invoke_scalar(geos_geom: &geos::Geometry) -> Result<Vec<u8>> {
72+
let geometry = geos_geom
73+
.unary_union()
74+
.map_err(|e| DataFusionError::Execution(format!("Failed to perform unary union: {e}")))?;
75+
76+
let wkb = geometry
77+
.to_wkb()
78+
.map_err(|e| DataFusionError::Execution(format!("Failed to convert to wkb: {e}")))?;
79+
80+
Ok(wkb.into())
81+
}
82+
83+
#[cfg(test)]
mod tests {
    use datafusion_common::ScalarValue;
    use rstest::rstest;
    use sedona_expr::scalar_udf::SedonaScalarUDF;
    use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
    use sedona_testing::compare::assert_array_equal;
    use sedona_testing::create::create_array;
    use sedona_testing::testers::ScalarUdfTester;

    use super::*;

    /// Two unit squares that share the edge x = 1.
    const ADJACENT_SQUARES: &str =
        "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((1 0, 2 0, 2 1, 1 1, 1 0)))";
    /// The single polygon expected once the shared edge is dissolved.
    const MERGED_SQUARES: &str = "POLYGON ((0 0, 0 1, 1 1, 2 1, 2 0, 1 0, 0 0))";
    /// Three distinct points; the test expects them back unchanged.
    const THREE_POINTS: &str = "MULTIPOINT ((0 0), (1 1), (2 2))";

    /// Runs the kernel through a scalar UDF for both WKB storage types,
    /// covering scalar inputs, a null scalar, and an array with a null entry.
    #[rstest]
    fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
        let kernel_udf = SedonaScalarUDF::from_kernel("st_unary_union", st_unary_union_impl());
        let tester = ScalarUdfTester::new(kernel_udf.into(), vec![sedona_type]);
        tester.assert_return_type(WKB_GEOMETRY);

        // Scalar: adjacent polygons are merged into one.
        let merged = tester.invoke_scalar(ADJACENT_SQUARES).unwrap();
        tester.assert_scalar_result_equals(merged, MERGED_SQUARES);

        // Scalar: a multipoint of distinct points comes back as-is.
        let points = tester.invoke_scalar(THREE_POINTS).unwrap();
        tester.assert_scalar_result_equals(points, THREE_POINTS);

        // Scalar: null in, null out.
        let null_out = tester.invoke_scalar(ScalarValue::Null).unwrap();
        assert!(null_out.is_null());

        // Array: mixed geometry types, an empty geometry, and a null element.
        let expected = create_array(
            &[
                Some(MERGED_SQUARES),
                Some("LINESTRING (0 0, 1 1, 2 2)"),
                Some("POINT EMPTY"),
                None,
            ],
            &WKB_GEOMETRY,
        );
        let inputs = vec![
            Some(ADJACENT_SQUARES),
            Some("LINESTRING (0 0, 1 1, 2 2)"),
            Some("POINT EMPTY"),
            None,
        ];
        let actual = tester.invoke_wkb_array(inputs).unwrap();
        assert_array_equal(&actual, &expected);
    }
}

python/sedonadb/tests/functions/test_functions.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,47 @@ def test_st_convexhull(eng, geom, expected):
284284
eng.assert_query_result(f"SELECT ST_ConvexHull({geom_or_null(geom)})", expected)
285285

286286

287+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
    ("geom", "expected"),
    [
        (None, None),
        ("POINT (0 0)", "POINT (0 0)"),
        ("POINT EMPTY", "POINT EMPTY"),
        ("LINESTRING (0 0, 1 1, 2 2)", "LINESTRING (0 0, 1 1, 2 2)"),
        ("LINESTRING EMPTY", "LINESTRING EMPTY"),
        ("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"),
        ("MULTIPOINT ((0 0), (1 1), (2 2))", "MULTIPOINT (0 0, 1 1, 2 2)"),
        (
            "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((1 0, 2 0, 2 1, 1 1, 1 0)))",
            "POLYGON ((0 0, 0 1, 1 1, 2 1, 2 0, 1 0, 0 0))",
        ),
        (
            "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))",
            "MULTIPOLYGON (((0 1, 1 1, 1 0, 0 0, 0 1)), ((2 3, 3 3, 3 2, 2 2, 2 3)))",
        ),
        (
            "GEOMETRYCOLLECTION (POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0)), POLYGON ((1 0, 2 0, 2 1, 1 1, 1 0)))",
            "POLYGON ((0 0, 0 1, 1 1, 2 1, 2 0, 1 0, 0 0))",
        ),
    ],
)
def test_st_unaryunion(eng, geom, expected):
    """Check ST_UnaryUnion on each engine for null, empty and non-empty inputs.

    Null expectations compare the raw query result, empty expectations are
    checked through ST_IsEmpty, and everything else is compared with ST_Equals
    so that differences in vertex order do not matter.
    """
    eng = eng.create_or_skip()

    # The ST_UnaryUnion call is shared by every query below.
    union_sql = f"ST_UnaryUnion({geom_or_null(geom)})"

    if expected is None:
        eng.assert_query_result(f"SELECT {union_sql}", expected)
        return

    if "EMPTY" in expected.upper():
        eng.assert_query_result(f"SELECT ST_IsEmpty({union_sql})", True)
        return

    eng.assert_query_result(
        f"SELECT ST_Equals({union_sql}, {geom_or_null(expected)})",
        True,
    )
326+
327+
287328
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
288329
def test_st_makeline(eng):
289330
eng = eng.create_or_skip()

0 commit comments

Comments
 (0)