Skip to content

Commit 8e6234c

Browse files
authored
feat(c/sedona-geos): Implement ST_SimplifyPreserveTopology using geos (#243)
1 parent b8ac5ef commit 8e6234c

File tree

4 files changed

+287
-3
lines changed

4 files changed

+287
-3
lines changed

c/sedona-geos/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,6 @@ mod st_isvalid;
3131
mod st_isvalidreason;
3232
mod st_length;
3333
mod st_perimeter;
34+
mod st_simplifypreservetopology;
3435
mod st_unaryunion;
3536
pub mod wkb_to_geos;

c/sedona-geos/src/register.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ use crate::{
2121
st_centroid::st_centroid_impl, st_convexhull::st_convex_hull_impl, st_dwithin::st_dwithin_impl,
2222
st_isring::st_is_ring_impl, st_issimple::st_is_simple_impl, st_isvalid::st_is_valid_impl,
2323
st_isvalidreason::st_is_valid_reason_impl, st_length::st_length_impl,
24-
st_perimeter::st_perimeter_impl, st_unaryunion::st_unary_union_impl,
24+
st_perimeter::st_perimeter_impl,
25+
st_simplifypreservetopology::st_simplify_preserve_topology_impl,
26+
st_unaryunion::st_unary_union_impl,
2527
};
2628

2729
use crate::binary_predicates::{
@@ -57,12 +59,14 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
5759
("st_length", st_length_impl()),
5860
("st_overlaps", st_overlaps_impl()),
5961
("st_perimeter", st_perimeter_impl()),
62+
(
63+
"st_simplifypreservetopology",
64+
st_simplify_preserve_topology_impl(),
65+
),
6066
("st_symdifference", st_sym_difference_impl()),
6167
("st_touches", st_touches_impl()),
6268
("st_unaryunion", st_unary_union_impl()),
6369
("st_union", st_union_impl()),
6470
("st_within", st_within_impl()),
65-
("st_crosses", st_crosses_impl()),
66-
("st_overlaps", st_overlaps_impl()),
6771
]
6872
}
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::sync::Arc;
19+
20+
use arrow_array::builder::BinaryBuilder;
21+
use arrow_schema::DataType;
22+
use datafusion_common::cast::as_float64_array;
23+
use datafusion_common::error::Result;
24+
use datafusion_common::DataFusionError;
25+
use datafusion_expr::ColumnarValue;
26+
use geos::Geom;
27+
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
28+
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
29+
use sedona_schema::{
30+
datatypes::{SedonaType, WKB_GEOMETRY},
31+
matchers::ArgMatcher,
32+
};
33+
34+
use crate::executor::GeosExecutor;
35+
36+
/// ST_SimplifyPreserveTopology() implementation using the geos crate
37+
pub fn st_simplify_preserve_topology_impl() -> ScalarKernelRef {
38+
Arc::new(STSimplifyPreserveTopology {})
39+
}
40+
41+
#[derive(Debug)]
42+
struct STSimplifyPreserveTopology {}
43+
44+
impl SedonaScalarKernel for STSimplifyPreserveTopology {
45+
fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
46+
let matcher = ArgMatcher::new(
47+
vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()],
48+
WKB_GEOMETRY,
49+
);
50+
51+
matcher.match_args(args)
52+
}
53+
54+
fn invoke_batch(
55+
&self,
56+
arg_types: &[SedonaType],
57+
args: &[ColumnarValue],
58+
) -> Result<ColumnarValue> {
59+
let executor = GeosExecutor::new(arg_types, args);
60+
61+
let tolerance_value = args[1]
62+
.cast_to(&DataType::Float64, None)?
63+
.to_array(executor.num_iterations())?;
64+
let tolerance_array = as_float64_array(&tolerance_value)?;
65+
66+
let mut tolerance_iter = tolerance_array.iter();
67+
68+
let mut builder = BinaryBuilder::with_capacity(
69+
executor.num_iterations(),
70+
WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
71+
);
72+
73+
executor.execute_wkb_void(|wkb| {
74+
match (wkb, tolerance_iter.next().unwrap()) {
75+
(Some(wkb), Some(tolerance)) => {
76+
invoke_scalar(&wkb, tolerance, &mut builder)?;
77+
builder.append_value([]);
78+
}
79+
_ => builder.append_null(),
80+
}
81+
Ok(())
82+
})?;
83+
84+
executor.finish(Arc::new(builder.finish()))
85+
}
86+
}
87+
88+
fn invoke_scalar(
89+
geos_geom: &geos::Geometry,
90+
tolerance: f64,
91+
writer: &mut impl std::io::Write,
92+
) -> Result<()> {
93+
let geometry = geos_geom
94+
.topology_preserve_simplify(tolerance)
95+
.map_err(|e| DataFusionError::Execution(format!("Failed to simplify geometry: {e}")))?;
96+
97+
let wkb = geometry
98+
.to_wkb()
99+
.map_err(|e| DataFusionError::Execution(format!("Failed to convert to wkb: {e}")))?;
100+
101+
writer.write_all(wkb.as_ref())?;
102+
Ok(())
103+
}
104+
105+
#[cfg(test)]
106+
mod tests {
107+
use datafusion_common::ScalarValue;
108+
use rstest::rstest;
109+
use sedona_expr::scalar_udf::SedonaScalarUDF;
110+
use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
111+
use sedona_testing::compare::assert_array_equal;
112+
use sedona_testing::create::create_array;
113+
use sedona_testing::testers::ScalarUdfTester;
114+
115+
use super::*;
116+
117+
#[rstest]
118+
fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
119+
let udf = SedonaScalarUDF::from_kernel(
120+
"st_simplifypreservetopology",
121+
st_simplify_preserve_topology_impl(),
122+
);
123+
let tester = ScalarUdfTester::new(
124+
udf.into(),
125+
vec![sedona_type.clone(), SedonaType::Arrow(DataType::Float64)],
126+
);
127+
tester.assert_return_type(WKB_GEOMETRY);
128+
129+
// Should remove the point (0 10)
130+
let result = tester
131+
.invoke_scalar_scalar("LINESTRING(0 0, 0 10, 0 51, 50 20, 30 20, 7 32)", 2.0)
132+
.unwrap();
133+
tester.assert_scalar_result_equals(result, "LINESTRING(0 0,0 51,50 20,30 20,7 32)");
134+
135+
// Short linestring should preserve endpoints
136+
let result = tester
137+
.invoke_scalar_scalar("LINESTRING(0 0,0 10)", 2.0)
138+
.unwrap();
139+
tester.assert_scalar_result_equals(result, "LINESTRING(0 0,0 10)");
140+
141+
// Null geometry
142+
let result = tester.invoke_scalar_scalar(ScalarValue::Null, 1.0).unwrap();
143+
assert!(result.is_null());
144+
145+
// Null tolerance
146+
let result = tester
147+
.invoke_scalar_scalar("LINESTRING(0 0,0 10)", ScalarValue::Null)
148+
.unwrap();
149+
assert!(result.is_null());
150+
151+
// Array processing with tolerance 2.0
152+
let input_wkt_t2 = vec![
153+
Some("LINESTRING(0 0, 0 10, 0 51, 50 20, 30 20, 7 32)"),
154+
Some("POLYGON((0 0, 0 10, 0 11, 0 20, 10 20, 10 0, 0 0))"),
155+
Some("LINESTRING EMPTY"),
156+
Some("POINT EMPTY"),
157+
Some("POLYGON EMPTY"),
158+
None,
159+
];
160+
161+
let expected_t2 = create_array(
162+
&[
163+
Some("LINESTRING(0 0,0 51,50 20,30 20,7 32)"),
164+
Some("POLYGON((0 0,0 20,10 20,10 0,0 0))"),
165+
Some("LINESTRING EMPTY"),
166+
Some("POINT EMPTY"),
167+
Some("POLYGON EMPTY"),
168+
None,
169+
],
170+
&WKB_GEOMETRY,
171+
);
172+
assert_array_equal(
173+
&tester.invoke_wkb_array_scalar(input_wkt_t2, 2.0).unwrap(),
174+
&expected_t2,
175+
);
176+
177+
// Array processing with tolerance 20.0
178+
let input_wkt_t20 = vec![
179+
Some("LINESTRING(0 0,0 10)"),
180+
Some("POLYGON((0 0, 10 0, 10 10, 0 10, 0 0),(5 5, 5 6, 6 6, 8 5, 5 5))"),
181+
Some("MULTIPOLYGON(((100 100, 100 130, 130 130, 130 100, 100 100)),((0 0, 10 0, 10 10, 0 10, 0 0),(5 5, 5 6, 6 6, 8 5, 5 5)))"),
182+
];
183+
184+
let expected_t20 = create_array(
185+
&[
186+
Some("LINESTRING(0 0,0 10)"),
187+
Some("POLYGON((0 0,10 0,10 10,0 10,0 0),(5 5,5 6,8 5,5 5))"),
188+
Some("MULTIPOLYGON(((100 100,100 130,130 130,130 100,100 100)),((0 0,10 0,10 10,0 10,0 0),(5 5,5 6,8 5,5 5)))"),
189+
],
190+
&WKB_GEOMETRY,
191+
);
192+
assert_array_equal(
193+
&tester.invoke_wkb_array_scalar(input_wkt_t20, 20.0).unwrap(),
194+
&expected_t20,
195+
);
196+
197+
// Test array tolerance input - same geometry with different tolerance values
198+
let input_wkt_array = vec![
199+
Some("LINESTRING (0 0, 0 10, 0 51, 50 20, 30 20, 7 32)"),
200+
Some("LINESTRING (0 0, 0 10, 0 51, 50 20, 30 20, 7 32)"),
201+
Some("LINESTRING (0 0, 0 10, 0 51, 50 20, 30 20, 7 32)"),
202+
None,
203+
];
204+
205+
use arrow_array::Float64Array;
206+
let tolerance_array: Arc<Float64Array> = Arc::new(Float64Array::from(vec![
207+
Some(2.0),
208+
Some(10.0),
209+
Some(50.0),
210+
Some(5.0),
211+
]));
212+
213+
let expected_array = create_array(
214+
&[
215+
Some("LINESTRING (0 0, 0 51, 50 20, 30 20, 7 32)"), // Tolerance 2.0
216+
Some("LINESTRING (0 0, 0 51, 50 20, 7 32)"), // Tolerance 10.0
217+
Some("LINESTRING (0 0, 7 32)"), // Tolerance 50.0
218+
None,
219+
],
220+
&WKB_GEOMETRY,
221+
);
222+
223+
assert_array_equal(
224+
&tester
225+
.invoke_arrays(vec![
226+
create_array(&input_wkt_array, &sedona_type),
227+
tolerance_array,
228+
])
229+
.unwrap(),
230+
&expected_array,
231+
);
232+
}
233+
}

python/sedonadb/tests/functions/test_functions.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,3 +1399,49 @@ def test_st_isvalidreason(eng, geom, expected):
13991399
else:
14001400
query = f"SELECT ST_IsValidReason({geom_or_null(geom)})"
14011401
eng.assert_query_result(query, expected)
1402+
1403+
1404+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
1405+
@pytest.mark.parametrize(
1406+
("geom", "tolerance", "expected"),
1407+
[
1408+
# removes intermediate point
1409+
(
1410+
"LINESTRING (0 0, 0 10, 0 51, 50 20, 30 20, 7 32)",
1411+
2,
1412+
"LINESTRING (0 0, 0 51, 50 20, 30 20, 7 32)",
1413+
),
1414+
# Short linestring preserves endpoints
1415+
(
1416+
"LINESTRING (0 0, 0 10)",
1417+
20,
1418+
"LINESTRING (0 0, 0 10)",
1419+
),
1420+
# Null handling
1421+
(None, 2, None),
1422+
(None, None, None),
1423+
("LINESTRING (0 0, 0 10)", None, None),
1424+
# Empty geometries
1425+
("LINESTRING EMPTY", 2, "LINESTRING EMPTY"),
1426+
("POINT EMPTY", 2, "POINT (nan nan)"),
1427+
("POLYGON EMPTY", 2, "POLYGON EMPTY"),
1428+
# inner ring simplified
1429+
(
1430+
"POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (5 5, 5 6, 6 6, 8 5, 5 5))",
1431+
20,
1432+
"POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (5 5, 5 6, 8 5, 5 5))",
1433+
),
1434+
# second polygon's inner ring simplified
1435+
(
1436+
"MULTIPOLYGON (((100 100, 100 130, 130 130, 130 100, 100 100)), ((0 0, 10 0, 10 10, 0 10, 0 0), (5 5, 5 6, 6 6, 8 5, 5 5)))",
1437+
20,
1438+
"MULTIPOLYGON (((100 100, 100 130, 130 130, 130 100, 100 100)), ((0 0, 10 0, 10 10, 0 10, 0 0), (5 5, 5 6, 8 5, 5 5)))",
1439+
),
1440+
],
1441+
)
1442+
def test_st_simplifypreservetopology(eng, geom, tolerance, expected):
1443+
eng = eng.create_or_skip()
1444+
eng.assert_query_result(
1445+
f"SELECT ST_SimplifyPreserveTopology({geom_or_null(geom)}, {val_or_null(tolerance)})",
1446+
expected,
1447+
)

0 commit comments

Comments
 (0)