Skip to content

Commit 6ba858b

Browse files
authored
feat(c/sedona-geos): Implement ST_IsValid using geos library (#229)
1 parent 3d75664 commit 6ba858b

File tree

5 files changed

+187
-0
lines changed

5 files changed

+187
-0
lines changed

c/sedona-geos/benches/geos-functions.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ fn criterion_benchmark(c: &mut Criterion) {
295295
"st_overlaps",
296296
ArrayScalar(Polygon(10), Polygon(500)),
297297
);
298+
benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(10));
299+
benchmark::scalar(c, &f, "geos", "st_isvalid", Polygon(500));
298300
}
299301

300302
criterion_group!(benches, criterion_benchmark);

c/sedona-geos/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ mod st_buffer;
2525
mod st_centroid;
2626
mod st_convexhull;
2727
mod st_dwithin;
28+
mod st_isvalid;
2829
mod st_length;
2930
mod st_perimeter;
3031
pub mod wkb_to_geos;

c/sedona-geos/src/register.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
use sedona_expr::scalar_udf::ScalarKernelRef;
1818

1919
use crate::st_convexhull::st_convex_hull_impl;
20+
use crate::st_isvalid::st_is_valid_impl;
2021
use crate::{
2122
distance::st_distance_impl, st_area::st_area_impl, st_buffer::st_buffer_impl,
2223
st_centroid::st_centroid_impl, st_dwithin::st_dwithin_impl, st_length::st_length_impl,
@@ -56,5 +57,6 @@ pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
5657
("st_within", st_within_impl()),
5758
("st_crosses", st_crosses_impl()),
5859
("st_overlaps", st_overlaps_impl()),
60+
("st_isvalid", st_is_valid_impl()),
5961
]
6062
}

c/sedona-geos/src/st_isvalid.rs

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
use std::sync::Arc;
18+
19+
use arrow_array::builder::BooleanBuilder;
20+
use arrow_schema::DataType;
21+
use datafusion_common::Result;
22+
use datafusion_expr::ColumnarValue;
23+
use geos::Geom;
24+
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
25+
use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher};
26+
27+
use crate::executor::GeosExecutor;
28+
29+
/// ST_IsValid() implementation using the geos crate
30+
pub fn st_is_valid_impl() -> ScalarKernelRef {
31+
Arc::new(STIsValid {})
32+
}
33+
34+
#[derive(Debug)]
35+
struct STIsValid {}
36+
37+
impl SedonaScalarKernel for STIsValid {
38+
fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
39+
let matcher = ArgMatcher::new(
40+
vec![ArgMatcher::is_geometry()],
41+
SedonaType::Arrow(DataType::Boolean),
42+
);
43+
44+
matcher.match_args(args)
45+
}
46+
47+
fn invoke_batch(
48+
&self,
49+
arg_types: &[SedonaType],
50+
args: &[ColumnarValue],
51+
) -> Result<ColumnarValue> {
52+
let executor = GeosExecutor::new(arg_types, args);
53+
let mut builder = BooleanBuilder::with_capacity(executor.num_iterations());
54+
executor.execute_wkb_void(|maybe_wkb| {
55+
match maybe_wkb {
56+
Some(wkb) => {
57+
builder.append_value(invoke_scalar(&wkb));
58+
}
59+
_ => builder.append_null(),
60+
}
61+
62+
Ok(())
63+
})?;
64+
65+
executor.finish(Arc::new(builder.finish()))
66+
}
67+
}
68+
69+
fn invoke_scalar(geos_geom: &geos::Geometry) -> bool {
70+
geos_geom.is_valid()
71+
}
72+
73+
#[cfg(test)]
74+
mod tests {
75+
use std::sync::Arc;
76+
77+
use arrow_array::{ArrayRef, BooleanArray};
78+
use arrow_schema::DataType;
79+
use datafusion_common::ScalarValue;
80+
use rstest::rstest;
81+
use sedona_expr::scalar_udf::SedonaScalarUDF;
82+
use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY};
83+
use sedona_testing::testers::ScalarUdfTester;
84+
85+
use super::*;
86+
87+
#[rstest]
88+
fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
89+
let udf = SedonaScalarUDF::from_kernel("st_isvalid", st_is_valid_impl());
90+
let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
91+
tester.assert_return_type(DataType::Boolean);
92+
93+
// Valid polygon
94+
let result = tester
95+
.invoke_scalar("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))")
96+
.unwrap();
97+
tester.assert_scalar_result_equals(result, true);
98+
99+
// Invalid polygon (self-intersecting)
100+
let result = tester
101+
.invoke_scalar("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))")
102+
.unwrap();
103+
tester.assert_scalar_result_equals(result, false);
104+
105+
let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
106+
assert!(result.is_null());
107+
108+
let input_wkt = vec![
109+
None,
110+
Some("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))"),
111+
Some("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))"),
112+
Some("LINESTRING (0 0, 1 1)"),
113+
Some("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))"),
114+
];
115+
116+
let expected: ArrayRef = Arc::new(BooleanArray::from(vec![
117+
None,
118+
Some(true),
119+
Some(false),
120+
Some(true),
121+
Some(false),
122+
]));
123+
assert_eq!(&tester.invoke_wkb_array(input_wkt).unwrap(), &expected);
124+
}
125+
}

python/sedonadb/tests/functions/test_functions.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,63 @@ def test_st_centroid(eng, geom, expected):
211211
eng.assert_query_result(f"SELECT ST_Centroid({geom_or_null(geom)})", expected)
212212

213213

214+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
    ("geom", "expected"),
    [
        # Null input
        (None, None),
        # Points
        ("POINT (0 0)", True),
        ("POINT EMPTY", True),
        # Valid (multi)linestrings, including a line that crosses itself
        ("LINESTRING (0 0, 1 1)", True),
        ("LINESTRING (0 0, 1 1, 1 0, 0 1)", True),
        (
            "MULTILINESTRING ((0 0, 1 1), (0 0, 1 1, 1 0, 0 1))",
            True,
        ),
        ("LINESTRING EMPTY", True),
        # Invalid linestrings: every vertex is the same point
        ("LINESTRING (0 0, 0 0)", False),
        ("LINESTRING (0 0, 0 0, 0 0)", False),
        # Invalid multilinestrings: one degenerate component, then two
        ("MULTILINESTRING ((0 0, 0 0), (1 1, 2 2))", False),
        (
            "MULTILINESTRING ((0 0, 0 0), (1 1, 1 1))",
            False,
        ),
        # Valid polygons: simple square, empty, square with an interior hole
        ("POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))", True),
        ("POLYGON EMPTY", True),
        ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 1, 1 2, 2 2, 2 1, 1 1))", True),
        # Invalid polygons
        # Bowtie: the ring crosses itself
        ("POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0))", False),
        # Hole in the corner, sharing edges with the shell
        ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (0 0, 0 1, 1 1, 1 0, 0 0))", False),
        # Figure-8: the ring passes through (1 1) twice
        ("Polygon((0 0, 2 0, 1 1, 2 2, 0 2, 1 1, 0 0))", False),
        # Hole lying along the shell's top edge between (1 10) and (2 10)
        (
            "POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0), (1 10, 1 9, 2 9, 2 10, 1 10))",
            False,
        ),
        # Multipolygon whose members overlap
        (
            "MULTIPOLYGON (((0 0, 2 0, 2 2, 0 2, 0 0)), ((1 1, 3 1, 3 3, 1 3, 1 1)))",
            False,
        ),
        # Multipolygon whose members are disjoint
        (
            "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))",
            True,
        ),
        # Geometry collections wrapping an invalid, then a valid, polygon
        ("GEOMETRYCOLLECTION (POLYGON ((0 0, 1 1, 0 1, 1 0, 0 0)))", False),
        ("GEOMETRYCOLLECTION (POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)))", True),
    ],
)
def test_st_isvalid(eng, geom, expected):
    """Check ST_IsValid on each engine against the expected validity flag."""
    engine = eng.create_or_skip()
    query = f"SELECT ST_IsValid({geom_or_null(geom)})"
    engine.assert_query_result(query, expected)
270+
214271
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
215272
@pytest.mark.parametrize(
216273
("geom", "expected"),

0 commit comments

Comments
 (0)