From 09bff9e21160683e4d76b3fbeb298792ed2d6257 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 15:40:47 -0700 Subject: [PATCH 1/8] Fix builder capacity of st_centroid geo --- rust/sedona-geo/src/st_centroid.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rust/sedona-geo/src/st_centroid.rs b/rust/sedona-geo/src/st_centroid.rs index bc6b8188..9b027695 100644 --- a/rust/sedona-geo/src/st_centroid.rs +++ b/rust/sedona-geo/src/st_centroid.rs @@ -54,8 +54,10 @@ impl SedonaScalarKernel for STCentroid { args: &[ColumnarValue], ) -> Result { let executor = WkbExecutor::new(arg_types, args); - let mut builder = - BinaryBuilder::with_capacity(executor.num_iterations(), WKB_MIN_PROBABLE_BYTES); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); executor.execute_wkb_void(|maybe_wkb| { match maybe_wkb { Some(wkb) => { From 91a0b7e1b9d8212caae89a9b16de8ac774ae1558 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 15:41:04 -0700 Subject: [PATCH 2/8] Implement st_buffer geo --- rust/sedona-geo/src/lib.rs | 1 + rust/sedona-geo/src/register.rs | 7 +- rust/sedona-geo/src/st_buffer.rs | 224 +++++++++++++++++++++++++++++++ 3 files changed, 229 insertions(+), 3 deletions(-) create mode 100644 rust/sedona-geo/src/st_buffer.rs diff --git a/rust/sedona-geo/src/lib.rs b/rust/sedona-geo/src/lib.rs index 0a5224ab..315d4054 100644 --- a/rust/sedona-geo/src/lib.rs +++ b/rust/sedona-geo/src/lib.rs @@ -17,6 +17,7 @@ pub mod centroid; pub mod register; mod st_area; +mod st_buffer; mod st_centroid; mod st_distance; mod st_dwithin; diff --git a/rust/sedona-geo/src/register.rs b/rust/sedona-geo/src/register.rs index 9041c2dc..e8118969 100644 --- a/rust/sedona-geo/src/register.rs +++ b/rust/sedona-geo/src/register.rs @@ -21,15 +21,16 @@ use crate::st_intersection_aggr::st_intersection_aggr_impl; use crate::st_line_interpolate_point::st_line_interpolate_point_impl; use crate::st_union_aggr::st_union_aggr_impl; use crate::{ - st_area::st_area_impl, st_centroid::st_centroid_impl, st_distance::st_distance_impl, - st_dwithin::st_dwithin_impl, st_intersects::st_intersects_impl, st_length::st_length_impl, - st_perimeter::st_perimeter_impl, + st_area::st_area_impl, st_buffer::st_buffer_impl, st_centroid::st_centroid_impl, + st_distance::st_distance_impl, st_dwithin::st_dwithin_impl, st_intersects::st_intersects_impl, + st_length::st_length_impl, st_perimeter::st_perimeter_impl, }; pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> { vec![ ("st_intersects", st_intersects_impl()), ("st_area", st_area_impl()), + ("st_buffer", st_buffer_impl()), ("st_centroid", st_centroid_impl()), ("st_distance", st_distance_impl()), ("st_dwithin", st_dwithin_impl()), diff --git a/rust/sedona-geo/src/st_buffer.rs b/rust/sedona-geo/src/st_buffer.rs new file mode 100644 index 00000000..2459e6f8 --- /dev/null +++ b/rust/sedona-geo/src/st_buffer.rs @@ -0,0 +1,224 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_array::builder::BinaryBuilder; +use arrow_schema::DataType; +use datafusion_common::{error::Result, exec_err, DataFusionError}; +use datafusion_expr::ColumnarValue; +use geo::algorithm::buffer::{Buffer, BufferStyle}; +use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; +use sedona_functions::executor::WkbExecutor; +use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; +use sedona_schema::{ + datatypes::{SedonaType, WKB_GEOMETRY}, + matchers::ArgMatcher, +}; +use wkb::{ + writer::{write_geometry, WriteOptions}, + Endianness, +}; + +/// ST_Centroid() implementation using centroid extraction +pub fn st_buffer_impl() -> ScalarKernelRef { + Arc::new(STBuffer {}) +} + +#[derive(Debug)] +struct STBuffer {} + +impl SedonaScalarKernel for STBuffer { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()], + WKB_GEOMETRY, + ); + + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + // Extract the constant scalar value before looping over the input geometries + let params: Option>; + let arg1 = args[1].cast_to(&DataType::Float64, None)?; + if let ColumnarValue::Scalar(scalar_arg) = &arg1 { + if scalar_arg.is_null() { + params = None; + } else { + let distance = Some(f64::try_from(scalar_arg.clone())?); + params = Some(BufferStyle::new(distance.unwrap())); + } + } else { + return exec_err!("Invalid distance: {:?}", args[1]); + } + + // let executor = GeoTypesExecutor::new(arg_types, args); + let executor = WkbExecutor::new(arg_types, args); + let mut builder = BinaryBuilder::with_capacity( + executor.num_iterations(), + WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), + ); + executor.execute_wkb_void(|maybe_wkb| { + match (maybe_wkb, params.clone()) { + (Some(wkb), Some(params)) => { + invoke_scalar(&wkb, params, &mut builder)?; + builder.append_value([]); + } + _ => builder.append_null(), + } + + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +use wkb::reader::Wkb; +fn invoke_scalar( + wkb: &Wkb, + params: BufferStyle, + writer: &mut impl std::io::Write, +) -> Result<()> { + use crate::to_geo::item_to_geometry; + use geo_types::Polygon; + use sedona_geometry::is_empty::is_geometry_empty; + + // PostGIS returns POLYGON EMPTY for all empty geometries + let is_empty = is_geometry_empty(wkb).map_err(|e| DataFusionError::External(Box::new(e)))?; + if is_empty { + let empty_polygon = Polygon::::empty(); + write_geometry( + writer, + &empty_polygon, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + return Ok(()); + } + + let geom = item_to_geometry(wkb)?; + + let buffer = geom.buffer_with_style(params); + + // Convert type to geo::Geometry + let geometry = geo::Geometry::MultiPolygon(buffer); + + write_geometry( + writer, + &geometry, + &WriteOptions { + endianness: Endianness::LittleEndian, + }, + ) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use arrow_array::ArrayRef; + use datafusion_common::ScalarValue; + use rstest::rstest; + use sedona_expr::scalar_udf::SedonaScalarUDF; + use sedona_schema::datatypes::{WKB_GEOMETRY, WKB_VIEW_GEOMETRY}; + use sedona_testing::compare::assert_array_equal; + use sedona_testing::create::create_array; + use sedona_testing::testers::ScalarUdfTester; + + use super::*; + + #[rstest] + fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) { + let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl()); + let tester = ScalarUdfTester::new( + udf.into(), + vec![sedona_type.clone(), SedonaType::Arrow(DataType::Float64)], + ); + tester.assert_return_type(WKB_GEOMETRY); + + // Check the envelope of the buffers + let envelope_udf = sedona_functions::st_envelope::st_envelope_udf(); + let envelope_tester = ScalarUdfTester::new(envelope_udf.into(), vec![WKB_GEOMETRY]); + + let buffer_result = tester.invoke_scalar_scalar("POINT (1 2)", 2.0).unwrap(); + let envelope_result = envelope_tester.invoke_scalar(buffer_result).unwrap(); + let expected_envelope = "POLYGON((-1 0, -1 4, 3 4, 3 0, -1 0))"; + tester.assert_scalar_result_equals(envelope_result, expected_envelope); + + let result = tester + .invoke_scalar_scalar(ScalarValue::Null, ScalarValue::Null) + .unwrap(); + assert!(result.is_null()); + + let input_wkt = vec![None, Some("POINT (0 0)")]; + let input_dist = 1; + let expected_envelope: ArrayRef = create_array( + &[None, Some("POLYGON((-1 -1, -1 1, 1 1, 1 -1, -1 -1))")], + &WKB_GEOMETRY, + ); + let buffer_result = tester + .invoke_wkb_array_scalar(input_wkt, input_dist) + .unwrap(); + let envelope_result = envelope_tester.invoke_array(buffer_result).unwrap(); + assert_array_equal(&envelope_result, &expected_envelope); + } + + #[test] + fn test_empty_geometry() { + let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl()); + let tester = ScalarUdfTester::new( + udf.into(), + vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)], + ); + + let input_wkt = vec![ + Some("POINT EMPTY"), + Some("LINESTRING EMPTY"), + Some("POLYGON EMPTY"), + Some("MULTIPOINT EMPTY"), + Some("MULTILINESTRING EMPTY"), + Some("MULTIPOLYGON EMPTY"), + Some("GEOMETRYCOLLECTION EMPTY"), + ]; + let input_dist = 2; + + let buffer_result = tester + .invoke_wkb_array_scalar(input_wkt, input_dist) + .unwrap(); + let expected: ArrayRef = create_array( + &[ + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + ], + &WKB_GEOMETRY, + ); + assert_array_equal(&buffer_result, &expected); + } +} From aa4d7042b1c04602d7d0a7c113f6a1e972359af0 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 15:41:42 -0700 Subject: [PATCH 3/8] Copy empty tests to old st_buffer geos --- c/sedona-geos/src/st_buffer.rs | 44 ++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/c/sedona-geos/src/st_buffer.rs b/c/sedona-geos/src/st_buffer.rs index f0e2b650..717e67f6 100644 --- a/c/sedona-geos/src/st_buffer.rs +++ b/c/sedona-geos/src/st_buffer.rs @@ -18,8 +18,8 @@ use std::sync::Arc; use arrow_array::builder::BinaryBuilder; use arrow_schema::DataType; -use datafusion_common::error::Result; use datafusion_common::DataFusionError; +use datafusion_common::{error::Result, exec_err}; use datafusion_expr::ColumnarValue; use geos::{BufferParams, Geom}; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; @@ -71,10 +71,7 @@ impl SedonaScalarKernel for STBuffer { distance = Some(f64::try_from(scalar_arg.clone())?); } } else { - return Err(DataFusionError::Execution(format!( - "Invalid distance: {:?}", - args[1] - ))); + return exec_err!("Invalid distance: {:?}", args[1]); } let executor = GeosExecutor::new(arg_types, args); @@ -163,4 +160,41 @@ mod tests { let envelope_result = envelope_tester.invoke_array(buffer_result).unwrap(); assert_array_equal(&envelope_result, &expected_envelope); } + + #[test] + fn test_empty_geometry() { + let udf = SedonaScalarUDF::from_kernel("st_buffer", st_buffer_impl()); + let tester = ScalarUdfTester::new( + udf.into(), + vec![WKB_GEOMETRY, SedonaType::Arrow(DataType::Float64)], + ); + + let input_wkt = vec![ + Some("POINT EMPTY"), + Some("LINESTRING EMPTY"), + Some("POLYGON EMPTY"), + Some("MULTIPOINT EMPTY"), + Some("MULTILINESTRING EMPTY"), + Some("MULTIPOLYGON EMPTY"), + Some("GEOMETRYCOLLECTION EMPTY"), + ]; + let input_dist = 2; + + let buffer_result = tester + .invoke_wkb_array_scalar(input_wkt, input_dist) + .unwrap(); + let expected: ArrayRef = create_array( + &[ + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + Some("POLYGON EMPTY"), + ], + &WKB_GEOMETRY, + ); + assert_array_equal(&buffer_result, &expected); + } } From faff21d922a8c1833f896b82eaa8755108a8ddcb Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 15:42:21 -0700 Subject: [PATCH 4/8] Add test_st_buffer_empty to python tests --- .../tests/functions/test_functions.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py index 5ddd5f9f..151f4ed8 100644 --- a/python/sedonadb/tests/functions/test_functions.py +++ b/python/sedonadb/tests/functions/test_functions.py @@ -172,7 +172,30 @@ def test_st_buffer(eng, geom, dist, expected_area): eng.assert_query_result( f"SELECT ST_Area(ST_Buffer({geom_or_null(geom)}, {val_or_null(dist)}))", expected_area, - numeric_epsilon=1e-9, + # geos passes with 1e-9, but geo needs it as high as 1e-3 + numeric_epsilon=1e-3, + ) + + +@pytest.mark.parametrize("eng", [SedonaDB, PostGIS]) +@pytest.mark.parametrize( + ("geom", "dist", "expected"), + [ + ("POINT EMPTY", 2.0, "POLYGON EMPTY"), + ("LINESTRING EMPTY", 1.5, "POLYGON EMPTY"), + ("POLYGON EMPTY", 0.5, "POLYGON EMPTY"), + ("MULTIPOINT EMPTY", 1.0, "POLYGON EMPTY"), + ("MULTILINESTRING EMPTY", 1.0, "POLYGON EMPTY"), + ("MULTIPOLYGON EMPTY", 1.0, "POLYGON EMPTY"), + ("GEOMETRYCOLLECTION EMPTY", 1.0, "POLYGON EMPTY"), + ], +) +def test_st_buffer_empty(eng, geom, dist, expected): + eng = SedonaDB.create_or_skip() + + eng.assert_query_result( + f"SELECT ST_Buffer({geom_or_null(geom)}, {val_or_null(dist)})", + expected, ) From b841b0ecf1266c2d635957f59b044077469b068c Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 15:45:52 -0700 Subject: [PATCH 5/8] Add benches for geo --- rust/sedona-geo/benches/geo-functions.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/rust/sedona-geo/benches/geo-functions.rs b/rust/sedona-geo/benches/geo-functions.rs index 2630e6c9..61cbbc3b 100644 --- a/rust/sedona-geo/benches/geo-functions.rs +++ b/rust/sedona-geo/benches/geo-functions.rs @@ -27,6 +27,21 @@ fn criterion_benchmark(c: &mut Criterion) { benchmark::scalar(c, &f, "geo", "st_area", Polygon(10)); benchmark::scalar(c, &f, "geo", "st_area", Polygon(500)); + benchmark::scalar( + c, + &f, + "geo", + "st_buffer", + ArrayScalar(Polygon(10), Float64(1.0, 10.0)), + ); + benchmark::scalar( + c, + &f, + "geo", + "st_buffer", + ArrayScalar(Polygon(500), Float64(1.0, 10.0)), + ); + benchmark::scalar(c, &f, "geo", "st_perimeter", Polygon(10)); benchmark::scalar(c, &f, "geo", "st_perimeter", Polygon(500)); From 0046d5cb58ff497649ee0b0fe67c46f9da0feee9 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 16:09:29 -0700 Subject: [PATCH 6/8] clippy: remove unwrap call --- rust/sedona-geo/src/st_buffer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/sedona-geo/src/st_buffer.rs b/rust/sedona-geo/src/st_buffer.rs index 2459e6f8..7dbce0a4 100644 --- a/rust/sedona-geo/src/st_buffer.rs +++ b/rust/sedona-geo/src/st_buffer.rs @@ -64,8 +64,8 @@ impl SedonaScalarKernel for STBuffer { if scalar_arg.is_null() { params = None; } else { - let distance = Some(f64::try_from(scalar_arg.clone())?); - params = Some(BufferStyle::new(distance.unwrap())); + let distance = f64::try_from(scalar_arg.clone())?; + params = Some(BufferStyle::new(distance)); } } else { return exec_err!("Invalid distance: {:?}", args[1]); From eace153a7dac77a130b8db1b6dd0a3ca70c65f26 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 16:18:57 -0700 Subject: [PATCH 7/8] clean up --- rust/sedona-geo/src/st_buffer.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/rust/sedona-geo/src/st_buffer.rs b/rust/sedona-geo/src/st_buffer.rs index 7dbce0a4..4d2cf1bb 100644 --- a/rust/sedona-geo/src/st_buffer.rs +++ b/rust/sedona-geo/src/st_buffer.rs @@ -22,8 +22,10 @@ use arrow_schema::DataType; use datafusion_common::{error::Result, exec_err, DataFusionError}; use datafusion_expr::ColumnarValue; use geo::algorithm::buffer::{Buffer, BufferStyle}; +use geo_types::Polygon; use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel}; use sedona_functions::executor::WkbExecutor; +use sedona_geometry::is_empty::is_geometry_empty; use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES; use sedona_schema::{ datatypes::{SedonaType, WKB_GEOMETRY}, @@ -34,7 +36,9 @@ use wkb::{ Endianness, }; -/// ST_Centroid() implementation using centroid extraction +use crate::to_geo::item_to_geometry; + +/// ST_Buffer() implementation using buffer calculation pub fn st_buffer_impl() -> ScalarKernelRef { Arc::new(STBuffer {}) } @@ -71,7 +75,6 @@ impl SedonaScalarKernel for STBuffer { return exec_err!("Invalid distance: {:?}", args[1]); } - // let executor = GeoTypesExecutor::new(arg_types, args); let executor = WkbExecutor::new(arg_types, args); let mut builder = BinaryBuilder::with_capacity( executor.num_iterations(), @@ -99,10 +102,6 @@ fn invoke_scalar( params: BufferStyle, writer: &mut impl std::io::Write, ) -> Result<()> { - use crate::to_geo::item_to_geometry; - use geo_types::Polygon; - use sedona_geometry::is_empty::is_geometry_empty; - // PostGIS returns POLYGON EMPTY for all empty geometries let is_empty = is_geometry_empty(wkb).map_err(|e| DataFusionError::External(Box::new(e)))?; if is_empty { From 7e600ebb614750c9fa3c421be0e5ef03db40d651 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 19 Oct 2025 16:20:14 -0700 Subject: [PATCH 8/8] Move import --- rust/sedona-geo/src/st_buffer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/sedona-geo/src/st_buffer.rs b/rust/sedona-geo/src/st_buffer.rs index 4d2cf1bb..7eebec65 100644 --- a/rust/sedona-geo/src/st_buffer.rs +++ b/rust/sedona-geo/src/st_buffer.rs @@ -32,6 +32,7 @@ use sedona_schema::{ matchers::ArgMatcher, }; use wkb::{ + reader::Wkb, writer::{write_geometry, WriteOptions}, Endianness, }; @@ -96,7 +97,6 @@ impl SedonaScalarKernel for STBuffer { } } -use wkb::reader::Wkb; fn invoke_scalar( wkb: &Wkb, params: BufferStyle,