@@ -26,7 +26,7 @@ use datafusion_physical_expr::{
2626use geo_traits:: Dimensions ;
2727use sedona_common:: sedona_internal_err;
2828use sedona_geometry:: { bounding_box:: BoundingBox , bounds:: wkb_bounds_xy, interval:: IntervalTrait } ;
29- use sedona_schema:: datatypes:: SedonaType ;
29+ use sedona_schema:: { datatypes:: SedonaType , matchers :: ArgMatcher } ;
3030
3131use crate :: {
3232 statistics:: GeoStatistics ,
@@ -185,6 +185,9 @@ impl SpatialFilter {
185185 match ( & args[ 0 ] , & args[ 1 ] ) {
186186 ( ArgRef :: Col ( column) , ArgRef :: Lit ( literal) )
187187 | ( ArgRef :: Lit ( literal) , ArgRef :: Col ( column) ) => {
188+ if !is_prunable_geospatial_literal ( literal) {
189+ return Ok ( Some ( Self :: Unknown ) ) ;
190+ }
188191 match literal_bounds ( literal) {
189192 Ok ( literal_bounds) => {
190193 Ok ( Some ( Self :: Intersects ( column. clone ( ) , literal_bounds) ) )
@@ -204,6 +207,9 @@ impl SpatialFilter {
204207 match ( & args[ 0 ] , & args[ 1 ] ) {
205208 ( ArgRef :: Col ( column) , ArgRef :: Lit ( literal) ) => {
206209 // column within/covered_by literal -> Intersects filter
210+ if !is_prunable_geospatial_literal ( literal) {
211+ return Ok ( Some ( Self :: Unknown ) ) ;
212+ }
207213 match literal_bounds ( literal) {
208214 Ok ( literal_bounds) => {
209215 Ok ( Some ( Self :: Intersects ( column. clone ( ) , literal_bounds) ) )
@@ -213,6 +219,9 @@ impl SpatialFilter {
213219 }
214220 ( ArgRef :: Lit ( literal) , ArgRef :: Col ( column) ) => {
215221 // literal within/covered_by column -> Covers filter
222+ if !is_prunable_geospatial_literal ( literal) {
223+ return Ok ( Some ( Self :: Unknown ) ) ;
224+ }
216225 match literal_bounds ( literal) {
217226 Ok ( literal_bounds) => {
218227 Ok ( Some ( Self :: Covers ( column. clone ( ) , literal_bounds) ) )
@@ -233,6 +242,9 @@ impl SpatialFilter {
233242 ( ArgRef :: Col ( column) , ArgRef :: Lit ( literal) ) => {
234243 // column contains/covers literal -> Covers filter
235244 // (column's bbox must fully cover literal's bbox)
245+ if !is_prunable_geospatial_literal ( literal) {
246+ return Ok ( Some ( Self :: Unknown ) ) ;
247+ }
236248 match literal_bounds ( literal) {
237249 Ok ( literal_bounds) => {
238250 Ok ( Some ( Self :: Covers ( column. clone ( ) , literal_bounds) ) )
@@ -243,6 +255,9 @@ impl SpatialFilter {
243255 ( ArgRef :: Lit ( literal) , ArgRef :: Col ( column) ) => {
244256 // literal contains/covers column -> Intersects filter
245257 // (if literal contains column, they must at least intersect)
258+ if !is_prunable_geospatial_literal ( literal) {
259+ return Ok ( Some ( Self :: Unknown ) ) ;
260+ }
246261 match literal_bounds ( literal) {
247262 Ok ( literal_bounds) => {
248263 Ok ( Some ( Self :: Intersects ( column. clone ( ) , literal_bounds) ) )
@@ -284,6 +299,9 @@ impl SpatialFilter {
284299 match ( & args[ 0 ] , & args[ 1 ] , & args[ 2 ] ) {
285300 ( ArgRef :: Col ( column) , ArgRef :: Lit ( literal) , ArgRef :: Lit ( distance) )
286301 | ( ArgRef :: Lit ( literal) , ArgRef :: Col ( column) , ArgRef :: Lit ( distance) ) => {
302+ if !is_prunable_geospatial_literal ( literal) {
303+ return Ok ( Some ( Self :: Unknown ) ) ;
304+ }
287305 match (
288306 literal_bounds ( literal) ,
289307 distance. value ( ) . cast_to ( & DataType :: Float64 ) ?,
@@ -314,6 +332,19 @@ enum ArgRef<'a> {
314332 Other ,
315333}
316334
335+ /// Our current spatial data pruning implementation does not correctly handle geography data.
336+ /// We therefore only consider geometry data type for pruning.
337+ fn is_prunable_geospatial_literal ( literal : & Literal ) -> bool {
338+ let Ok ( literal_field) = literal. return_field ( & Schema :: empty ( ) ) else {
339+ return false ;
340+ } ;
341+ let Ok ( sedona_type) = SedonaType :: from_storage_field ( & literal_field) else {
342+ return false ;
343+ } ;
344+ let matcher = ArgMatcher :: is_geometry ( ) ;
345+ matcher. match_type ( & sedona_type)
346+ }
347+
317348fn literal_bounds ( literal : & Literal ) -> Result < BoundingBox > {
318349 let literal_field = literal. return_field ( & Schema :: empty ( ) ) ?;
319350 let sedona_type = SedonaType :: from_storage_field ( & literal_field) ?;
@@ -348,12 +379,11 @@ fn parse_args(args: &[Arc<dyn PhysicalExpr>]) -> Vec<ArgRef<'_>> {
348379
349380#[ cfg( test) ]
350381mod test {
351-
352382 use arrow_schema:: { DataType , Field } ;
353383 use datafusion_expr:: { ScalarUDF , Signature , SimpleScalarUDF , Volatility } ;
354384 use rstest:: rstest;
355385 use sedona_geometry:: { bounding_box:: BoundingBox , interval:: Interval } ;
356- use sedona_schema:: datatypes:: WKB_GEOMETRY ;
386+ use sedona_schema:: datatypes:: { WKB_GEOGRAPHY , WKB_GEOMETRY } ;
357387 use sedona_testing:: create:: create_scalar;
358388
359389 use super :: * ;
@@ -806,6 +836,111 @@ mod test {
806836 ) ) ;
807837 }
808838
839+ #[ rstest]
840+ fn range_predicate_involving_geography_should_be_transformed_to_unknown (
841+ #[ values(
842+ "st_intersects" ,
843+ "st_equals" ,
844+ "st_touches" ,
845+ "st_contains" ,
846+ "st_covers" ,
847+ "st_within" ,
848+ "st_covered_by" ,
849+ "st_coveredby"
850+ ) ]
851+ func_name : & str ,
852+ ) {
853+ let column: Arc < dyn PhysicalExpr > = Arc :: new ( Column :: new ( "geometry" , 0 ) ) ;
854+ let storage_field = WKB_GEOGRAPHY . to_storage_field ( "" , true ) . unwrap ( ) ;
855+ let literal: Arc < dyn PhysicalExpr > = Arc :: new ( Literal :: new_with_metadata (
856+ create_scalar ( Some ( "POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))" ) , & WKB_GEOGRAPHY ) ,
857+ Some ( storage_field. metadata ( ) . into ( ) ) ,
858+ ) ) ;
859+
860+ let func = create_dummy_spatial_function ( func_name, 2 ) ;
861+ let expr: Arc < dyn PhysicalExpr > = Arc :: new ( ScalarFunctionExpr :: new (
862+ func_name,
863+ Arc :: new ( func. clone ( ) ) ,
864+ vec ! [ column. clone( ) , literal. clone( ) ] ,
865+ Arc :: new ( Field :: new ( "" , DataType :: Boolean , true ) ) ,
866+ ) ) ;
867+ let predicate = SpatialFilter :: try_from_expr ( & expr) . unwrap ( ) ;
868+ assert ! (
869+ matches!( predicate, SpatialFilter :: Unknown ) ,
870+ "Function {func_name} involving geography should produce Unknown filter"
871+ ) ;
872+ }
873+
#[test]
fn distance_predicate_involving_geography_should_be_transformed_to_unknown() {
    // Shared operands: a column, a literal with geography metadata, and a distance.
    let column: Arc<dyn PhysicalExpr> = Arc::new(Column::new("geometry", 0));
    let geography_field = WKB_GEOGRAPHY.to_storage_field("", true).unwrap();
    let literal: Arc<dyn PhysicalExpr> = Arc::new(Literal::new_with_metadata(
        create_scalar(Some("POINT (1 2)"), &WKB_GEOGRAPHY),
        Some(geography_field.metadata().into()),
    ));
    let distance_literal: Arc<dyn PhysicalExpr> =
        Arc::new(Literal::new(ScalarValue::Float64(Some(100.0))));

    // Converts an expression to a filter and checks that it is `Unknown`.
    let assert_unknown = |expr: &Arc<dyn PhysicalExpr>, message: &str| {
        let filter = SpatialFilter::try_from_expr(expr).unwrap();
        assert!(matches!(filter, SpatialFilter::Unknown), "{message}");
    };

    // Builds a call to the dummy spatial function `name` over `args`.
    let call = |name: &str, arity: usize, args: Vec<Arc<dyn PhysicalExpr>>| {
        let expr: Arc<dyn PhysicalExpr> = Arc::new(ScalarFunctionExpr::new(
            name,
            Arc::new(create_dummy_spatial_function(name, arity)),
            args,
            Arc::new(Field::new("", DataType::Boolean, true)),
        ));
        expr
    };

    // Test ST_DWithin function
    let dwithin_expr = call(
        "st_dwithin",
        3,
        vec![column.clone(), literal.clone(), distance_literal.clone()],
    );
    assert_unknown(
        &dwithin_expr,
        "ST_DWithin involving geography should produce Unknown filter",
    );

    // Test ST_DWithin with reversed geometry arguments
    let dwithin_expr_reversed = call(
        "st_dwithin",
        3,
        vec![literal.clone(), column.clone(), distance_literal.clone()],
    );
    assert_unknown(
        &dwithin_expr_reversed,
        "ST_DWithin involving geography should produce Unknown filter",
    );

    // Test ST_Distance <= threshold
    let distance_expr = call("st_distance", 2, vec![column.clone(), literal.clone()]);
    let comparison_expr: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
        distance_expr.clone(),
        Operator::LtEq,
        distance_literal.clone(),
    ));
    assert_unknown(
        &comparison_expr,
        "ST_Distance <= threshold involving geography should produce Unknown filter",
    );

    // Test threshold >= ST_Distance
    let comparison_expr_reversed: Arc<dyn PhysicalExpr> = Arc::new(BinaryExpr::new(
        distance_literal.clone(),
        Operator::GtEq,
        distance_expr.clone(),
    ));
    assert_unknown(
        &comparison_expr_reversed,
        "threshold >= ST_Distance involving geography should produce Unknown filter",
    );
}
943+
809944 #[ test]
810945 fn predicate_from_expr_has_z ( ) {
811946 let column: Arc < dyn PhysicalExpr > = Arc :: new ( Column :: new ( "geometry" , 0 ) ) ;
0 commit comments