Skip to content

Commit 3ed337e

Browse files
authored
Fix distinguishing the joined geospatial types for ST_KNN (#87)
This PR fixes a regression introduced by #57, which added geography type checking to prevent optimized spatial joins from being used with unsupported geography types. The problematic patch incorrectly matches `left` and `right` expressions in `KNNPredicate` to query plans. Actually, how `left` and `right` maps to the original query plans depends on the value of `probe_side` field. This is quite misleading and may be addressed in a future PR by renaming `left` and `right` to `build` and `probe`. The function for checking if geospatial types are supported now returns `Result<bool>` to propagate errors during the field type extraction and matching process. This prevents silent matching failures due to bugs from happening.
1 parent 827232c commit 3ed337e

File tree

1 file changed

+89
-20
lines changed

1 file changed

+89
-20
lines changed

rust/sedona-spatial-join/src/optimizer.rs

Lines changed: 89 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ impl SpatialJoinOptimizer {
147147
&spatial_predicate,
148148
&left.schema(),
149149
&right.schema(),
150-
) {
150+
)? {
151151
return Ok(None);
152152
}
153153

@@ -190,7 +190,7 @@ impl SpatialJoinOptimizer {
190190
&spatial_predicate,
191191
&hash_join.left().schema(),
192192
&hash_join.right().schema(),
193-
) {
193+
)? {
194194
return Ok(None);
195195
}
196196

@@ -877,26 +877,40 @@ fn is_spatial_predicate_supported(
877877
spatial_predicate: &SpatialPredicate,
878878
left_schema: &Schema,
879879
right_schema: &Schema,
880-
) -> bool {
880+
) -> Result<bool> {
881881
/// Only spatial predicates working with planar geometry are supported for optimization.
882882
/// Geography (spherical) types are explicitly excluded and will not trigger optimized spatial joins.
883-
fn is_geometry_type_supported(expr: &Arc<dyn PhysicalExpr>, schema: &Schema) -> bool {
884-
let Ok(left_return_field) = expr.return_field(schema) else {
885-
return false;
886-
};
887-
let Ok(sedona_type) = SedonaType::from_storage_field(&left_return_field) else {
888-
return false;
889-
};
883+
fn is_geometry_type_supported(expr: &Arc<dyn PhysicalExpr>, schema: &Schema) -> Result<bool> {
884+
let left_return_field = expr.return_field(schema)?;
885+
let sedona_type = SedonaType::from_storage_field(&left_return_field)?;
890886
let matcher = ArgMatcher::is_geometry();
891-
matcher.match_type(&sedona_type)
887+
Ok(matcher.match_type(&sedona_type))
892888
}
893889

894890
match spatial_predicate {
895891
SpatialPredicate::Relation(RelationPredicate { left, right, .. })
896-
| SpatialPredicate::Distance(DistancePredicate { left, right, .. })
897-
| SpatialPredicate::KNearestNeighbors(KNNPredicate { left, right, .. }) => {
898-
is_geometry_type_supported(left, left_schema)
899-
&& is_geometry_type_supported(right, right_schema)
892+
| SpatialPredicate::Distance(DistancePredicate { left, right, .. }) => {
893+
Ok(is_geometry_type_supported(left, left_schema)?
894+
&& is_geometry_type_supported(right, right_schema)?)
895+
}
896+
SpatialPredicate::KNearestNeighbors(KNNPredicate {
897+
left,
898+
right,
899+
probe_side,
900+
..
901+
}) => {
902+
let (left, right) = match probe_side {
903+
JoinSide::Left => (left, right),
904+
JoinSide::Right => (right, left),
905+
_ => {
906+
return sedona_internal_err!(
907+
"Invalid probe side in KNN predicate: {:?}",
908+
probe_side
909+
)
910+
}
911+
};
912+
Ok(is_geometry_type_supported(left, left_schema)?
913+
&& is_geometry_type_supported(right, right_schema)?)
900914
}
901915
}
902916
}
@@ -2479,11 +2493,7 @@ mod tests {
24792493
SpatialRelationType::Intersects,
24802494
);
24812495
let spatial_pred = SpatialPredicate::Relation(rel_pred);
2482-
assert!(super::is_spatial_predicate_supported(
2483-
&spatial_pred,
2484-
&schema,
2485-
&schema
2486-
));
2496+
assert!(super::is_spatial_predicate_supported(&spatial_pred, &schema, &schema).unwrap());
24872497

24882498
// Geography field (should NOT be supported)
24892499
let geog_field = WKB_GEOGRAPHY.to_storage_field("geog", false).unwrap();
@@ -2499,6 +2509,65 @@ mod tests {
24992509
&spatial_pred_geog,
25002510
&geog_schema,
25012511
&geog_schema
2512+
)
2513+
.unwrap());
2514+
}
2515+
2516+
#[test]
2517+
fn test_is_knn_predicate_supported() {
2518+
// ST_KNN(left, right)
2519+
let left_schema = Arc::new(Schema::new(vec![WKB_GEOMETRY
2520+
.to_storage_field("geom", false)
2521+
.unwrap()]));
2522+
let right_schema = Arc::new(Schema::new(vec![
2523+
Field::new("id", DataType::Int32, false),
2524+
WKB_GEOMETRY.to_storage_field("geom", false).unwrap(),
2525+
]));
2526+
let left_col_expr = Arc::new(Column::new("geom", 0)) as Arc<dyn PhysicalExpr>;
2527+
let right_col_expr = Arc::new(Column::new("geom", 1)) as Arc<dyn PhysicalExpr>;
2528+
let knn_pred = SpatialPredicate::KNearestNeighbors(KNNPredicate::new(
2529+
left_col_expr.clone(),
2530+
right_col_expr.clone(),
2531+
5,
2532+
false,
2533+
JoinSide::Left,
2534+
));
2535+
assert!(
2536+
super::is_spatial_predicate_supported(&knn_pred, &left_schema, &right_schema).unwrap()
2537+
);
2538+
2539+
// ST_KNN(right, left)
2540+
let knn_pred = SpatialPredicate::KNearestNeighbors(KNNPredicate::new(
2541+
right_col_expr.clone(),
2542+
left_col_expr.clone(),
2543+
5,
2544+
false,
2545+
JoinSide::Right,
25022546
));
2547+
assert!(
2548+
super::is_spatial_predicate_supported(&knn_pred, &left_schema, &right_schema).unwrap()
2549+
);
2550+
2551+
// ST_KNN with geography (should NOT be supported)
2552+
let left_geog_schema = Arc::new(Schema::new(vec![WKB_GEOGRAPHY
2553+
.to_storage_field("geog", false)
2554+
.unwrap()]));
2555+
assert!(!super::is_spatial_predicate_supported(
2556+
&knn_pred,
2557+
&left_geog_schema,
2558+
&right_schema
2559+
)
2560+
.unwrap());
2561+
2562+
let right_geog_schema = Arc::new(Schema::new(vec![
2563+
Field::new("id", DataType::Int32, false),
2564+
WKB_GEOGRAPHY.to_storage_field("geog", false).unwrap(),
2565+
]));
2566+
assert!(!super::is_spatial_predicate_supported(
2567+
&knn_pred,
2568+
&left_schema,
2569+
&right_geog_schema
2570+
)
2571+
.unwrap());
25032572
}
25042573
}

0 commit comments

Comments
 (0)