Skip to content

Commit 7097a94

Browse files
authored
chore: Add assert_scalar_equal_wkb_geometry_topologically to compare WKBs topologically (#192)
This is part of the forked dependency elimination plan #165. We've hit some overlay operation result assertion errors after upgrading geo to the latest 0.31.0 release. The failures were caused by geo producing different, but topologically equivalent results, and we are asserting on the byte-level equality of resulting WKBs. This patch adds `assert_scalar_equal_wkb_geometry_topologically` to sedona-testing behind a feature flag "geo". We'll switch to use `assert_scalar_equal_wkb_geometry_topologically` for testing overlay ST functions (ST_Union_Aggr and ST_Intersection_Aggr) after upgrading geo. This function is also useful in many other scenarios, so we add this function in its dedicated PR.
1 parent d869cba commit 7097a94

File tree

3 files changed

+128
-14
lines changed

3 files changed

+128
-14
lines changed

Cargo.lock

Lines changed: 16 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/sedona-testing/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ rstest = { workspace = true }
3232

3333
[features]
3434
default = []
35+
geo = ["dep:geo"]
3536
criterion = ["dep:criterion"]
3637

3738
[dependencies]
@@ -42,7 +43,7 @@ criterion = { workspace = true, optional = true }
4243
datafusion-common = { workspace = true }
4344
datafusion-expr = { workspace = true }
4445
datafusion-physical-expr = { workspace = true }
45-
geo-traits = { workspace = true }
46+
geo-traits = { workspace = true, features = ["geo-types"] }
4647
geo-types = { workspace = true }
4748
parquet = { workspace = true, features = ["arrow", "snap", "zstd"] }
4849
rand = { workspace = true }
@@ -52,3 +53,4 @@ sedona-expr = { path = "../sedona-expr" }
5253
sedona-schema = { path = "../sedona-schema" }
5354
wkb = { workspace = true }
5455
wkt = { workspace = true }
56+
geo = { workspace = true, optional = true }

rust/sedona-testing/src/compare.rs

Lines changed: 109 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,25 @@ pub fn assert_array_equal(actual: &ArrayRef, expected: &ArrayRef) {
104104
/// Panics if the values' are not equal, generating reasonable failure messages for geometry
105105
/// arrays where the default failure message would otherwise be uninformative.
106106
pub fn assert_scalar_equal_wkb_geometry(actual: &ScalarValue, expected_wkt: Option<&str>) {
107-
assert_scalar_equal(actual, &create_scalar(expected_wkt, &WKB_GEOMETRY));
107+
let expected = create_scalar(expected_wkt, &WKB_GEOMETRY);
108+
assert_eq!(actual.data_type(), DataType::Binary);
109+
assert_wkb_scalar_equal(actual, &expected, false);
110+
}
111+
112+
/// Assert a [`ScalarValue`] is a WKB_GEOMETRY scalar corresponding to the given WKT. This function
113+
/// compares the geometries topologically, so two geometries that are not byte-wise equal but are
114+
/// topologically equal will be considered equal.
115+
///
116+
/// Panics if the values' are not topologically equal, generating reasonable failure messages for geometry
117+
/// arrays where the default failure message would otherwise be uninformative.
118+
#[cfg(feature = "geo")]
119+
pub fn assert_scalar_equal_wkb_geometry_topologically(
120+
actual: &ScalarValue,
121+
expected_wkt: Option<&str>,
122+
) {
123+
let expected = create_scalar(expected_wkt, &WKB_GEOMETRY);
124+
assert_eq!(actual.data_type(), DataType::Binary);
125+
assert_wkb_scalar_equal(actual, &expected, true);
108126
}
109127

110128
/// Assert two [`ScalarValue`]s are equal
@@ -123,7 +141,7 @@ pub fn assert_scalar_equal(actual: &ScalarValue, expected: &ScalarValue) {
123141
(SedonaType::Arrow(_), SedonaType::Arrow(_)) => assert_arrow_scalar_equal(actual, expected),
124142
(SedonaType::Wkb(_, _), SedonaType::Wkb(_, _))
125143
| (SedonaType::WkbView(_, _), SedonaType::WkbView(_, _)) => {
126-
assert_wkb_scalar_equal(actual, expected);
144+
assert_wkb_scalar_equal(actual, expected, false);
127145
}
128146
(_, _) => unreachable!(),
129147
}
@@ -160,11 +178,21 @@ where
160178
for (i, (actual_item, expected_item)) in zip(actual, expected).enumerate() {
161179
let actual_label = format!("actual Array element #{i}");
162180
let expected_label = format!("expected Array element #{i}");
163-
assert_wkb_value_equal(actual_item, expected_item, &actual_label, &expected_label);
181+
assert_wkb_value_equal(
182+
actual_item,
183+
expected_item,
184+
&actual_label,
185+
&expected_label,
186+
false,
187+
);
164188
}
165189
}
166190

167-
fn assert_wkb_scalar_equal(actual: &ScalarValue, expected: &ScalarValue) {
191+
fn assert_wkb_scalar_equal(
192+
actual: &ScalarValue,
193+
expected: &ScalarValue,
194+
compare_topologically: bool,
195+
) {
168196
match (actual, expected) {
169197
(ScalarValue::Binary(maybe_actual_wkb), ScalarValue::Binary(maybe_expected_wkb))
170198
| (
@@ -176,6 +204,7 @@ fn assert_wkb_scalar_equal(actual: &ScalarValue, expected: &ScalarValue) {
176204
maybe_expected_wkb.as_deref(),
177205
"actual WKB scalar",
178206
"expected WKB scalar",
207+
compare_topologically,
179208
);
180209
}
181210
(_, _) => {
@@ -189,6 +218,7 @@ fn assert_wkb_value_equal(
189218
expected: Option<&[u8]>,
190219
actual_label: &str,
191220
expected_label: &str,
221+
compare_topologically: bool,
192222
) {
193223
match (actual, expected) {
194224
(None, None) => {}
@@ -205,12 +235,56 @@ fn assert_wkb_value_equal(
205235
)
206236
}
207237
(Some(actual_wkb), Some(expected_wkb)) => {
238+
// Quick test: if the binary of the WKB is the same, they are equal
208239
if actual_wkb != expected_wkb {
209-
let (actual_wkt, expected_wkt) = (format_wkb(actual_wkb), format_wkb(expected_wkb));
210-
panic!("{actual_label} != {expected_label}\n{actual_label}:\n {actual_wkt}\n{expected_label}:\n {expected_wkt}")
240+
let is_equals = if compare_topologically {
241+
compare_wkb_topologically(expected_wkb, actual_wkb)
242+
} else {
243+
false
244+
};
245+
246+
if !is_equals {
247+
let (actual_wkt, expected_wkt) =
248+
(format_wkb(actual_wkb), format_wkb(expected_wkb));
249+
panic!("{actual_label} != {expected_label}\n{actual_label}:\n {actual_wkt}\n{expected_label}:\n {expected_wkt}")
250+
}
251+
}
252+
}
253+
}
254+
}
255+
256+
fn compare_wkb_topologically(
257+
#[allow(unused)] expected_wkb: &[u8],
258+
#[allow(unused)] actual_wkb: &[u8],
259+
) -> bool {
260+
#[cfg(feature = "geo")]
261+
{
262+
use geo::Relate;
263+
use geo_traits::to_geo::ToGeoGeometry;
264+
use geo_traits::Dimensions;
265+
use geo_traits::GeometryTrait;
266+
267+
let expected = wkb::reader::read_wkb(expected_wkb);
268+
let actual = wkb::reader::read_wkb(actual_wkb);
269+
match (expected, actual) {
270+
(Ok(expected_geom), Ok(actual_geom)) => {
271+
if expected_geom.dim() == Dimensions::Xy && actual_geom.dim() == Dimensions::Xy {
272+
let expected_geom = expected_geom.to_geometry();
273+
let actual_geom = actual_geom.to_geometry();
274+
expected_geom.relate(&actual_geom).is_equal_topo()
275+
} else {
276+
// geo crate does not support 3D/4D geometry operations, so we fall back to using the result
277+
// of byte-wise comparison
278+
false
279+
}
211280
}
281+
_ => false,
212282
}
213283
}
284+
#[cfg(not(feature = "geo"))]
285+
{
286+
panic!("Topological comparison requires the 'geo' feature to be enabled");
287+
}
214288
}
215289

216290
fn format_wkb(value: &[u8]) -> String {
@@ -357,20 +431,20 @@ actual Array element #0 is POINT(1 2), expected Array element #0 is null")]
357431

358432
#[test]
359433
fn wkb_value_equal() {
360-
assert_wkb_value_equal(None, None, "lhs", "rhs");
361-
assert_wkb_value_equal(Some(&[]), Some(&[]), "lhs", "rhs");
434+
assert_wkb_value_equal(None, None, "lhs", "rhs", false);
435+
assert_wkb_value_equal(Some(&[]), Some(&[]), "lhs", "rhs", false);
362436
}
363437

364438
#[test]
365439
#[should_panic(expected = "lhs != rhs:\nlhs is POINT(1 2), rhs is null")]
366440
fn wkb_value_expected_null() {
367-
assert_wkb_value_equal(Some(&POINT), None, "lhs", "rhs");
441+
assert_wkb_value_equal(Some(&POINT), None, "lhs", "rhs", false);
368442
}
369443

370444
#[test]
371445
#[should_panic(expected = "lhs != rhs:\nlhs is null, rhs is POINT(1 2)")]
372446
fn wkb_value_actual_null() {
373-
assert_wkb_value_equal(None, Some(&POINT), "lhs", "rhs");
447+
assert_wkb_value_equal(None, Some(&POINT), "lhs", "rhs", false);
374448
}
375449

376450
#[test]
@@ -380,7 +454,31 @@ lhs:
380454
rhs:
381455
POINT(1 2)")]
382456
fn wkb_value_values_not_equal() {
383-
assert_wkb_value_equal(Some(&[]), Some(&POINT), "lhs", "rhs");
457+
assert_wkb_value_equal(Some(&[]), Some(&POINT), "lhs", "rhs", false);
458+
}
459+
460+
#[cfg(feature = "geo")]
461+
#[test]
462+
fn wkb_value_equal_topologically() {
463+
use crate::create::make_wkb;
464+
assert_wkb_value_equal(Some(&POINT), Some(&POINT), "lhs", "rhs", true);
465+
let lhs = make_wkb("POLYGON ((0 0, 1 0, 0 1, 0 0))");
466+
let rhs = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0))");
467+
assert_wkb_value_equal(Some(&lhs), Some(&rhs), "lhs", "rhs", true);
468+
}
469+
470+
#[cfg(feature = "geo")]
471+
#[test]
472+
#[should_panic(expected = "lhs != rhs
473+
lhs:
474+
POLYGON((0 0,1 0,0 1,0 0))
475+
rhs:
476+
POLYGON((0 0,1 0,0 0))")]
477+
fn wkb_value_not_equal_topologically() {
478+
use crate::create::make_wkb;
479+
let lhs = make_wkb("POLYGON ((0 0, 1 0, 0 1, 0 0))");
480+
let rhs = make_wkb("POLYGON ((0 0, 1 0, 0 0))");
481+
assert_wkb_value_equal(Some(&lhs), Some(&rhs), "lhs", "rhs", true);
384482
}
385483

386484
#[test]

0 commit comments

Comments
 (0)