From ccc45f4b28941ba1179afff55f0472db50edb51a Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Sun, 25 May 2025 13:00:36 +0200 Subject: [PATCH] Added geometry validation for GEO types to exit early on invalid latitudes (#128259) (#128426) It turns out in #128234 that Lucene pushdown on ST_DISTANCE with invalid points (latitude out of range) will cause all documents to be returned, since DISJOINT on an invalid circle is true for all documents. We could either add an extra check for spatial pushdown that the geometries are valid, or add validation at geometry creation. This second option is much easier to implement, and a more comprehensive approach, as it prevents invalid geometries in many more places, hopefully reducing the likelihood of subtle and obscure bugs like #128234 happening in future. --- docs/changelog/128259.yaml | 6 ++ .../core/util/SpatialCoordinateTypes.java | 24 ++++--- .../src/main/resources/spatial.csv-spec | 72 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 5 ++ .../function/scalar/convert/ToGeoPoint.java | 4 +- .../function/scalar/convert/ToGeoShape.java | 4 +- .../esql/type/EsqlDataTypeConverter.java | 9 +++ 7 files changed, 109 insertions(+), 15 deletions(-) create mode 100644 docs/changelog/128259.yaml diff --git a/docs/changelog/128259.yaml b/docs/changelog/128259.yaml new file mode 100644 index 0000000000000..8c30b7cd1d021 --- /dev/null +++ b/docs/changelog/128259.yaml @@ -0,0 +1,6 @@ +pr: 128259 +summary: Added geometry validation for GEO types to exit early on invalid latitudes +area: Geo +type: bug +issues: + - 128234 diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/SpatialCoordinateTypes.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/SpatialCoordinateTypes.java index 7927e831ebd9d..019aabda5058e 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/SpatialCoordinateTypes.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/SpatialCoordinateTypes.java @@ -12,6 +12,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.geometry.Geometry; import org.elasticsearch.geometry.Point; +import org.elasticsearch.geometry.utils.GeographyValidator; import org.elasticsearch.geometry.utils.GeometryValidator; import org.elasticsearch.geometry.utils.WellKnownBinary; import org.elasticsearch.geometry.utils.WellKnownText; @@ -32,6 +33,11 @@ public long pointAsLong(double x, double y) { int longitudeEncoded = encodeLongitude(x); return (((long) latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL); } + + public GeometryValidator validator() { + // We validate the lat/lon values, and ignore any z values + return GeographyValidator.instance(true); + } }, CARTESIAN { @@ -67,6 +73,10 @@ public long pointAsLong(double x, double y) { } }; + protected GeometryValidator validator() { + return GeometryValidator.NOOP; + } + public abstract Point longAsPoint(long encoded); public abstract long pointAsLong(double x, double y); @@ -77,7 +87,7 @@ public long wkbAsLong(BytesRef wkb) { } public Point wkbAsPoint(BytesRef wkb) { - Geometry geometry = WellKnownBinary.fromWKB(GeometryValidator.NOOP, false, wkb.bytes, wkb.offset, wkb.length); + Geometry geometry = WellKnownBinary.fromWKB(validator(), false, wkb.bytes, wkb.offset, wkb.length); if (geometry instanceof Point point) { return point; } else { @@ -101,26 +111,18 @@ public BytesRef wktToWkb(String wkt) { // TODO: we should be able to transform WKT to WKB without building the geometry // we should as well use different validator for cartesian and geo? try { - Geometry geometry = WellKnownText.fromWKT(GeometryValidator.NOOP, false, wkt); + Geometry geometry = WellKnownText.fromWKT(validator(), false, wkt); return new BytesRef(WellKnownBinary.toWKB(geometry, ByteOrder.LITTLE_ENDIAN)); } catch (Exception e) { throw new IllegalArgumentException("Failed to parse WKT: " + e.getMessage(), e); } } - public Geometry wktToGeometry(String wkt) { - try { - return WellKnownText.fromWKT(GeometryValidator.NOOP, false, wkt); - } catch (Exception e) { - throw new IllegalArgumentException("Failed to parse WKT: " + e.getMessage(), e); - } - } - public String wkbToWkt(BytesRef wkb) { return WellKnownText.fromWKB(wkb.bytes, wkb.offset, wkb.length); } public Geometry wkbToGeometry(BytesRef wkb) { - return WellKnownBinary.fromWKB(GeometryValidator.NOOP, false, wkb.bytes, wkb.offset, wkb.length); + return WellKnownBinary.fromWKB(validator(), false, wkb.bytes, wkb.offset, wkb.length); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec index d10a6178829e6..74286ba466edf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/spatial.csv-spec @@ -1725,6 +1725,78 @@ count:long 12 ; +airportsDistanceLessThanInvalidPoint +required_capability: st_distance +required_capability: geo_validation + +FROM airports +| EVAL distance = ST_DISTANCE(location, TO_GEOPOINT("POINT(-28.6359 153.5906)")) +| KEEP distance, location, name, scalerank +| WHERE distance < 500000 AND scalerank < 6 +| STATS count = COUNT() +; + +warning:Line 2:41: evaluation of [TO_GEOPOINT(\"POINT(-28.6359 153.5906)\")] failed, treating result as null. Only first 20 failures recorded. +warning:Line 2:41: java.lang.IllegalArgumentException: Failed to parse WKT: invalid latitude 153.5906; must be between -90.0 and 90.0 + +count:l +0 +; + +airportsDistanceLessThanInvalidPoint2 +required_capability: st_distance +required_capability: geo_validation + +FROM airports +| EVAL distance = ST_DISTANCE(TO_GEOPOINT("POINT(-28.6359 153.5906)"), location) +| KEEP distance, location, name, scalerank +| WHERE distance < 500000 AND scalerank < 6 +| STATS count = COUNT() +; + +warning:Line 2:31: evaluation of [TO_GEOPOINT(\"POINT(-28.6359 153.5906)\")] failed, treating result as null. Only first 20 failures recorded. +warning:Line 2:31: java.lang.IllegalArgumentException: Failed to parse WKT: invalid latitude 153.5906; must be between -90.0 and 90.0 + +count:l +0 +; + +airportsDistanceGreaterThanInvalidPoint +required_capability: st_distance +required_capability: geo_validation + +FROM airports +| EVAL distance = ST_DISTANCE(location, TO_GEOPOINT("POINT(-28.6359 153.5906)")) +| KEEP distance, location, name, scalerank +| WHERE distance > 500 AND scalerank < 6 +| STATS count = COUNT() +; + +warning:Line 2:41: evaluation of [TO_GEOPOINT(\"POINT(-28.6359 153.5906)\")] failed, treating result as null. Only first 20 failures recorded. +warning:Line 2:41: java.lang.IllegalArgumentException: Failed to parse WKT: invalid latitude 153.5906; must be between -90.0 and 90.0 + +count:l +0 +; + +airportsDistanceGreaterThanInvalidPoint2 +required_capability: st_distance +required_capability: geo_validation + +FROM airports +| EVAL distance = ST_DISTANCE(TO_GEOPOINT("POINT(-28.6359 153.5906)"), location) +| KEEP distance, location, name, scalerank +| WHERE distance > 500 AND scalerank < 6 +| STATS count = COUNT() +; + +warning:Line 2:31: evaluation of [TO_GEOPOINT(\"POINT(-28.6359 153.5906)\")] failed, treating result as null. Only first 20 failures recorded. +warning:Line 2:31: java.lang.IllegalArgumentException: Failed to parse WKT: invalid latitude 153.5906; must be between -90.0 and 90.0 + +count:l +0 +; + ############################################### # Tests for Equality and casting with GEO_POINT diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 32e09fa4ec2fb..80509b7519bff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -220,6 +220,11 @@ public enum Cap { */ SPATIAL_CENTROID_NO_RECORDS, + /** + * Do validation check on geo_point and geo_shape fields. Done in #128259. + */ + GEO_VALIDATION, + /** * Support ST_ENVELOPE function (and related ST_XMIN, etc.). */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoPoint.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoPoint.java index 42af06a40553d..14b43bea84e42 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoPoint.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoPoint.java @@ -27,7 +27,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.SEMANTIC_TEXT; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; -import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToSpatial; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToGeo; public class ToGeoPoint extends AbstractConvertFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( @@ -93,6 +93,6 @@ protected NodeInfo info() { @ConvertEvaluator(extraName = "FromString", warnExceptions = { IllegalArgumentException.class }) static BytesRef fromKeyword(BytesRef in) { - return stringToSpatial(in.utf8ToString()); + return stringToGeo(in.utf8ToString()); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoShape.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoShape.java index b5b6db2752b06..8fed6665b1fbf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoShape.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToGeoShape.java @@ -28,7 +28,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.SEMANTIC_TEXT; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; -import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToSpatial; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToGeo; public class ToGeoShape extends AbstractConvertFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( @@ -95,6 +95,6 @@ protected NodeInfo info() { @ConvertEvaluator(extraName = "FromString", warnExceptions = { IllegalArgumentException.class }) static BytesRef fromKeyword(BytesRef in) { - return stringToSpatial(in.utf8ToString()); + return stringToGeo(in.utf8ToString()); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 986f1594a4b3f..96516f9bbd086 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -96,6 +96,7 @@ import static org.elasticsearch.xpack.esql.core.util.NumericUtils.asLongUnsigned; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.asUnsignedLong; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.unsignedLongAsNumber; +import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO; import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.UNSPECIFIED; public class EsqlDataTypeConverter { @@ -225,6 +226,9 @@ public static Converter converterFor(DataType from, DataType to) { if (to == DataType.BOOLEAN) { return EsqlConverter.STRING_TO_BOOLEAN; } + if (DataType.isSpatialGeo(to)) { + return EsqlConverter.STRING_TO_GEO; + } if (DataType.isSpatial(to)) { return EsqlConverter.STRING_TO_SPATIAL; } @@ -536,6 +540,10 @@ public static String spatialToString(BytesRef field) { return UNSPECIFIED.wkbToWkt(field); } + public static BytesRef stringToGeo(String field) { + return GEO.wktToWkb(field); + } + public static BytesRef stringToSpatial(String field) { return UNSPECIFIED.wktToWkb(field); } @@ -694,6 +702,7 @@ public enum EsqlConverter implements Converter { STRING_TO_LONG(x -> EsqlDataTypeConverter.stringToLong((String) x)), STRING_TO_INT(x -> EsqlDataTypeConverter.stringToInt((String) x)), STRING_TO_BOOLEAN(x -> EsqlDataTypeConverter.stringToBoolean((String) x)), + STRING_TO_GEO(x -> EsqlDataTypeConverter.stringToGeo((String) x)), STRING_TO_SPATIAL(x -> EsqlDataTypeConverter.stringToSpatial((String) x)); private static final String NAME = "esql-converter";