diff --git a/docs/changelog/142462.yaml b/docs/changelog/142462.yaml new file mode 100644 index 0000000000000..40559109f5d2d --- /dev/null +++ b/docs/changelog/142462.yaml @@ -0,0 +1,6 @@ +pr: 142462 +summary: "ES|QL: Validate TOP_SNIPPETS query argument is foldable at verification time" +area: ES|QL +type: bug +issues: + - 142462 diff --git a/docs/changelog/142763.yaml b/docs/changelog/142763.yaml new file mode 100644 index 0000000000000..ac77e6d6afde8 --- /dev/null +++ b/docs/changelog/142763.yaml @@ -0,0 +1,6 @@ +area: ES|QL +issues: + - 142462 +pr: 142763 +summary: Validate TOP_SNIPPETS query argument is foldable at verification +type: enhancement diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/top-snippets.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/top-snippets.csv-spec index d93e35f701056..9351b8115c103 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/top-snippets.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/top-snippets.csv-spec @@ -208,3 +208,39 @@ ROW content = "Sauron, the Dark Lord, has gathered to him all the Rings of Power snippets:keyword [All he lacks in his plans for dominion is the One Ring - the ring that rules them all - which has fallen into the hands of, ring that rules them all - which has fallen into the hands of the hobbit\, Bilbo Baggins.] 
; + +topSnippetsWithConstantQuery +required_capability: top_snippets_function + +FROM employees +| EVAL snippets = TOP_SNIPPETS(first_name, "John") +| KEEP emp_no, first_name, snippets +| SORT emp_no +| LIMIT 5 +; + +emp_no:integer | first_name:keyword | snippets:keyword +10001 | Georgi | null +10002 | Bezalel | null +10003 | Parto | null +10004 | Chirstian | null +10005 | Kyoichi | null +; + +topSnippetsWithFoldableConcatQuery +required_capability: top_snippets_function + +FROM employees +| EVAL snippets = TOP_SNIPPETS(first_name, CONCAT("Jo", "hn")) +| KEEP emp_no, first_name, snippets +| SORT emp_no +| LIMIT 5 +; + +emp_no:integer | first_name:keyword | snippets:keyword +10001 | Georgi | null +10002 | Bezalel | null +10003 | Parto | null +10004 | Chirstian | null +10005 | Kyoichi | null +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippets.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippets.java index 7c67a75cd4e3a..8eae2497c6cff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippets.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippets.java @@ -20,6 +20,8 @@ import org.elasticsearch.xpack.core.common.chunks.MemoryIndexChunkScorer; import org.elasticsearch.xpack.core.common.chunks.ScoredChunk; import org.elasticsearch.xpack.core.inference.chunking.SentenceBoundaryChunkingSettings; +import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; +import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.MapExpression; @@ -49,11 +51,13 @@ import static 
org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; +import static org.elasticsearch.xpack.esql.expression.Foldables.TypeResolutionValidator.forPostOptimizationValidation; +import static org.elasticsearch.xpack.esql.expression.Foldables.resolveTypeQuery; import static org.elasticsearch.xpack.esql.expression.function.Options.resolve; import static org.elasticsearch.xpack.esql.expression.function.scalar.util.ChunkUtils.chunkText; import static org.elasticsearch.xpack.esql.expression.function.scalar.util.ChunkUtils.emitChunks; -public class TopSnippets extends EsqlScalarFunction implements OptionalArgument { +public class TopSnippets extends EsqlScalarFunction implements OptionalArgument, PostOptimizationVerificationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Expression.class, @@ -277,6 +281,11 @@ static void process( emitChunks(builder, snippets); } + @Override + public void postOptimizationVerification(Failures failures) { + resolveTypeQuery(query(), sourceText(), forPostOptimizationValidation(query(), failures)); + } + @Override public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { int numSnippets; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippetsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippetsTests.java index 83d3642436004..01e46c7e5852c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippetsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippetsTests.java @@ -27,7 +27,6 @@ import 
java.util.ArrayList; import java.util.List; -import java.util.Locale; import java.util.Objects; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -38,7 +37,9 @@ import static org.elasticsearch.xpack.esql.expression.function.scalar.string.TopSnippets.DEFAULT_NUM_SNIPPETS; import static org.elasticsearch.xpack.esql.expression.function.scalar.string.TopSnippets.DEFAULT_WORD_SIZE; import static org.elasticsearch.xpack.esql.expression.function.scalar.util.ChunkUtils.chunkText; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class TopSnippetsTests extends AbstractScalarFunctionTestCase { @@ -245,14 +246,9 @@ public void testSnippetsReturnedInScoringOrder() { List<String> result = process(combinedText, query, 3, 50); - assertNotNull("Should return results for matching query", result); - assertFalse("Should have at least one result", result.isEmpty()); - - assertTrue( - "First snippet should be from the most relevant chunk (contains 'Elasticsearch' multiple times)", - result.get(0).toLowerCase(Locale.ROOT).contains("elasticsearch") - && (result.get(0).contains("powerful") || result.get(0).contains("supports") || result.get(0).contains("companies")) - ); + assertThat(result, hasSize(2)); + assertThat(result.get(0), containsString("Elasticsearch is a powerful search engine")); + assertThat(result.get(1), containsString("Elasticsearch is one option among several alternatives")); } private void verifySnippets(String query, Integer numSnippets, Integer numWords, int expectedNumChunksReturned) { @@ -295,5 +291,4 @@ private List<String> process(String str, String query, int numSnippets, int numW } } } - } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippetsValidationTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippetsValidationTests.java new file mode 100644 index 0000000000000..44dd18cdaba4b --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippetsValidationTests.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.common.Failures; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; + +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; + +public class TopSnippetsValidationTests extends ESTestCase { + + public void testValidateWithLiteralQuery() { + Expression field = fieldAttribute("body", DataType.TEXT); + Expression query = new Literal(Source.EMPTY, new BytesRef("search terms"), DataType.KEYWORD); + TopSnippets topSnippets = new TopSnippets(Source.synthetic("TOP_SNIPPETS(body, \"search terms\")"), field, query, null); + + Failures failures = new Failures(); + topSnippets.postOptimizationVerification(failures); + + assertThat(failures.failures(), is(empty())); + } + + public void testValidateWithFieldQuery() { + Expression 
field = fieldAttribute("body", DataType.TEXT); + Expression query = fieldAttribute("title", DataType.KEYWORD); + TopSnippets topSnippets = new TopSnippets(Source.synthetic("TOP_SNIPPETS(body, title)"), field, query, null); + + Failures failures = new Failures(); + topSnippets.postOptimizationVerification(failures); + + assertThat(failures.failures(), hasSize(1)); + assertThat(failures.failures().iterator().next().message(), containsString("Query must be a valid string")); + } + + private static FieldAttribute fieldAttribute(String name, DataType type) { + return new FieldAttribute(Source.EMPTY, name, new EsField(name, type, Map.of(), true, EsField.TimeSeriesFieldType.NONE)); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index d12087ac90953..3d1e0f987450a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -9967,6 +9967,23 @@ STATS max(max_over_time(network.bytes_in)) by p = pod, bucket(@timestamp, 1 minu as(eval3.child(), EsRelation.class); } + public void testTopSnippetsQueryMustBeFoldable() { + var e = expectThrows(VerificationException.class, () -> optimizedPlan("FROM test | EVAL x = TOP_SNIPPETS(first_name, last_name)")); + assertThat(e.getMessage(), containsString("Query must be a valid string")); + } + + public void testTopSnippetsQueryFoldableAfterOptimization() { + var plan = optimizedPlan("FROM test | EVAL x = TOP_SNIPPETS(first_name, \"search terms\")"); + var failures = LogicalVerifier.INSTANCE.verify(plan, plan.output()); + assertThat(failures.failures(), is(empty())); + } + + public void testTopSnippetsQueryFoldableConcatConstants() { + var plan = optimizedPlan("FROM test | EVAL x = 
TOP_SNIPPETS(first_name, CONCAT(\"search\", \" terms\"))"); + var failures = LogicalVerifier.INSTANCE.verify(plan, plan.output()); + assertThat(failures.failures(), is(empty())); + } + public void testPushDownSampleAndLimitThroughUriParts() { assumeTrue("requires compound output capability", EsqlCapabilities.Cap.URI_PARTS_COMMAND.isEnabled()); var query = "FROM test | URI_PARTS parts = \"http://example.com/foo/bar?baz=qux\" | SAMPLE .5";