Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/142462.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 142462
summary: "ES|QL: Validate TOP_SNIPPETS query argument is foldable at verification time"
area: ES|QL
type: bug
issues:
- 142462
6 changes: 6 additions & 0 deletions docs/changelog/142763.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
area: ES|QL
issues:
- 142462
pr: 142763
summary: Validate TOP_SNIPPETS query argument is foldable at verification time
type: enhancement
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,39 @@ ROW content = "Sauron, the Dark Lord, has gathered to him all the Rings of Power
snippets:keyword
[All he lacks in his plans for dominion is the One Ring - the ring that rules them all - which has fallen into the hands of, ring that rules them all - which has fallen into the hands of the hobbit\, Bilbo Baggins.]
;

topSnippetsWithConstantQuery
required_capability: top_snippets_function

FROM employees
| EVAL snippets = TOP_SNIPPETS(first_name, "John")
| KEEP emp_no, first_name, snippets
| SORT emp_no
| LIMIT 5
;

emp_no:integer | first_name:keyword | snippets:keyword
10001 | Georgi | null
10002 | Bezalel | null
10003 | Parto | null
10004 | Chirstian | null
10005 | Kyoichi | null
;

topSnippetsWithFoldableConcatQuery
required_capability: top_snippets_function

FROM employees
| EVAL snippets = TOP_SNIPPETS(first_name, CONCAT("Jo", "hn"))
| KEEP emp_no, first_name, snippets
| SORT emp_no
| LIMIT 5
;

emp_no:integer | first_name:keyword | snippets:keyword
10001 | Georgi | null
10002 | Bezalel | null
10003 | Parto | null
10004 | Chirstian | null
10005 | Kyoichi | null
;
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import org.elasticsearch.xpack.core.common.chunks.MemoryIndexChunkScorer;
import org.elasticsearch.xpack.core.common.chunks.ScoredChunk;
import org.elasticsearch.xpack.core.inference.chunking.SentenceBoundaryChunkingSettings;
import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.MapExpression;
Expand Down Expand Up @@ -49,11 +51,13 @@
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
import static org.elasticsearch.xpack.esql.expression.Foldables.TypeResolutionValidator.forPostOptimizationValidation;
import static org.elasticsearch.xpack.esql.expression.Foldables.resolveTypeQuery;
import static org.elasticsearch.xpack.esql.expression.function.Options.resolve;
import static org.elasticsearch.xpack.esql.expression.function.scalar.util.ChunkUtils.chunkText;
import static org.elasticsearch.xpack.esql.expression.function.scalar.util.ChunkUtils.emitChunks;

public class TopSnippets extends EsqlScalarFunction implements OptionalArgument {
public class TopSnippets extends EsqlScalarFunction implements OptionalArgument, PostOptimizationVerificationAware {

public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
Expression.class,
Expand Down Expand Up @@ -277,6 +281,11 @@ static void process(
emitChunks(builder, snippets);
}

/**
 * Post-optimization verification hook for the query argument of this function.
 * <p>
 * The query expression is handed to {@code resolveTypeQuery} together with a validator created by
 * {@code forPostOptimizationValidation}, which is given the {@code failures} collector.
 *
 * @param failures collector passed to the post-optimization validator
 */
@Override
public void postOptimizationVerification(Failures failures) {
    // Read the query argument once and use the same expression for both the check and the validator.
    Expression queryArgument = query();
    resolveTypeQuery(queryArgument, sourceText(), forPostOptimizationValidation(queryArgument, failures));
}

@Override
public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
int numSnippets;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.function.Supplier;
import java.util.stream.Collectors;
Expand All @@ -38,7 +37,9 @@
import static org.elasticsearch.xpack.esql.expression.function.scalar.string.TopSnippets.DEFAULT_NUM_SNIPPETS;
import static org.elasticsearch.xpack.esql.expression.function.scalar.string.TopSnippets.DEFAULT_WORD_SIZE;
import static org.elasticsearch.xpack.esql.expression.function.scalar.util.ChunkUtils.chunkText;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;

public class TopSnippetsTests extends AbstractScalarFunctionTestCase {

Expand Down Expand Up @@ -245,14 +246,9 @@ public void testSnippetsReturnedInScoringOrder() {

List<String> result = process(combinedText, query, 3, 50);

assertNotNull("Should return results for matching query", result);
assertFalse("Should have at least one result", result.isEmpty());

assertTrue(
"First snippet should be from the most relevant chunk (contains 'Elasticsearch' multiple times)",
result.get(0).toLowerCase(Locale.ROOT).contains("elasticsearch")
&& (result.get(0).contains("powerful") || result.get(0).contains("supports") || result.get(0).contains("companies"))
);
assertThat(result, hasSize(2));
assertThat(result.get(0), containsString("Elasticsearch is a powerful search engine"));
assertThat(result.get(1), containsString("Elasticsearch is one option among several alternatives"));
}

private void verifySnippets(String query, Integer numSnippets, Integer numWords, int expectedNumChunksReturned) {
Expand Down Expand Up @@ -295,5 +291,4 @@ private List<String> process(String str, String query, int numSnippets, int numW
}
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;

import java.util.Map;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;

/**
 * Unit tests for {@link TopSnippets#postOptimizationVerification(Failures)}, exercised directly on
 * hand-built expressions.
 */
public class TopSnippetsValidationTests extends ESTestCase {

    /** A literal keyword query must not record any failure. */
    public void testValidateWithLiteralQuery() {
        Expression literalQuery = new Literal(Source.EMPTY, new BytesRef("search terms"), DataType.KEYWORD);

        Failures collected = verify("TOP_SNIPPETS(body, \"search terms\")", literalQuery);

        assertThat(collected.failures(), is(empty()));
    }

    /** A query that is a field reference must record exactly one failure with the expected message. */
    public void testValidateWithFieldQuery() {
        Expression fieldQuery = fieldAttribute("title", DataType.KEYWORD);

        Failures collected = verify("TOP_SNIPPETS(body, title)", fieldQuery);

        assertThat(collected.failures(), hasSize(1));
        String firstMessage = collected.failures().iterator().next().message();
        assertThat(firstMessage, containsString("Query must be a valid string"));
    }

    /**
     * Builds a {@link TopSnippets} over a TEXT field named {@code body} with the given query and no options,
     * runs the post-optimization verification and returns the collected failures.
     *
     * @param sourceText text used for the synthetic source of the function
     * @param query      expression used as the query argument
     * @return the failures collector after verification has run
     */
    private static Failures verify(String sourceText, Expression query) {
        Expression bodyField = fieldAttribute("body", DataType.TEXT);
        TopSnippets function = new TopSnippets(Source.synthetic(sourceText), bodyField, query, null);

        Failures collected = new Failures();
        function.postOptimizationVerification(collected);
        return collected;
    }

    /** Creates a field attribute with the given name and data type, backed by an {@link EsField} without sub-fields. */
    private static FieldAttribute fieldAttribute(String name, DataType type) {
        EsField esField = new EsField(name, type, Map.of(), true, EsField.TimeSeriesFieldType.NONE);
        return new FieldAttribute(Source.EMPTY, name, esField);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9967,6 +9967,23 @@ STATS max(max_over_time(network.bytes_in)) by p = pod, bucket(@timestamp, 1 minu
as(eval3.child(), EsRelation.class);
}

/**
 * Using a field ({@code last_name}) as the query argument must make plan optimization fail with a
 * {@link VerificationException} carrying the "Query must be a valid string" message.
 */
public void testTopSnippetsQueryMustBeFoldable() {
    String esql = "FROM test | EVAL x = TOP_SNIPPETS(first_name, last_name)";
    VerificationException thrown = expectThrows(VerificationException.class, () -> optimizedPlan(esql));
    assertThat(thrown.getMessage(), containsString("Query must be a valid string"));
}

/**
 * A string-literal query argument must leave the logical verifier with no failures on the optimized plan.
 */
public void testTopSnippetsQueryFoldableAfterOptimization() {
    String esql = "FROM test | EVAL x = TOP_SNIPPETS(first_name, \"search terms\")";
    var optimized = optimizedPlan(esql);
    var verification = LogicalVerifier.INSTANCE.verify(optimized, optimized.output());
    assertThat(verification.failures(), is(empty()));
}

/**
 * A query argument built with {@code CONCAT} over two string constants must leave the logical verifier
 * with no failures on the optimized plan.
 */
public void testTopSnippetsQueryFoldableConcatConstants() {
    String esql = "FROM test | EVAL x = TOP_SNIPPETS(first_name, CONCAT(\"search\", \" terms\"))";
    var optimized = optimizedPlan(esql);
    var verification = LogicalVerifier.INSTANCE.verify(optimized, optimized.output());
    assertThat(verification.failures(), is(empty()));
}

public void testPushDownSampleAndLimitThroughUriParts() {
assumeTrue("requires compound output capability", EsqlCapabilities.Cap.URI_PARTS_COMMAND.isEnabled());
var query = "FROM test | URI_PARTS parts = \"http://example.com/foo/bar?baz=qux\" | SAMPLE .5";
Expand Down