From ee3c80658d5eec421c60ea2d649a573bf232f8e1 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 13 Aug 2025 13:28:28 +0200 Subject: [PATCH 01/29] Make ScoreOperator and LuceneQueryEvaluator more robust --- .../compute/lucene/LuceneQueryEvaluator.java | 14 ++++++++++---- .../compute/operator/ScoreOperator.java | 7 +++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java index d91df60621fce..d268206cff3ff 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java @@ -61,10 +61,16 @@ protected LuceneQueryEvaluator(BlockFactory blockFactory, ShardConfig[] shards) } public Block executeQuery(Page page) { - // Lucene based operators retrieve DocVectors as first block - Block block = page.getBlock(0); - assert block instanceof DocBlock : "LuceneQueryExpressionEvaluator expects DocBlock as input"; - DocVector docs = (DocVector) block.asVector(); + // Search for DocVector block + Block docBlock = null; + for (int i = 0; i < page.getBlockCount(); i++) { + if (page.getBlock(i) instanceof DocBlock) { + docBlock = page.getBlock(i); + break; + } + } + assert docBlock != null : "LuceneQueryExpressionEvaluator expects a DocBlock"; + DocVector docs = (DocVector) docBlock.asVector(); try { if (docs.singleSegmentNonDecreasing()) { return evalSingleSegmentNonDecreasing(docs).asBlock(); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ScoreOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ScoreOperator.java index 2afc885d71124..1c3d522fda5ab 100644 --- 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ScoreOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ScoreOperator.java @@ -9,7 +9,6 @@ import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.DocVector; import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.DoubleVector; import org.elasticsearch.compute.data.Page; @@ -46,9 +45,9 @@ public ScoreOperator(BlockFactory blockFactory, ExpressionScorer scorer, int sco @Override protected Page process(Page page) { - assert page.getBlockCount() >= 2 : "Expected at least 2 blocks, got " + page.getBlockCount(); - assert page.getBlock(0).asVector() instanceof DocVector : "Expected a DocVector, got " + page.getBlock(0).asVector(); - assert page.getBlock(1).asVector() instanceof DoubleVector : "Expected a DoubleVector, got " + page.getBlock(1).asVector(); + assert page.getBlockCount() > scoreBlockPosition : "Expected to get a score block in position " + scoreBlockPosition; + assert page.getBlock(scoreBlockPosition).asVector() instanceof DoubleVector + : "Expected a DoubleVector as a score block, got " + page.getBlock(scoreBlockPosition).asVector(); Block[] blocks = new Block[page.getBlockCount()]; for (int i = 0; i < page.getBlockCount(); i++) { From 82779eca509f628efd2d754f7e36db8d400eafdc Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 13 Aug 2025 14:10:51 +0200 Subject: [PATCH 02/29] Translate to exact NN when not pushable --- .../function/fulltext/FullTextFunction.java | 8 +++-- .../esql/expression/function/vector/Knn.java | 31 ++++++++++++++----- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java 
index fd11b3eff29ca..5ca845c8c1d4c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -383,18 +383,22 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvalua ShardConfig[] shardConfigs = new ShardConfig[shardContexts.size()]; int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { - shardConfigs[i++] = new ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher()); + shardConfigs[i++] = new ShardConfig(shardContext.toQuery(evaluatorQueryBuilder()), shardContext.searcher()); } return new LuceneQueryExpressionEvaluator.Factory(shardConfigs); } + protected QueryBuilder evaluatorQueryBuilder() { + return queryBuilder(); + } + @Override public ScoreOperator.ExpressionScorer.Factory toScorer(ToScorer toScorer) { List shardContexts = toScorer.shardContexts(); ShardConfig[] shardConfigs = new ShardConfig[shardContexts.size()]; int i = 0; for (EsPhysicalOperationProviders.ShardContext shardContext : shardContexts) { - shardConfigs[i++] = new ShardConfig(shardContext.toQuery(queryBuilder()), shardContext.searcher()); + shardConfigs[i++] = new ShardConfig(shardContext.toQuery(evaluatorQueryBuilder()), shardContext.searcher()); } return new LuceneQueryScoreEvaluator.Factory(shardConfigs); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index bb80f828b19ae..c0077d6d844bc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -13,6 +13,8 @@ import 
org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.search.vectors.ExactKnnQueryBuilder; +import org.elasticsearch.search.vectors.VectorData; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; @@ -275,11 +277,7 @@ protected Query translate(LucenePushdownPredicates pushdownPredicates, Translato Check.notNull(fieldAttribute, "Knn must have a field attribute as the first argument"); String fieldName = getNameFromFieldAttribute(fieldAttribute); - List queryFolded = queryAsObject(); - float[] queryAsFloats = new float[queryFolded.size()]; - for (int i = 0; i < queryFolded.size(); i++) { - queryAsFloats[i] = queryFolded.get(i).floatValue(); - } + float[] queryAsFloats = queryAsFloats(); int kValue = getKIntValue(); Map opts = queryOptions(); @@ -289,8 +287,8 @@ protected Query translate(LucenePushdownPredicates pushdownPredicates, Translato for (Expression filterExpression : filterExpressions()) { if (filterExpression instanceof TranslationAware translationAware) { // We can only translate filter expressions that are translatable. 
In case any is not translatable, - // Knn won't be pushed down as it will not be translatable so it's safe not to translate all filters and check them - // when creating an evaluator for the non-pushed down query + // Knn won't be pushed down so it's safe not to translate all filters and check them when creating an evaluator + // for the non-pushed down query if (translationAware.translatable(pushdownPredicates) == Translatable.YES) { filterQueries.add(handler.asQuery(pushdownPredicates, filterExpression).toQueryBuilder()); } @@ -300,6 +298,15 @@ protected Query translate(LucenePushdownPredicates pushdownPredicates, Translato return new KnnQuery(source(), fieldName, queryAsFloats, opts, filterQueries); } + private float[] queryAsFloats() { + List queryFolded = queryAsObject(); + float[] queryAsFloats = new float[queryFolded.size()]; + for (int i = 0; i < queryFolded.size(); i++) { + queryAsFloats[i] = queryFolded.get(i).floatValue(); + } + return queryAsFloats; + } + public Expression withFilters(List filterExpressions) { return new Knn(source(), field(), query(), k(), options(), queryBuilder(), filterExpressions); } @@ -312,6 +319,16 @@ private Map queryOptions() throws InvalidArgumentException { return options; } + protected QueryBuilder evaluatorQueryBuilder() { + // Either we couldn't push down due to non-pushable filters, or becauses it's part of a disjuncion. Use exact query. 
+ var fieldAttribute = Match.fieldAsFieldAttribute(field()); + Check.notNull(fieldAttribute, "Knn must have a field attribute as the first argument"); + String fieldName = getNameFromFieldAttribute(fieldAttribute); + Map opts = queryOptions(); + + return new ExactKnnQueryBuilder(VectorData.fromFloats(queryAsFloats()), fieldName, (Float) opts.get(VECTOR_SIMILARITY_FIELD)); + } + @Override public BiConsumer postAnalysisPlanVerification() { return (plan, failures) -> { From 0fb162a24b06f00736fabd15ea0dcedd1a43fb5a Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 14 Aug 2025 18:58:29 +0200 Subject: [PATCH 03/29] KNN k is set via optimizer and limit --- .../esql/images/functions/knn.svg | 2 +- .../function/EsqlFunctionRegistry.java | 2 +- .../esql/expression/function/vector/Knn.java | 73 ++++++------------- .../esql/optimizer/LogicalPlanOptimizer.java | 2 + .../rules/logical/PushLimitToKnn.java | 70 ++++++++++++++++++ .../xpack/esql/querydsl/query/KnnQuery.java | 5 +- .../function/fulltext/KnnTests.java | 2 +- .../optimizer/LogicalPlanOptimizerTests.java | 72 ++++++++++++++++++ 8 files changed, 172 insertions(+), 56 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java diff --git a/docs/reference/query-languages/esql/images/functions/knn.svg b/docs/reference/query-languages/esql/images/functions/knn.svg index 6e20dbc217206..75a104a7cdcfa 100644 --- a/docs/reference/query-languages/esql/images/functions/knn.svg +++ b/docs/reference/query-languages/esql/images/functions/knn.svg @@ -1 +1 @@ -KNN(field,query,k,options) \ No newline at end of file +KNN(field,query,options) \ No newline at end of file diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0eca67f625121..dc78e9d4003c5 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -490,7 +490,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(FirstOverTime.class, uni(FirstOverTime::new), "first_over_time"), def(Score.class, uni(Score::new), Score.NAME), def(Term.class, bi(Term::new), "term"), - def(Knn.class, quad(Knn::new), "knn"), + def(Knn.class, tri(Knn::new), "knn"), def(StGeohash.class, StGeohash::new, "st_geohash"), def(StGeohashToLong.class, StGeohashToLong::new, "st_geohash_to_long"), def(StGeohashToString.class, StGeohashToString::new, "st_geohash_to_string"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index c0077d6d844bc..881c5dec51093 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.esql.expression.function.vector; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -56,14 +54,12 @@ import static java.util.Map.entry; import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; -import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; import static 
org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; @@ -73,13 +69,12 @@ import static org.elasticsearch.xpack.esql.expression.function.FunctionUtils.resolveTypeQuery; public class Knn extends FullTextFunction implements OptionalArgument, VectorFunction, PostAnalysisPlanVerificationAware { - private final Logger log = LogManager.getLogger(getClass()); public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); private final Expression field; // k is not serialized as it's already included in the query builder on the rewrite step before being sent to data nodes - private final transient Expression k; + private final transient Integer k; private final Expression options; // Expressions to be used as prefilters in knn query private final List filterExpressions; @@ -107,13 +102,6 @@ public Knn( type = { "dense_vector" }, description = "Vector value to find top nearest neighbours for." ) Expression query, - @Param( - name = "k", - type = { "integer" }, - description = "The number of nearest neighbors to return from each shard. " - + "Elasticsearch collects k results from each shard, then merges them to find the global top results. 
" - + "This value must be less than or equal to num_candidates." - ) Expression k, @MapParam( name = "options", params = { @@ -125,12 +113,13 @@ public Knn( + "Defaults to 1.0." ), @MapParam.MapParamEntry( - name = "num_candidates", + name = "min_candidates", type = "integer", valueHint = { "10" }, - description = "The number of nearest neighbor candidates to consider per shard while doing knn search. " - + "Cannot exceed 10,000. Increasing num_candidates tends to improve the accuracy of the final results. " - + "Defaults to 1.5 * k" + description = "The minimum number of nearest neighbor candidates to consider per shard while doing knn search. " + + " KNN may use a higher number of candidates in case the query can't use a approximate results. " + + "Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. " + + "Defaults to 1.5 * LIMIT used for the query." ), @MapParam.MapParamEntry( name = "similarity", @@ -152,32 +141,29 @@ public Knn( optional = true ) Expression options ) { - this(source, field, query, k, options, null, List.of()); + this(source, field, query, options, null, null, List.of()); } public Knn( Source source, Expression field, Expression query, - Expression k, Expression options, + Integer k, QueryBuilder queryBuilder, List filterExpressions ) { - super(source, query, expressionList(field, query, k, options), queryBuilder); + super(source, query, expressionList(field, query, options), queryBuilder); this.field = field; this.k = k; this.options = options; this.filterExpressions = filterExpressions; } - private static List expressionList(Expression field, Expression query, Expression k, Expression options) { + private static List expressionList(Expression field, Expression query, Expression options) { List result = new ArrayList<>(); result.add(field); result.add(query); - if (k != null) { - result.add(k); - } if (options != null) { result.add(options); } @@ -188,7 +174,7 @@ public Expression field() { 
return field; } - public Expression k() { + public Integer k() { return k; } @@ -207,7 +193,7 @@ public DataType dataType() { @Override protected TypeResolution resolveParams() { - return resolveField().and(resolveQuery()).and(resolveK()).and(Options.resolve(options(), source(), FOURTH, ALLOWED_OPTIONS)); + return resolveField().and(resolveQuery()).and(Options.resolve(options(), source(), THIRD, ALLOWED_OPTIONS)); } private TypeResolution resolveField() { @@ -227,14 +213,9 @@ private TypeResolution resolveQuery() { return TypeResolution.TYPE_RESOLVED; } - private TypeResolution resolveK() { - if (k == null) { - // Function has already been rewritten and included in QueryBuilder - otherwise parsing would have failed - return TypeResolution.TYPE_RESOLVED; - } - - return isType(k(), dt -> dt == INTEGER, sourceText(), THIRD, "integer").and(isFoldable(k(), sourceText(), THIRD)) - .and(isNotNull(k(), sourceText(), THIRD)); + public Knn replaceK(Integer k) { + Check.notNull(k, "k must not be null"); + return new Knn(source(), field(), query(), options(), k, queryBuilder(), filterExpressions()); } public List queryAsObject() { @@ -248,16 +229,9 @@ public List queryAsObject() { throw new EsqlIllegalArgumentException(format(null, "Query value must be a list of numbers in [{}], found [{}]", source(), query)); } - int getKIntValue() { - if (k() instanceof Literal literal) { - return (int) (Number) literal.value(); - } - throw new EsqlIllegalArgumentException(format(null, "K value must be a constant integer in [{}], found [{}]", source(), k())); - } - @Override public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { - return new Knn(source(), field(), query(), k(), options(), queryBuilder, filterExpressions()); + return new Knn(source(), field(), query(), options(), k(), queryBuilder, filterExpressions()); } @Override @@ -273,15 +247,12 @@ public Translatable translatable(LucenePushdownPredicates pushdownPredicates) { @Override protected Query 
translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { + assert k() != null : "Knn function must have a k value set before translation"; var fieldAttribute = Match.fieldAsFieldAttribute(field()); Check.notNull(fieldAttribute, "Knn must have a field attribute as the first argument"); String fieldName = getNameFromFieldAttribute(fieldAttribute); float[] queryAsFloats = queryAsFloats(); - int kValue = getKIntValue(); - - Map opts = queryOptions(); - opts.put(K_FIELD.getPreferredName(), kValue); List filterQueries = new ArrayList<>(); for (Expression filterExpression : filterExpressions()) { @@ -295,7 +266,7 @@ protected Query translate(LucenePushdownPredicates pushdownPredicates, Translato } } - return new KnnQuery(source(), fieldName, queryAsFloats, opts, filterQueries); + return new KnnQuery(source(), fieldName, queryAsFloats, k(), queryOptions(), filterQueries); } private float[] queryAsFloats() { @@ -308,7 +279,7 @@ private float[] queryAsFloats() { } public Expression withFilters(List filterExpressions) { - return new Knn(source(), field(), query(), k(), options(), queryBuilder(), filterExpressions); + return new Knn(source(), field(), query(), options(), k(), queryBuilder(), filterExpressions); } private Map queryOptions() throws InvalidArgumentException { @@ -343,8 +314,8 @@ public Expression replaceChildren(List newChildren) { source(), newChildren.get(0), newChildren.get(1), - newChildren.get(2), - newChildren.size() > 3 ? newChildren.get(3) : null, + newChildren.size() > 2 ? 
newChildren.get(2) : null, + k(), queryBuilder(), filterExpressions() ); @@ -352,7 +323,7 @@ public Expression replaceChildren(List newChildren) { @Override protected NodeInfo info() { - return NodeInfo.create(this, Knn::new, field(), query(), k(), options(), queryBuilder(), filterExpressions()); + return NodeInfo.create(this, Knn::new, field(), query(), options(), k(), queryBuilder(), filterExpressions()); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index dac533f872022..6f550524c5ca5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -44,6 +44,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownInferencePlan; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownJoinPastProject; import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushDownRegexExtract; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.PushLimitToKnn; import org.elasticsearch.xpack.esql.optimizer.rules.logical.RemoveStatsOverride; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAggregateAggExpressionWithEval; import org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAggregateNestedExpressionWithEval; @@ -192,6 +193,7 @@ protected static Batch operators(boolean local) { new PruneColumns(), new PruneLiteralsInOrderBy(), new PushDownAndCombineLimits(), + new PushLimitToKnn(), new PushDownAndCombineFilters(), new PushDownConjunctionsToKnnPrefilters(), new PushDownAndCombineSample(), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java new file mode 100644 index 0000000000000..49e50cc102750 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.optimizer.rules.logical; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.util.Holder; +import org.elasticsearch.xpack.esql.expression.function.vector.Knn; +import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext; +import org.elasticsearch.xpack.esql.plan.logical.Aggregate; +import org.elasticsearch.xpack.esql.plan.logical.Filter; +import org.elasticsearch.xpack.esql.plan.logical.Limit; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.TopN; +import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; +import org.elasticsearch.xpack.esql.plan.logical.inference.Rerank; + +/** + * Traverses the logical plan and pushes down the limit to the KNN function(s) in filter expressions, so KNN can use + * it to set k if not specified. 
+ */ +public class PushLimitToKnn extends OptimizerRules.ParameterizedOptimizerRule { + + public PushLimitToKnn() { + super(OptimizerRules.TransformDirection.DOWN); + } + + @Override + public LogicalPlan rule(Limit limit, LogicalOptimizerContext ctx) { + Holder breakerReached = new Holder<>(false); + Holder firstLimit = new Holder<>(false); + return limit.transformDown(plan -> { + if (breakerReached.get()) { + // We reached a breaker and don't want to continue processing + return plan; + } + if (plan instanceof Filter filter) { + Expression limitAppliedExpression = limitFilterExpressions(filter.condition(), limit, ctx); + if (limitAppliedExpression.equals(filter.condition()) == false) { + return filter.with(limitAppliedExpression); + } + } else if (plan instanceof Limit) { + // Break if it's not the initial limit + breakerReached.set(firstLimit.get()); + firstLimit.set(true); + } else if (plan instanceof TopN || plan instanceof Rerank || plan instanceof Aggregate) { + breakerReached.set(true); + } + + return plan; + }); + } + + /** + * Applies a limit to the filter expressions of a condition. Some filter expressions, such as KNN function, + * can be optimized by applying the limit directly to them. 
+ */ + private Expression limitFilterExpressions(Expression condition, Limit limit, LogicalOptimizerContext ctx) { + return condition.transformDown(exp -> { + if (exp instanceof Knn knn) { + return knn.replaceK((Integer) limit.limit().fold(ctx.foldCtx())); + } + return exp; + }); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index b218b897121df..42a8d771d5879 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -32,9 +32,11 @@ public class KnnQuery extends Query { private final List filterQueries; public static final String RESCORE_OVERSAMPLE_FIELD = "rescore_oversample"; + private final Integer k; - public KnnQuery(Source source, String field, float[] query, Map options, List filterQueries) { + public KnnQuery(Source source, String field, float[] query, Integer k, Map options, List filterQueries) { super(source); + this.k = k; assert options != null; this.field = field; this.query = query; @@ -44,7 +46,6 @@ public KnnQuery(Source source, String field, float[] query, Map @Override protected QueryBuilder asBuilder() { - Integer k = (Integer) options.get(K_FIELD.getPreferredName()); Integer numCands = (Integer) options.get(NUM_CANDS_FIELD.getPreferredName()); RescoreVectorBuilder rescoreVectorBuilder = null; Float oversample = (Float) options.get(RESCORE_OVERSAMPLE_FIELD); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index 9bd4896350ca7..b0a16e2de6217 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -121,7 +121,7 @@ private static List addFunctionNamedParams(List args) { - Knn knn = new Knn(source, args.get(0), args.get(1), args.get(2), args.size() > 3 ? args.get(3) : null); + Knn knn = new Knn(source, args.get(0), args.get(1), args.size() > 2 ? args.get(2) : null); // We need to add the QueryBuilder to the match expression, as it is used to implement equals() and hashCode() and // thus test the serialization methods. But we can only do this if the parameters make sense . if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 373c8d86e6be7..f3489c22dd340 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8568,6 +8568,78 @@ public void testMultipleKnnQueriesInPrefilters() { assertTrue(secondKnnFilters.contains(firstOr.right())); } + public void testKnnImplicitLimit() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + + var query = """ + from test + | where knn(dense_vector, [0, 1, 2]) + """; + var optimized = planTypes(query); + + var limit = as(optimized, Limit.class); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + assertThat(knn.k(), equalTo(1000)); + } + + public void testKnnWithLimit() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + + var query = """ + from test + | where knn(dense_vector, [0, 1, 2]) + | limit 10 + """; + var optimized = planTypes(query); + + var limit = as(optimized, Limit.class); + var 
filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + assertThat(knn.k(), equalTo(10)); + } + + public void testKnnWithMultipleLimits() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + + var query = """ + from test metadata _score + | where knn(dense_vector, [0, 1, 2]) + | limit 20 + | sort _score desc + | limit 10 + """; + var optimized = planTypes(query); + + var topN = as(optimized, TopN.class); + assertThat(topN.limit().fold(FoldContext.small()), equalTo(10)); + var limit = as(topN.child(), Limit.class); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + assertThat(knn.k(), equalTo(20)); + } + + public void testKnnWithMultipleClauses() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + + var query = """ + from test metadata _score + | where knn(dense_vector, [0, 1, 2]) and match(keyword, "test") + | where knn(dense_vector, [1, 2, 3]) + | limit 30 + | sort _score desc + | limit 10 + """; + var optimized = planTypes(query); + + var topN = as(optimized, TopN.class); + assertThat(topN.limit().fold(FoldContext.small()), equalTo(10)); + var limit = as(topN.child(), Limit.class); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + assertThat(knn.k(), equalTo(20)); + } + private LogicalPlanOptimizer getCustomRulesLogicalPlanOptimizer(List> batches) { LogicalOptimizerContext context = new LogicalOptimizerContext(EsqlTestUtils.TEST_CFG, FoldContext.small()); LogicalPlanOptimizer customOptimizer = new LogicalPlanOptimizer(context) { From 5694ae72244ac5160e0e9ccd7c08b53e27431c3d Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 14 Aug 2025 19:03:49 +0200 Subject: [PATCH 04/29] Fix KnnFunctionIT test --- .../xpack/esql/plugin/KnnFunctionIT.java | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git 
a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 9ae1c980337f1..ee858f162c8f7 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -44,9 +44,10 @@ public void testKnnDefaults() { var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, %s, 10) + | WHERE knn(vector, %s) | KEEP id, floats, _score, vector | SORT _score DESC + | LIMIT 10 """, Arrays.toString(queryVector)); try (var resp = run(query)) { @@ -72,36 +73,16 @@ public void testKnnDefaults() { } } - public void testKnnOptions() { - float[] queryVector = new float[numDims]; - Arrays.fill(queryVector, 1.0f); - - var query = String.format(Locale.ROOT, """ - FROM test METADATA _score - | WHERE knn(vector, %s, 5) - | KEEP id, floats, _score, vector - | SORT _score DESC - """, Arrays.toString(queryVector)); - - try (var resp = run(query)) { - assertColumnNames(resp.columns(), List.of("id", "floats", "_score", "vector")); - assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "dense_vector")); - - List> valuesList = EsqlTestUtils.getValuesList(resp); - assertEquals(5, valuesList.size()); - } - } - public void testKnnNonPushedDown() { float[] queryVector = new float[numDims]; Arrays.fill(queryVector, 1.0f); - // TODO we need to decide what to do when / if user uses k for limit, as no more than k results will be returned from knn query var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, %s, 5) OR id > 10 + | WHERE knn(vector, %s) OR id > 10 | KEEP id, floats, _score, vector | SORT _score DESC + | LIMIT 10 """, Arrays.toString(queryVector)); try (var resp = run(query)) { @@ -109,8 +90,7 @@ 
public void testKnnNonPushedDown() { assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "dense_vector")); List> valuesList = EsqlTestUtils.getValuesList(resp); - // K = 5, 1 more for every id > 10 - assertEquals(5 + Math.max(0, numDocs - 10 - 1), valuesList.size()); + assertEquals(10, valuesList.size()); } } @@ -121,7 +101,7 @@ public void testKnnWithPrefilters() { // We retrieve 5 from knn, but must be prefiltered with id > 5 or no result will be returned as it would be post-filtered var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, %s, 5) AND id > 5 + | WHERE knn(vector, %s) AND id > 5 | KEEP id, floats, _score, vector | SORT _score DESC | LIMIT 5 @@ -144,7 +124,8 @@ public void testKnnWithLookupJoin() { var query = String.format(Locale.ROOT, """ FROM test | LOOKUP JOIN test_lookup ON id - | WHERE KNN(lookup_vector, %s, 5) OR id > 10 + | WHERE KNN(lookup_vector, %s) OR id > 10 + | LIMIT 5 """, Arrays.toString(queryVector)); var error = expectThrows(VerificationException.class, () -> run(query)); From 3e68d71ae954a152993c1c9531109d847f24d847 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 14 Aug 2025 17:25:57 +0000 Subject: [PATCH 05/29] [CI] Auto commit changes from spotless --- .../xpack/esql/expression/function/vector/Knn.java | 4 ++-- .../xpack/esql/optimizer/rules/logical/PushLimitToKnn.java | 1 - .../org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 881c5dec51093..7cf1c8562d051 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -116,8 +116,8 
@@ public Knn( name = "min_candidates", type = "integer", valueHint = { "10" }, - description = "The minimum number of nearest neighbor candidates to consider per shard while doing knn search. " + - " KNN may use a higher number of candidates in case the query can't use a approximate results. " + description = "The minimum number of nearest neighbor candidates to consider per shard while doing knn search. " + + " KNN may use a higher number of candidates in case the query can't use a approximate results. " + "Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. " + "Defaults to 1.5 * LIMIT used for the query." ), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java index 49e50cc102750..a8503c300bfbc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java @@ -16,7 +16,6 @@ import org.elasticsearch.xpack.esql.plan.logical.Limit; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.TopN; -import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.plan.logical.inference.Rerank; /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 42a8d771d5879..b633ae712dd52 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -20,7 +20,6 @@ import java.util.Objects; import static 
org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; -import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; From 1b2829bb2a6cc9c60c3d7526622c637badead316 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 14 Aug 2025 20:13:51 +0200 Subject: [PATCH 06/29] Fix CSV tests --- .../src/main/resources/knn-function.csv-spec | 88 +++++++++---------- 1 file changed, 40 insertions(+), 48 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index ce8061534ddbb..173fb974f3188 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -7,7 +7,7 @@ required_capability: knn_function_v3 // tag::knn-function[] from colors metadata _score -| where knn(rgb_vector, [0, 120, 0], 10) +| where knn(rgb_vector, [0, 120, 0]) | sort _score desc, color asc // end::knn-function[] | keep color, rgb_vector @@ -33,7 +33,7 @@ knnSearchWithSimilarityOption required_capability: knn_function_v3 from colors metadata _score -| where knn(rgb_vector, [255,192,203], 140, {"similarity": 40}) +| where knn(rgb_vector, [255,192,203], {"similarity": 40}) | sort _score desc, color asc | keep color, rgb_vector ; @@ -49,10 +49,11 @@ knnHybridSearch required_capability: knn_function_v3 from colors metadata _score -| where match(color, "blue") or knn(rgb_vector, [65,105,225], 10) +| where match(color, "blue") or knn(rgb_vector, [65,105,225]) | where primary == true | sort _score desc, color asc | keep color, rgb_vector +| limit 10 ; color:text | rgb_vector:dense_vector @@ -71,7 +72,7 @@ knnWithPrefilter required_capability: knn_function_v3 from colors metadata _score 
-| where knn(rgb_vector, [128,128,0], 10) and (match(color, "olive") or match(color, "green")) +| where knn(rgb_vector, [128,128,0]) and (match(color, "olive") or match(color, "green")) | sort _score desc, color asc | keep color, rgb_vector ; @@ -85,7 +86,7 @@ knnWithNegatedPrefilter required_capability: knn_function_v3 from colors metadata _score -| where knn(rgb_vector, [128,128,0], 10) and not (match(color, "olive") or match(color, "chocolate")) +| where knn(rgb_vector, [128,128,0]) and not (match(color, "olive") or match(color, "chocolate")) | sort _score desc, color asc | keep color, rgb_vector | LIMIT 10 @@ -109,7 +110,7 @@ required_capability: knn_function_v3 from colors metadata _score | keep rgb_vector, color, _score -| where knn(rgb_vector, [128,255,0], 140) +| where knn(rgb_vector, [128,255,0]) | sort _score desc, color asc | keep rgb_vector | limit 5 @@ -128,7 +129,7 @@ required_capability: knn_function_v3 from colors metadata _score | drop primary -| where knn(rgb_vector, [128,250,0], 140) +| where knn(rgb_vector, [128,250,0]) | sort _score desc, color asc | keep color, rgb_vector | limit 5 @@ -147,7 +148,7 @@ required_capability: knn_function_v3 from colors metadata _score | eval composed_name = locate(color, " ") > 0 -| where knn(rgb_vector, [128,128,0], 140) +| where knn(rgb_vector, [128,128,0]) | sort _score desc, color asc | keep color, composed_name | limit 5 @@ -165,9 +166,10 @@ knnWithConjunction required_capability: knn_function_v3 from colors metadata _score -| where knn(rgb_vector, [255,255,238], 10) and hex_code like "#FFF*" +| where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*" | sort _score desc, color asc | keep color, hex_code, rgb_vector +| limit 10 ; color:text | hex_code:keyword | rgb_vector:dense_vector @@ -184,7 +186,7 @@ knnWithDisjunctionAndFiltersConjunction required_capability: knn_function_v3 from colors metadata _score -| where (knn(rgb_vector, [0,255,255], 140) or knn(rgb_vector, [128, 0, 255], 10)) and primary == 
true +| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true | keep color, rgb_vector, _score | sort _score desc, color asc | drop _score @@ -207,7 +209,7 @@ knnWithNegationsAndFiltersConjunction required_capability: knn_function_v3 from colors metadata _score -| where (knn(rgb_vector, [0,255,255], 140) and not(primary == true and match(color, "blue"))) +| where (knn(rgb_vector, [0,255,255]) and not(primary == true and match(color, "blue"))) | sort _score desc, color asc | keep color, rgb_vector | limit 10 @@ -231,7 +233,7 @@ required_capability: knn_function_v3 from colors metadata _score | eval composed_name = locate(color, " ") > 0 -| where knn(rgb_vector, [128,128,0], 140) and composed_name == false +| where knn(rgb_vector, [128,128,0], {"min_candidates": 100}) and composed_name == false | sort _score desc, color asc | keep color, composed_name | limit 10 @@ -254,55 +256,45 @@ testKnnWithNonPushableDisjunctions required_capability: knn_function_v3 from colors metadata _score -| where knn(rgb_vector, [128,128,0], 140, {"similarity": 30}) or length(color) > 10 +| where knn(rgb_vector, [128,128,0]) or length(color) > 10 | sort _score desc, color asc -| keep color +| keep color +| limit 10 ; color:text -olive -aqua marine -lemon chiffon -papaya whip +olive +sienna +chocolate +peru +golden rod +brown +firebrick +chartreuse +gray +green ; testKnnWithNonPushableDisjunctionsOnComplexExpressions required_capability: knn_function_v3 from colors metadata _score -| where (knn(rgb_vector, [128,128,0], 140, {"similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], 140, {"similarity": 60}) and primary == false) +| where (knn(rgb_vector, [128,128,0], {"similarity": 70}) and length(color) > 10) or (knn(rgb_vector, [128,0,128], {"similarity": 60}) and primary == true) | sort _score desc, color asc | keep color, primary ; color:text | primary:boolean -olive | false -purple | false -indigo | false -; - 
-testKnnInStatsNonPushable -required_capability: knn_function_v3 - -from colors -| where length(color) < 10 -| stats c = count(*) where knn(rgb_vector, [128,128,255], 140) -; - -c: long -50 -; - -testKnnInStatsWithGrouping -required_capability: knn_function_v3 -required_capability: full_text_functions_in_stats_where - -from colors -| where length(color) < 10 -| stats c = count(*) where knn(rgb_vector, [128,128,255], 140) by primary -; - -c: long | primary: boolean -41 | false -9 | true +gray | true +green | true +red | true +black | true +magenta | true +yellow | true +blue | true +aqua marine | false +papaya whip | false +lemon chiffon | false +white | true +cyan | true ; From e3dd487e4fe2ca6643652710e9ac9a064bcd8d62 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 14 Aug 2025 20:14:07 +0200 Subject: [PATCH 07/29] Use min_candidates --- .../esql/expression/function/vector/Knn.java | 4 +++- .../xpack/esql/querydsl/query/KnnQuery.java | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 881c5dec51093..a21d6589011d2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -79,8 +79,10 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun // Expressions to be used as prefilters in knn query private final List filterExpressions; + public static final String MIN_CANDIDATES_OPTION = "min_candidates"; + public static final Map ALLOWED_OPTIONS = Map.ofEntries( - entry(NUM_CANDS_FIELD.getPreferredName(), INTEGER), + entry(MIN_CANDIDATES_OPTION, INTEGER), entry(VECTOR_SIMILARITY_FIELD.getPreferredName(), FLOAT), entry(BOOST_FIELD.getPreferredName(), 
FLOAT), entry(KnnQuery.RESCORE_OVERSAMPLE_FIELD, FLOAT) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 42a8d771d5879..86b1aa2adf0c6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -12,6 +12,7 @@ import org.elasticsearch.search.vectors.RescoreVectorBuilder; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import java.util.ArrayList; import java.util.Arrays; @@ -20,8 +21,6 @@ import java.util.Objects; import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; -import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; -import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; public class KnnQuery extends Query { @@ -36,6 +35,7 @@ public class KnnQuery extends Query { public KnnQuery(Source source, String field, float[] query, Integer k, Map options, List filterQueries) { super(source); + assert k != null && k > 0 : "k must be a positive integer, but was: " + k; this.k = k; assert options != null; this.field = field; @@ -46,15 +46,23 @@ public KnnQuery(Source source, String field, float[] query, Integer k, Map Date: Thu, 14 Aug 2025 20:14:52 +0200 Subject: [PATCH 08/29] Bump capability --- .../src/main/resources/knn-function.csv-spec | 28 +++++++++---------- .../xpack/esql/plugin/KnnFunctionIT.java | 2 +- .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../function/vector/VectorWritables.java | 2 +- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- 
.../xpack/esql/analysis/VerifierTests.java | 16 +++++------ .../function/fulltext/KnnTests.java | 2 +- .../LocalPhysicalPlanOptimizerTests.java | 18 ++++++------ .../optimizer/LogicalPlanOptimizerTests.java | 20 ++++++------- 9 files changed, 46 insertions(+), 46 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 173fb974f3188..313c39a4957ec 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -3,7 +3,7 @@ # top-n query at the shard level knnSearch -required_capability: knn_function_v3 +required_capability: knn_function_v4 // tag::knn-function[] from colors metadata _score @@ -30,7 +30,7 @@ chartreuse | [127.0, 255.0, 0.0] ; knnSearchWithSimilarityOption -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [255,192,203], {"similarity": 40}) @@ -46,7 +46,7 @@ wheat | [245.0, 222.0, 179.0] ; knnHybridSearch -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where match(color, "blue") or knn(rgb_vector, [65,105,225]) @@ -69,7 +69,7 @@ yellow | [255.0, 255.0, 0.0] ; knnWithPrefilter -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [128,128,0]) and (match(color, "olive") or match(color, "green")) @@ -83,7 +83,7 @@ green | [0.0, 128.0, 0.0] ; knnWithNegatedPrefilter -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [128,128,0]) and not (match(color, "olive") or match(color, "chocolate")) @@ -106,7 +106,7 @@ orange | [255.0, 165.0, 0.0] ; knnAfterKeep -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors 
metadata _score | keep rgb_vector, color, _score @@ -125,7 +125,7 @@ rgb_vector:dense_vector ; knnAfterDrop -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | drop primary @@ -144,7 +144,7 @@ lime | [0.0, 255.0, 0.0] ; knnAfterEval -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | eval composed_name = locate(color, " ") > 0 @@ -163,7 +163,7 @@ golden rod | true ; knnWithConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*" @@ -183,7 +183,7 @@ yellow | #FFFF00 | [255.0, 255.0, 0.0] ; knnWithDisjunctionAndFiltersConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true @@ -206,7 +206,7 @@ yellow | [255.0, 255.0, 0.0] ; knnWithNegationsAndFiltersConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where (knn(rgb_vector, [0,255,255]) and not(primary == true and match(color, "blue"))) @@ -229,7 +229,7 @@ azure | [240.0, 255.0, 255.0] ; knnWithNonPushableConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | eval composed_name = locate(color, " ") > 0 @@ -253,7 +253,7 @@ maroon | false ; testKnnWithNonPushableDisjunctions -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [128,128,0]) or length(color) > 10 @@ -276,7 +276,7 @@ green ; testKnnWithNonPushableDisjunctionsOnComplexExpressions -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where (knn(rgb_vector, [128,128,0], {"similarity": 70}) and length(color) > 10) or 
(knn(rgb_vector, [128,0,128], {"similarity": 60}) and primary == true) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index ee858f162c8f7..f5a6eda269f44 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -140,7 +140,7 @@ public void testKnnWithLookupJoin() { @Before public void setup() throws IOException { - assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var indexName = "test"; var client = client().admin().indices(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index a09a9177203c4..d14fc388a0507 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1254,7 +1254,7 @@ public enum Cap { /** * Support knn function */ - KNN_FUNCTION_V3(Build.current().isSnapshot()), + KNN_FUNCTION_V4(Build.current().isSnapshot()), /** * Support for the LIKE operator with a list of wildcards. 
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java index 4a1a2ec9386ae..45840689eb0a5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java @@ -27,7 +27,7 @@ private VectorWritables() { public static List getNamedWritables() { List entries = new ArrayList<>(); - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { entries.add(Knn.ENTRY); } if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 957b235d16323..607e48cbb2702 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -305,7 +305,7 @@ public final void test() throws Throwable { ); assumeFalse( "can't use KNN function in csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KNN_FUNCTION_V3.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KNN_FUNCTION_V4.capabilityName()) ); assumeFalse( "lookup join disabled for csv tests", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 32b4ccb768efe..b165bc25e54af 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1244,7 +1244,7 @@ public void testFieldBasedFullTextFunctions() throws Exception { checkFieldBasedWithNonIndexedColumn("Term", "term(text, \"cat\")", "function"); checkFieldBasedFunctionNotAllowedAfterCommands("Term", "function", "term(title, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3], 10)"); } } @@ -1377,7 +1377,7 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Meditation\", title, body)", "function"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [0, 1, 2], 10)", "function"); } @@ -1432,7 +1432,7 @@ public void testFullTextFunctionsDisjunctions() { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkWithFullTextFunctionsDisjunctions("term(title, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3], 10)"); } } @@ -1497,7 +1497,7 @@ public void testFullTextFunctionsWithNonBooleanFunctions() { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(title, \"Meditation\")", "function"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3], 10)", "function"); } } @@ -1568,7 +1568,7 @@ public void testFullTextFunctionsTargetsExistingField() throws Exception { if 
(EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionTargetsExistingField("term(fist_name, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { testFullTextFunctionTargetsExistingField("knn(vector, [0, 1, 2], 10)"); } } @@ -2147,7 +2147,7 @@ public void testFullTextFunctionOptions() { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", title, body, {\"%s\": %s})"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], 10, {\"%s\": %s})"); } } @@ -2235,7 +2235,7 @@ public void testFullTextFunctionsNullArgs() throws Exception { checkFullTextFunctionNullArgs("term(null, \"query\")", "first"); checkFullTextFunctionNullArgs("term(title, null)", "second"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionNullArgs("knn(null, [0, 1, 2], 10)", "first"); checkFullTextFunctionNullArgs("knn(vector, null, 10)", "second"); checkFullTextFunctionNullArgs("knn(vector, [0, 1, 2], null)", "third"); @@ -2267,7 +2267,7 @@ public void testFullTextFunctionsInStats() { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsInStats("multi_match(\"Meditation\", title, body)"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionsInStats("knn(vector, [0, 1, 2], 10)"); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index b0a16e2de6217..e5298e6665691 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -52,7 +52,7 @@ public static Iterable parameters() { @Before public void checkCapability() { - assumeTrue("KNN is not enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("KNN is not enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); } private static List testCaseSuppliers() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index cd6371e4d4d5e..f2fc051fb497a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1377,7 +1377,7 @@ public void testMultiMatchOptionsPushDown() { public void testKnnOptionsPushDown() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1843,7 +1843,7 @@ public void testFullTextFunctionWithStatsBy(FullTextFunctionTestCase testCase) { } public void testKnnPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1875,7 +1875,7 @@ public void testKnnPrefilters() { } public void testKnnPrefiltersWithMultipleFilters() { - assumeTrue("knn must be enabled", 
EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1911,7 +1911,7 @@ public void testKnnPrefiltersWithMultipleFilters() { } public void testPushDownConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1948,7 +1948,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { } public void testPushDownNegatedConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1985,7 +1985,7 @@ public void testPushDownNegatedConjunctionsToKnnPrefilter() { } public void testNotPushDownDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -2014,7 +2014,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { } public void testNotPushDownKnnWithNonPushablePrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -2048,7 +2048,7 @@ public void testNotPushDownKnnWithNonPushablePrefilters() { } public void testPushDownComplexNegationsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -2098,7 +2098,7 @@ and NOT ((keyword == "test") or knn(dense_vector, [4, 5, 6], 10))) } public void 
testMultipleKnnQueriesInPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index f3489c22dd340..1d6602be798d7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8399,7 +8399,7 @@ public void testSampleNoPushDownChangePoint() { } public void testPushDownConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8419,7 +8419,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { } public void testPushDownMultipleFiltersToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8442,7 +8442,7 @@ public void testPushDownMultipleFiltersToKnnPrefilter() { } public void testNotPushDownDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8459,7 +8459,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { } public void testPushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be 
enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); /* and @@ -8494,7 +8494,7 @@ public void testPushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { } public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); /* or @@ -8526,7 +8526,7 @@ public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { } public void testMultipleKnnQueriesInPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); /* and @@ -8569,7 +8569,7 @@ public void testMultipleKnnQueriesInPrefilters() { } public void testKnnImplicitLimit() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8584,7 +8584,7 @@ public void testKnnImplicitLimit() { } public void testKnnWithLimit() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8600,7 +8600,7 @@ public void testKnnWithLimit() { } public void testKnnWithMultipleLimits() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test metadata _score @@ -8620,7 +8620,7 @@ public void testKnnWithMultipleLimits() { } public void testKnnWithMultipleClauses() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test metadata 
_score From d92a5dda9c7f8eb2dc8e1486698f8b35228c1ff8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 14 Aug 2025 18:21:36 +0000 Subject: [PATCH 09/29] [CI] Auto commit changes from spotless --- .../elasticsearch/xpack/esql/expression/function/vector/Knn.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index dec5e95704f19..dd36d462d8d1a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -54,7 +54,6 @@ import static java.util.Map.entry; import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; -import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH; From 66e3dcb2e930be5aa4288fb7e013075cdddebff1 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 13:09:05 +0200 Subject: [PATCH 10/29] Fix tests --- .../xpack/esql/analysis/AnalyzerTests.java | 2 +- .../xpack/esql/analysis/VerifierTests.java | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index e94fff4c682f1..a20ea8593c07f 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2344,7 +2344,7 @@ public void testDenseVectorImplicitCastingKnn() { Analyzer analyzer = analyzer(loadMapping("mapping-dense_vector.json", "vectors")); var plan = analyze(""" - from test | where knn(vector, [0.342, 0.164, 0.234], 10) + from test | where knn(vector, [0.342, 0.164, 0.234]) """, "mapping-dense_vector.json"); var limit = as(plan, Limit.class); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index b165bc25e54af..2e347c23dde36 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1245,7 +1245,7 @@ public void testFieldBasedFullTextFunctions() throws Exception { checkFieldBasedFunctionNotAllowedAfterCommands("Term", "function", "term(title, \"Meditation\")"); } if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { - checkFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3], 10)"); + checkFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3])"); } } @@ -1378,7 +1378,7 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Meditation\", title, body)", "function"); } if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { - checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [0, 1, 2], 10)", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [0, 1, 2])", "function"); } } @@ -1433,7 +1433,7 @@ public void testFullTextFunctionsDisjunctions() { checkWithFullTextFunctionsDisjunctions("term(title, 
\"Meditation\")"); } if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { - checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3], 10)"); + checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3])"); } } @@ -1498,7 +1498,7 @@ public void testFullTextFunctionsWithNonBooleanFunctions() { checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(title, \"Meditation\")", "function"); } if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { - checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3], 10)", "function"); + checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3])", "function"); } } @@ -2148,7 +2148,7 @@ public void testFullTextFunctionOptions() { checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", title, body, {\"%s\": %s})"); } if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { - checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], 10, {\"%s\": %s})"); + checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], {\"%s\": %s})"); } } @@ -2268,7 +2268,7 @@ public void testFullTextFunctionsInStats() { checkFullTextFunctionsInStats("multi_match(\"Meditation\", title, body)"); } if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { - checkFullTextFunctionsInStats("knn(vector, [0, 1, 2], 10)"); + checkFullTextFunctionsInStats("knn(vector, [0, 1, 2])"); } } From 71d3a4847524eddf8e950f79e28f420703b8aeb4 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 13:31:42 +0200 Subject: [PATCH 11/29] Fix min_candidates handling --- .../org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 86b1aa2adf0c6..fedddfa8bcaa4 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -54,6 +54,7 @@ protected QueryBuilder asBuilder() { Float vectorSimilarity = (Float) options.get(VECTOR_SIMILARITY_FIELD.getPreferredName()); Integer minCandidates = (Integer) options.get(Knn.MIN_CANDIDATES_OPTION); int adjustedK = Math.max(k, minCandidates == null ? 0 : minCandidates); + minCandidates = minCandidates == null ? null : Math.max(minCandidates, adjustedK); KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder( field, From eb47c7bdbf51e2f41e419eb8b48c8d41bd80fe8d Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 13:31:47 +0200 Subject: [PATCH 12/29] Fix tests --- .../LocalPhysicalPlanOptimizerTests.java | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index f2fc051fb497a..b788fa164ebab 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1381,8 +1381,8 @@ public void testKnnOptionsPushDown() { String query = """ from test - | where KNN(dense_vector, [0.1, 0.2, 0.3], 5, - { "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7, "boost": 3.5 }) + | where KNN(dense_vector, [0.1, 0.2, 0.3], + { "similarity": 0.001, "min_candidates": 5000, "rescore_oversample": 7, "boost": 3.5 }) """; var analyzer = makeAnalyzer("mapping-all-types.json"); var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); @@ -1393,12 +1393,12 @@ public void testKnnOptionsPushDown() { var expectedQuery = new 
KnnVectorQueryBuilder( "dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, - 5, - 10, + 5000, + 5000, new RescoreVectorBuilder(7), 0.001f ).boost(3.5f); - assertThat(expectedQuery.toString(), is(planStr.get())); + assertEquals(expectedQuery.toString(), planStr.get()); } /** @@ -1847,7 +1847,7 @@ public void testKnnPrefilters() { String query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) and integer > 10 + | where knn(dense_vector, [0, 1, 2]) and integer > 10 """; var plan = plannerOptimizer.plan(query, IS_SV_STATS, makeAnalyzer("mapping-all-types.json")); @@ -1860,12 +1860,12 @@ public void testKnnPrefilters() { query, unscore(rangeQuery("integer").gt(10)), "integer", - new Source(2, 45, "integer > 10") + new Source(2, 41, "integer > 10") ); KnnVectorQueryBuilder expectedKnnQueryBuilder = new KnnVectorQueryBuilder( "dense_vector", new float[] { 0, 1, 2 }, - 10, + 1000, null, null, null @@ -1879,7 +1879,7 @@ public void testKnnPrefiltersWithMultipleFilters() { String query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) + | where knn(dense_vector, [0, 1, 2]) | where integer > 10 | where keyword == "test" """; @@ -1901,7 +1901,7 @@ public void testKnnPrefiltersWithMultipleFilters() { KnnVectorQueryBuilder expectedKnnQueryBuilder = new KnnVectorQueryBuilder( "dense_vector", new float[] { 0, 1, 2 }, - 10, + 1000, null, null, null @@ -1915,7 +1915,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { String query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) and integer > 10 + | where knn(dense_vector, [0, 1, 2]) and integer > 10 """; var plan = plannerOptimizer.plan(query, IS_SV_STATS, makeAnalyzer("mapping-all-types.json")); @@ -1930,13 +1930,13 @@ public void testPushDownConjunctionsToKnnPrefilter() { query, unscore(rangeQuery("integer").gt(10)), "integer", - new Source(2, 45, "integer > 10") + new Source(2, 41, "integer > 10") ); KnnVectorQueryBuilder expectedKnnQueryBuilder = new KnnVectorQueryBuilder( "dense_vector", new 
float[] { 0, 1, 2 }, - 10, + 1000, null, null, null @@ -1952,7 +1952,7 @@ public void testPushDownNegatedConjunctionsToKnnPrefilter() { String query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) and NOT integer > 10 + | where knn(dense_vector, [0, 1, 2]) and NOT integer > 10 """; var plan = plannerOptimizer.plan(query, IS_SV_STATS, makeAnalyzer("mapping-all-types.json")); @@ -1967,13 +1967,13 @@ public void testPushDownNegatedConjunctionsToKnnPrefilter() { query, unscore(boolQuery().mustNot(unscore(rangeQuery("integer").gt(10)))), "integer", - new Source(2, 45, "NOT integer > 10") + new Source(2, 41, "NOT integer > 10") ); KnnVectorQueryBuilder expectedKnnQueryBuilder = new KnnVectorQueryBuilder( "dense_vector", new float[] { 0, 1, 2 }, - 10, + 1000, null, null, null @@ -1989,7 +1989,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { String query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) or integer > 10 + | where knn(dense_vector, [0, 1, 2]) or integer > 10 """; var plan = plannerOptimizer.plan(query, IS_SV_STATS, makeAnalyzer("mapping-all-types.json")); @@ -2000,12 +2000,12 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { var queryExec = as(field.child(), EsQueryExec.class); // The disjunction should not be pushed down to the KNN query - KnnVectorQueryBuilder knnQueryBuilder = new KnnVectorQueryBuilder("dense_vector", new float[] { 0, 1, 2 }, 10, null, null, null); + KnnVectorQueryBuilder knnQueryBuilder = new KnnVectorQueryBuilder("dense_vector", new float[] { 0, 1, 2 }, 1000, null, null, null); QueryBuilder rangeQueryBuilder = wrapWithSingleQuery( query, unscore(rangeQuery("integer").gt(10)), "integer", - new Source(2, 44, "integer > 10") + new Source(2, 40, "integer > 10") ); var expectedQuery = boolQuery().should(knnQueryBuilder).should(rangeQueryBuilder); @@ -2018,7 +2018,7 @@ public void testNotPushDownKnnWithNonPushablePrefilters() { String query = """ from test - | where ((knn(dense_vector, [0, 1, 2], 
10) AND integer > 10) and ((keyword == "test") or length(text) > 10)) + | where ((knn(dense_vector, [0, 1, 2]) AND integer > 10) and ((keyword == "test") or length(text) > 10)) """; var plan = plannerOptimizer.plan(query, IS_SV_STATS, makeAnalyzer("mapping-all-types.json")); @@ -2041,7 +2041,7 @@ public void testNotPushDownKnnWithNonPushablePrefilters() { query, unscore(rangeQuery("integer").gt(10)), "integer", - new Source(2, 47, "integer > 10") + new Source(2, 43, "integer > 10") ); assertEquals(integerGtQuery.toString(), queryExec.query().toString()); @@ -2052,8 +2052,8 @@ public void testPushDownComplexNegationsToKnnPrefilter() { String query = """ from test - | where ((knn(dense_vector, [0, 1, 2], 10) or NOT integer > 10) - and NOT ((keyword == "test") or knn(dense_vector, [4, 5, 6], 10))) + | where ((knn(dense_vector, [0, 1, 2]) or NOT integer > 10) + and NOT ((keyword == "test") or knn(dense_vector, [4, 5, 6]))) """; var plan = plannerOptimizer.plan(query, IS_SV_STATS, makeAnalyzer("mapping-all-types.json")); @@ -2073,18 +2073,18 @@ and NOT ((keyword == "test") or knn(dense_vector, [4, 5, 6], 10))) query, unscore(boolQuery().mustNot(unscore(termQuery("keyword", "test")))), "keyword", - new Source(3, 6, "NOT ((keyword == \"test\") or knn(dense_vector, [4, 5, 6], 10))") + new Source(3, 6, "NOT ((keyword == \"test\") or knn(dense_vector, [4, 5, 6]))") ); QueryBuilder notIntegerGt10 = wrapWithSingleQuery( query, unscore(boolQuery().mustNot(unscore(rangeQuery("integer").gt(10)))), "integer", - new Source(2, 46, "NOT integer > 10") + new Source(2, 42, "NOT integer > 10") ); - KnnVectorQueryBuilder firstKnn = new KnnVectorQueryBuilder("dense_vector", new float[] { 0, 1, 2 }, 10, null, null, null); - KnnVectorQueryBuilder secondKnn = new KnnVectorQueryBuilder("dense_vector", new float[] { 4, 5, 6 }, 10, null, null, null); + KnnVectorQueryBuilder firstKnn = new KnnVectorQueryBuilder("dense_vector", new float[] { 0, 1, 2 }, 1000, null, null, null); + 
KnnVectorQueryBuilder secondKnn = new KnnVectorQueryBuilder("dense_vector", new float[] { 4, 5, 6 }, 1000, null, null, null); firstKnn.addFilterQuery(notKeywordFilter); secondKnn.addFilterQuery(notIntegerGt10); @@ -2102,7 +2102,7 @@ public void testMultipleKnnQueriesInPrefilters() { String query = """ from test - | where ((knn(dense_vector, [0, 1, 2], 10) or integer > 10) and ((keyword == "test") or knn(dense_vector, [4, 5, 6], 10))) + | where ((knn(dense_vector, [0, 1, 2]) or integer > 10) and ((keyword == "test") or knn(dense_vector, [4, 5, 6]))) """; var plan = plannerOptimizer.plan(query, IS_SV_STATS, makeAnalyzer("mapping-all-types.json")); @@ -2112,24 +2112,24 @@ public void testMultipleKnnQueriesInPrefilters() { var field = as(project.child(), FieldExtractExec.class); var queryExec = as(field.child(), EsQueryExec.class); - KnnVectorQueryBuilder firstKnnQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0, 1, 2 }, 10, null, null, null); + KnnVectorQueryBuilder firstKnnQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0, 1, 2 }, 1000, null, null, null); // Integer range query (right side of first OR) QueryBuilder integerRangeQuery = wrapWithSingleQuery( query, unscore(rangeQuery("integer").gt(10)), "integer", - new Source(2, 46, "integer > 10") + new Source(2, 42, "integer > 10") ); // Second KNN query (right side of second OR) - KnnVectorQueryBuilder secondKnnQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 4, 5, 6 }, 10, null, null, null); + KnnVectorQueryBuilder secondKnnQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 4, 5, 6 }, 1000, null, null, null); // Keyword term query (left side of second OR) QueryBuilder keywordQuery = wrapWithSingleQuery( query, unscore(termQuery("keyword", "test")), "keyword", - new Source(2, 66, "keyword == \"test\"") + new Source(2, 62, "keyword == \"test\"") ); // First OR (knn1 OR integer > 10) From 3daf9534c08ddc038d8bc7fe0881772c49c8baae Mon Sep 17 00:00:00 2001 From: 
cdelgado Date: Thu, 28 Aug 2025 14:18:06 +0200 Subject: [PATCH 13/29] Add tests --- .../LocalPhysicalPlanOptimizerTests.java | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index b788fa164ebab..610e73655b703 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1401,6 +1401,84 @@ public void testKnnOptionsPushDown() { assertEquals(expectedQuery.toString(), planStr.get()); } + public void testKnnUsesLimitForK() { + assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + String query = """ + from test + | where KNN(dense_vector, [0.1, 0.2, 0.3]) + | limit 10 + """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); + + AtomicReference planStr = new AtomicReference<>(); + plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); + + var expectedQuery = new KnnVectorQueryBuilder( + "dense_vector", + new float[] { 0.1f, 0.2f, 0.3f }, + 10, + null, + null, + null + ); + assertEquals(expectedQuery.toString(), planStr.get()); + } + + public void testKnnKAndMinCandidatesLowerK() { + assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + String query = """ + from test + | where KNN(dense_vector, [0.1, 0.2, 0.3], {"min_candidates": 50}) + | limit 10 
+ """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); + + AtomicReference planStr = new AtomicReference<>(); + plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); + + var expectedQuery = new KnnVectorQueryBuilder( + "dense_vector", + new float[] { 0.1f, 0.2f, 0.3f }, + 50, + 50, + null, + null + ); + assertEquals(expectedQuery.toString(), planStr.get()); + } + + public void testKnnKAndMinCandidatesHigherK() { + assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + String query = """ + from test + | where KNN(dense_vector, [0.1, 0.2, 0.3], {"min_candidates": 10}) + | limit 50 + """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); + + AtomicReference planStr = new AtomicReference<>(); + plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); + + var expectedQuery = new KnnVectorQueryBuilder( + "dense_vector", + new float[] { 0.1f, 0.2f, 0.3f }, + 50, + 50, + null, + null + ); + assertEquals(expectedQuery.toString(), planStr.get()); + } + /** * Expecting * LimitExec[1000[INTEGER]] From db5c0182de5a0e5dd1184871ed4ffb12cc3d6d89 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 14:35:49 +0200 Subject: [PATCH 14/29] Fix tests --- .../qa/testFixtures/src/main/resources/knn-function.csv-spec | 4 ++-- .../org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index b171ec6f7f0fe..fed41a41b7e75 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -275,11 +275,11 @@ gray green ; -testKnnWithNonPushableDisjunctionsOnComplexExpressions +testKnnWithNonPushableDisjunctionsAndMinCandidates required_capability: knn_function_v4 from colors metadata _score -| where (knn(rgb_vector, [128,128,0], {"similarity": 70}) and length(color) > 10) or (knn(rgb_vector, [128,0,128], {"similarity": 60}) and primary == true) +| where (knn(rgb_vector, [128,128,0], {"min_candidates": 2}) and length(color) > 10) or (knn(rgb_vector, [128,0,128], {"min_candidates": 2}) and primary == true) | sort _score desc, color asc | keep color, primary ; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 000f758d79315..428e1aa3694f1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2349,7 +2349,7 @@ public void testImplicitCasting() { public void testDenseVectorImplicitCastingKnn() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); checkDenseVectorCastingKnn("float_vector"); } @@ -2362,7 +2362,6 @@ private static void checkDenseVectorCastingKnn(String fieldName) { var limit = as(plan, Limit.class); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - var field = knn.field(); var queryVector = as(knn.query(), Literal.class); assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); 
assertThat(queryVector.value(), equalTo(List.of(0.342f, 0.164f, 0.234f))); From 952a7c92819100c2e9d8b5196d5f12e6559c615d Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 14:36:24 +0200 Subject: [PATCH 15/29] Spotless --- .../LocalPhysicalPlanOptimizerTests.java | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index fb30c5407cd37..d1bb7aeaa166a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1415,14 +1415,7 @@ public void testKnnUsesLimitForK() { AtomicReference planStr = new AtomicReference<>(); plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); - var expectedQuery = new KnnVectorQueryBuilder( - "dense_vector", - new float[] { 0.1f, 0.2f, 0.3f }, - 10, - null, - null, - null - ); + var expectedQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, 10, null, null, null); assertEquals(expectedQuery.toString(), planStr.get()); } @@ -1441,14 +1434,7 @@ public void testKnnKAndMinCandidatesLowerK() { AtomicReference planStr = new AtomicReference<>(); plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); - var expectedQuery = new KnnVectorQueryBuilder( - "dense_vector", - new float[] { 0.1f, 0.2f, 0.3f }, - 50, - 50, - null, - null - ); + var expectedQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, 50, 50, null, null); assertEquals(expectedQuery.toString(), planStr.get()); } @@ -1467,14 +1453,7 @@ public void testKnnKAndMinCandidatesHigherK() { AtomicReference planStr = new 
AtomicReference<>(); plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); - var expectedQuery = new KnnVectorQueryBuilder( - "dense_vector", - new float[] { 0.1f, 0.2f, 0.3f }, - 50, - 50, - null, - null - ); + var expectedQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, 50, 50, null, null); assertEquals(expectedQuery.toString(), planStr.get()); } From f499025df91727ae10af0da9bbc65636a333634c Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 15:31:54 +0200 Subject: [PATCH 16/29] Fix tests --- .../optimizer/LogicalPlanOptimizerTests.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index de7a3d05dc6fb..f98d9b3813292 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8439,7 +8439,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { var query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) and integer > 10 + | where knn(dense_vector, [0, 1, 2]) and integer > 10 """; var optimized = planTypes(query); @@ -8459,7 +8459,7 @@ public void testPushDownMultipleFiltersToKnnPrefilter() { var query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) + | where knn(dense_vector, [0, 1, 2]) | where integer > 10 | where keyword == "test" """; @@ -8482,7 +8482,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { var query = """ from test - | where knn(dense_vector, [0, 1, 2], 10) or integer > 10 + | where knn(dense_vector, [0, 1, 2]) or integer > 10 """; var optimized = planTypes(query); @@ -8512,7 +8512,7 @@ public void 
testPushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { var query = """ from test | where - ((knn(dense_vector, [0, 1, 2], 10) or integer > 10) and keyword == "test") and ((short < 5) or (double > 5.0)) + ((knn(dense_vector, [0, 1, 2]) or integer > 10) and keyword == "test") and ((short < 5) or (double > 5.0)) """; var optimized = planTypes(query); @@ -8547,7 +8547,7 @@ public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { var query = """ from test | where - ((knn(dense_vector, [0, 1, 2], 10) and integer > 10) or keyword == "test") or ((short < 5) and (double > 5.0)) + ((knn(dense_vector, [0, 1, 2]) and integer > 10) or keyword == "test") or ((short < 5) and (double > 5.0)) """; var optimized = planTypes(query); @@ -8575,7 +8575,7 @@ public void testMultipleKnnQueriesInPrefilters() { */ var query = """ from test - | where ((knn(dense_vector, [0, 1, 2], 10) or integer > 10) and ((keyword == "test") or knn(dense_vector, [4, 5, 6], 10))) + | where ((knn(dense_vector, [0, 1, 2]) or integer > 10) and ((keyword == "test") or knn(dense_vector, [4, 5, 6]))) """; var optimized = planTypes(query); @@ -8662,18 +8662,20 @@ public void testKnnWithMultipleClauses() { from test metadata _score | where knn(dense_vector, [0, 1, 2]) and match(keyword, "test") | where knn(dense_vector, [1, 2, 3]) - | limit 30 - | sort _score desc + | sort _score | limit 10 """; var optimized = planTypes(query); var topN = as(optimized, TopN.class); assertThat(topN.limit().fold(FoldContext.small()), equalTo(10)); - var limit = as(topN.child(), Limit.class); - var filter = as(limit.child(), Filter.class); - var knn = as(filter.condition(), Knn.class); - assertThat(knn.k(), equalTo(20)); + var filter = as(topN.child(), Filter.class); + var firstAnd = as(filter.condition(), And.class); + var fistKnn = as(firstAnd.right(), Knn.class); + assertThat(((Literal) fistKnn.query()).value(), is(List.of(1.0f, 2.0f, 3.0f))); + var secondAnd = as(firstAnd.left(), And.class); + var 
secondKnn = as(secondAnd.left(), Knn.class); + assertThat(((Literal) secondKnn.query()).value(), is(List.of(0.0f, 1.0f, 2.0f))); } private LogicalPlanOptimizer getCustomRulesLogicalPlanOptimizer(List> batches) { From 09e02da1513b1478b12e65ff04f083f5936f894e Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 16:38:02 +0200 Subject: [PATCH 17/29] Fix generated docs --- .../query-languages/esql/kibana/definition/functions/knn.json | 2 +- .../reference/query-languages/esql/kibana/docs/functions/knn.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/knn.json b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json index d347891393dcf..f4b77305a200b 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/knn.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json @@ -5,7 +5,7 @@ "description" : "Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
knn function finds nearest vectors through approximate search on indexed dense_vectors.", "signatures" : [ ], "examples" : [ - "from colors metadata _score\n| where knn(rgb_vector, [0, 120, 0], 10)\n| sort _score desc, color asc" + "from colors metadata _score\n| where knn(rgb_vector, [0, 120, 0])\n| sort _score desc, color asc" ], "preview" : true, "snapshot_only" : true diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/knn.md b/docs/reference/query-languages/esql/kibana/docs/functions/knn.md index f32319b080dbb..bea09b0bf50de 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/knn.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/knn.md @@ -5,6 +5,6 @@ Finds the k nearest vectors to a query vector, as measured by a similarity metri ```esql from colors metadata _score -| where knn(rgb_vector, [0, 120, 0], 10) +| where knn(rgb_vector, [0, 120, 0]) | sort _score desc, color asc ``` From 6cbf31abcf9a40c51875a61b6c05d376931df620 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 17:07:57 +0200 Subject: [PATCH 18/29] Add docs and fix equals / hashCode --- .../expression/function/fulltext/FullTextFunction.java | 7 +++++++ .../xpack/esql/expression/function/vector/Knn.java | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index 3e0e71ff9da8a..c9e23fdd29387 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -389,7 +389,14 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvalua return new 
LuceneQueryExpressionEvaluator.Factory(shardConfigs); } + /** + * Returns the query builder to be used when the function cannot be pushed down to Lucene, but uses a + * {@link org.elasticsearch.compute.lucene.LuceneQueryEvaluator} instead + * + * @return the query builder to be used in the {@link org.elasticsearch.compute.lucene.LuceneQueryEvaluator} + */ protected QueryBuilder evaluatorQueryBuilder() { + // Use the same query builder as for the translation by default return queryBuilder(); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 3a2f39e0a5662..cc9b1a421bde9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -292,7 +292,8 @@ private Map queryOptions() throws InvalidArgumentException { } protected QueryBuilder evaluatorQueryBuilder() { - // Either we couldn't push down due to non-pushable filters, or becauses it's part of a disjuncion. Use exact query. + // Either we couldn't push down due to non-pushable filters, or because it's part of a disjuncion. 
+ // Uses a nearest neighbors exact query instead of an approximate one var fieldAttribute = Match.fieldAsFieldAttribute(field()); Check.notNull(fieldAttribute, "Knn must have a field attribute as the first argument"); String fieldName = getNameFromFieldAttribute(fieldAttribute); @@ -359,13 +360,12 @@ public boolean equals(Object o) { return Objects.equals(field(), knn.field()) && Objects.equals(query(), knn.query()) && Objects.equals(queryBuilder(), knn.queryBuilder()) - && Objects.equals(k(), knn.k()) && Objects.equals(filterExpressions(), knn.filterExpressions()); } @Override public int hashCode() { - return Objects.hash(field(), query(), queryBuilder(), k(), filterExpressions()); + return Objects.hash(field(), query(), queryBuilder(), filterExpressions()); } } From e55ff4836a97751043acc50fb282f1653b5faeee Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 19:06:42 +0200 Subject: [PATCH 19/29] equals / hashCode were good as they were --- .../xpack/esql/expression/function/vector/Knn.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index cc9b1a421bde9..76863e9834c9e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -360,12 +360,13 @@ public boolean equals(Object o) { return Objects.equals(field(), knn.field()) && Objects.equals(query(), knn.query()) && Objects.equals(queryBuilder(), knn.queryBuilder()) + && Objects.equals(k(), knn.k()) && Objects.equals(filterExpressions(), knn.filterExpressions()); } @Override public int hashCode() { - return Objects.hash(field(), query(), queryBuilder(), filterExpressions()); + return Objects.hash(field(), query(), queryBuilder(), 
k(), filterExpressions()); } } From 9fa0fd62de39404929c03e695dfcc1f290098ed1 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Thu, 28 Aug 2025 19:06:45 +0200 Subject: [PATCH 20/29] Fix tests --- .../org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 8eda8c27207f1..21ec240d9f8f4 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -114,9 +114,10 @@ public void testKnnOptions() { var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, %s, 5) + | WHERE knn(vector, %s) | KEEP id, _score, vector | SORT _score DESC + | LIMIT 5 """, Arrays.toString(queryVector)); try (var resp = run(query)) { @@ -137,7 +138,7 @@ public void testKnnNonPushedDown() { | WHERE knn(vector, %s) OR id > 100 | KEEP id, _score, vector | SORT _score DESC - | LIMIT 10 + | LIMIT 5 """, Arrays.toString(queryVector)); try (var resp = run(query)) { From a921df50137106ef2984f5afe596274345190729 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Fri, 29 Aug 2025 09:34:08 +0200 Subject: [PATCH 21/29] Fix docs --- .../esql/_snippets/functions/examples/knn.md | 2 +- .../esql/_snippets/functions/functionNamedParams/knn.md | 6 +++--- .../esql/_snippets/functions/parameters/knn.md | 3 --- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/knn.md b/docs/reference/query-languages/esql/_snippets/functions/examples/knn.md index df15bde7deb55..9b6d20b551e7a 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/examples/knn.md +++ 
b/docs/reference/query-languages/esql/_snippets/functions/examples/knn.md @@ -4,7 +4,7 @@ ```esql from colors metadata _score -| where knn(rgb_vector, [0, 120, 0], 10) +| where knn(rgb_vector, [0, 120, 0]) | sort _score desc, color asc ``` diff --git a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md index 1e87271707676..f38a8e8d84584 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md +++ b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md @@ -2,12 +2,12 @@ **Supported function named parameters** -`num_candidates` -: (integer) The number of nearest neighbor candidates to consider per shard while doing knn search. Cannot exceed 10,000. Increasing num_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * k - `boost` : (float) Floating point number used to decrease or increase the relevance scores of the query.Defaults to 1.0. +`min_candidates` +: (integer) The minimum number of nearest neighbor candidates to consider per shard while doing knn search. KNN may use a higher number of candidates in case the query can't use approximate results. Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * LIMIT used for the query. + `rescore_oversample` : (double) Applies the specified oversampling for rescoring quantized vectors. See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.
diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md index e33acabbd014f..fb1b98a1e8a7a 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md @@ -8,9 +8,6 @@ `query` : Vector value to find top nearest neighbours for. -`k` -: The number of nearest neighbors to return from each shard. Elasticsearch collects k results from each shard, then merges them to find the global top results. This value must be less than or equal to num_candidates. - `options` : (Optional) kNN additional options as [function named parameters](/reference/query-languages/esql/esql-syntax.md#esql-function-named-params). See [knn query](/reference/query-languages/query-dsl/query-dsl-match-query.md#query-dsl-knn-query) for more information. From 9d3c85f4fd73f55dc737d1d1ad965bc7d7a6f821 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 13:19:19 +0200 Subject: [PATCH 22/29] Verify that knn has a limit --- .../esql/expression/function/vector/Knn.java | 22 ++++++- .../AbstractLogicalPlanOptimizerTests.java | 3 +- .../optimizer/LogicalPlanOptimizerTests.java | 58 +++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 76863e9834c9e..1e4ffeee5cfbe 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -15,7 +15,9 @@ import org.elasticsearch.search.vectors.VectorData; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import 
org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; +import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; +import org.elasticsearch.xpack.esql.common.Failure; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -67,7 +69,12 @@ import static org.elasticsearch.xpack.esql.expression.Foldables.TypeResolutionValidator.forPreOptimizationValidation; import static org.elasticsearch.xpack.esql.expression.Foldables.resolveTypeQuery; -public class Knn extends FullTextFunction implements OptionalArgument, VectorFunction, PostAnalysisPlanVerificationAware { +public class Knn extends FullTextFunction + implements + OptionalArgument, + VectorFunction, + PostAnalysisPlanVerificationAware, + PostOptimizationVerificationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); @@ -310,6 +317,17 @@ public BiConsumer postAnalysisPlanVerification() { }; } + @Override + public void postOptimizationVerification(Failures failures) { + // Check that a k has been set + if (k() == null) { + failures.add(Failure.fail( + this, + "Knn function must be used with a LIMIT clause after it to set the number of nearest neighbors to find" + )); + } + } + @Override public Expression replaceChildren(List newChildren) { return new Knn( @@ -369,4 +387,6 @@ public int hashCode() { return Objects.hash(field(), query(), queryBuilder(), k(), filterExpressions()); } + + } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java index 0a42b1962bfe1..35c75d99ab925 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java @@ -32,6 +32,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.EsqlTestUtils.unboundLogicalOptimizerContext; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultInferenceResolution; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultLookupResolution; import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; @@ -118,7 +119,7 @@ public static void init() { new EsqlFunctionRegistry(), getIndexResultTypes, enrichResolution, - emptyInferenceResolution() + defaultInferenceResolution() ), TEST_VERIFIER ); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index f98d9b3813292..3b0112033b78b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8635,6 +8635,23 @@ public void testKnnWithLimit() { assertThat(knn.k(), equalTo(10)); } + public void testKnnWithTopN() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + var query = """ + from test metadata _score + | where knn(dense_vector, [0, 1, 2]) + | sort _score desc + | limit 10 + """; + var optimized = planTypes(query); + + var topN = as(optimized, TopN.class); + var filter = as(topN.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + assertThat(knn.k(), equalTo(10)); + } + public void 
testKnnWithMultipleLimits() { assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); @@ -8678,6 +8695,47 @@ public void testKnnWithMultipleClauses() { assertThat(((Literal) secondKnn.query()).value(), is(List.of(0.0f, 1.0f, 2.0f))); } + public void testKnnWithStats() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + assertThat( + typesError("from test | where knn(dense_vector, [0, 1, 2]) | stats c = count(*)"), + containsString("Knn function must be used with a LIMIT clause") + ); + } + + public void testKnnWithRerankAmdTopN() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + assertThat(typesError(""" + from test metadata _score + | where knn(dense_vector, [0, 1, 2]) + | rerank "some text" on text with { "inference_id" : "reranking-inference-id" } + | sort _score desc + | limit 10 + """), containsString("Knn function must be used with a LIMIT clause")); + } + + public void testKnnWithRerankAmdLimit() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + var query = """ + from test metadata _score + | where knn(dense_vector, [0, 1, 2]) + | rerank "some text" on text with { "inference_id" : "reranking-inference-id" } + | limit 100 + """; + + var optimized = planTypes(query); + + var rerank = as(optimized, Rerank.class); + var limit = as(rerank.child(), Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), equalTo(100)); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + assertThat(knn.k(), equalTo(100)); + } + private LogicalPlanOptimizer getCustomRulesLogicalPlanOptimizer(List> batches) { LogicalOptimizerContext context = new LogicalOptimizerContext(EsqlTestUtils.TEST_CFG, FoldContext.small()); LogicalPlanOptimizer customOptimizer = new LogicalPlanOptimizer(context) { From e95a5bcb2e1cea3e81509e81916dedcb826d471c Mon Sep 17 00:00:00 2001 From: cdelgado 
Date: Mon, 1 Sep 2025 13:19:24 +0200 Subject: [PATCH 23/29] Fix tests --- .../org/elasticsearch/xpack/esql/analysis/VerifierTests.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index f759e1d794d4b..9d7af97a51c35 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2258,9 +2258,8 @@ public void testFullTextFunctionsNullArgs() throws Exception { checkFullTextFunctionNullArgs("term(title, null)", "second"); } if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { - checkFullTextFunctionNullArgs("knn(null, [0, 1, 2], 10)", "first"); - checkFullTextFunctionNullArgs("knn(vector, null, 10)", "second"); - checkFullTextFunctionNullArgs("knn(vector, [0, 1, 2], null)", "third"); + checkFullTextFunctionNullArgs("knn(null, [0, 1, 2])", "first"); + checkFullTextFunctionNullArgs("knn(vector, null)", "second"); } } From d11d074dba3b628ec006bef89d2e45ff61a4ad64 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 13:22:21 +0200 Subject: [PATCH 24/29] Spotless --- .../xpack/esql/expression/function/vector/Knn.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 1e4ffeee5cfbe..9add14da034b5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -321,10 +321,9 @@ public BiConsumer postAnalysisPlanVerification() { public void 
postOptimizationVerification(Failures failures) { // Check that a k has been set if (k() == null) { - failures.add(Failure.fail( - this, - "Knn function must be used with a LIMIT clause after it to set the number of nearest neighbors to find" - )); + failures.add( + Failure.fail(this, "Knn function must be used with a LIMIT clause after it to set the number of nearest neighbors to find") + ); } } @@ -387,6 +386,4 @@ public int hashCode() { return Objects.hash(field(), query(), queryBuilder(), k(), filterExpressions()); } - - } From af3296c84776ffeb36d4b25ed058d094a904c47a Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 13:36:38 +0200 Subject: [PATCH 25/29] Add CSV tests for stats / rerank --- .../src/main/resources/knn-function.csv-spec | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index fed41a41b7e75..f8339828a1376 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -298,3 +298,41 @@ lemon chiffon | false white | true cyan | true ; + +testKnnWithStats +required_capability: knn_function_v4 + +from colors metadata _score +| where knn(rgb_vector, [128,128,0]) +| sort _score desc, color asc +| limit 15 +| stats c = count(*) +; + +c:long +15 +; + +testKnnWithRerank +required_capability: knn_function_v4 + +from colors metadata _score +| where knn(rgb_vector, [128,128,0]) +| sort _score desc, color asc +| limit 10 +| rerank "deepest blue" ON color WITH { "inference_id" : "test_reranker" } +| keep color +; + +color:text +olive +sienna +chocolate +peru +golden rod +brown +firebrick +chartreuse +green +maroon +; From e9ed8e579c3f5058e3df15de862b731d2d79dfc0 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 14:02:23 +0200 Subject: [PATCH 26/29] Fix CSV 
test --- .../qa/testFixtures/src/main/resources/knn-function.csv-spec | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index f8339828a1376..b95d3463e11b2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -315,6 +315,7 @@ c:long testKnnWithRerank required_capability: knn_function_v4 +required_capability: rerank from colors metadata _score | where knn(rgb_vector, [128,128,0]) From 9fe7f90b3edfad80a182d64606e5fb08b10d46d9 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 17:27:31 +0200 Subject: [PATCH 27/29] Fix rerank test --- .../src/main/resources/knn-function.csv-spec | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index b95d3463e11b2..e48f666eb2f68 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -318,7 +318,7 @@ required_capability: knn_function_v4 required_capability: rerank from colors metadata _score -| where knn(rgb_vector, [128,128,0]) +| where knn(rgb_vector, [100,120,0]) | sort _score desc, color asc | limit 10 | rerank "deepest blue" ON color WITH { "inference_id" : "test_reranker" } @@ -328,12 +328,12 @@ from colors metadata _score color:text olive sienna -chocolate -peru -golden rod +green brown +chocolate firebrick -chartreuse -green +peru maroon +golden rod +gray ; From 8fe374d4165be4ee870898d6045b93d8ebb53f60 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 17:34:19 +0200 Subject: [PATCH 28/29] Improve rerank test --- 
.../src/main/resources/knn-function.csv-spec | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index e48f666eb2f68..7a0e854f63f90 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -321,19 +321,20 @@ from colors metadata _score | where knn(rgb_vector, [100,120,0]) | sort _score desc, color asc | limit 10 -| rerank "deepest blue" ON color WITH { "inference_id" : "test_reranker" } +| rerank rerank_score = "deepest blue" ON color WITH { "inference_id" : "test_reranker" } +| sort rerank_score desc, color asc | keep color ; color:text +gray +peru +brown +green olive +maroon sienna -green -brown chocolate firebrick -peru -maroon golden rod -gray ; From c4f3da716aebb183daf44bf8430ecba573c38063 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 12:01:43 +0200 Subject: [PATCH 29/29] Add test for multiple limits combination --- .../optimizer/LogicalPlanOptimizerTests.java | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 7d28cb3af5e76..c3c4a9e3f1038 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8716,7 +8716,7 @@ public void testKnnWithTopN() { assertThat(knn.k(), equalTo(10)); } - public void testKnnWithMultipleLimits() { + public void testKnnWithMultipleLimitsAfterTopN() { assumeTrue("knn must be enabled", 
EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ @@ -8736,6 +8736,24 @@ public void testKnnWithMultipleLimits() { assertThat(knn.k(), equalTo(20)); } + public void testKnnWithMultipleLimitsCombined() { + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + + var query = """ + from test metadata _score + | where knn(dense_vector, [0, 1, 2]) + | limit 20 + | limit 10 + """; + var optimized = planTypes(query); + + var limit = as(optimized, Limit.class); + assertThat(limit.limit().fold(FoldContext.small()), equalTo(10)); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + assertThat(knn.k(), equalTo(10)); + } + public void testKnnWithMultipleClauses() { assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled());