From 6f4d01195e7da79af54d8f2c9bdd6963dac5fa71 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 24 Apr 2025 13:00:47 +0200 Subject: [PATCH 01/64] Add dense_vector field type support --- .../xpack/esql/core/plugin/EsqlCorePlugin.java | 1 + .../org/elasticsearch/xpack/esql/core/type/DataType.java | 7 +++++-- .../java/org/elasticsearch/xpack/esql/EsqlTestUtils.java | 2 +- .../elasticsearch/xpack/esql/action/EsqlCapabilities.java | 8 +++++++- .../xpack/esql/action/PositionToXContent.java | 2 +- .../xpack/esql/action/ResponseValueUtils.java | 2 +- .../esql/expression/function/scalar/nulls/Coalesce.java | 2 +- .../xpack/esql/planner/LocalExecutionPlanner.java | 2 +- .../elasticsearch/xpack/esql/planner/PlannerUtils.java | 4 ++-- 9 files changed, 20 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java index 4bd722992f524..b9f55f7985e84 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java @@ -14,4 +14,5 @@ public class EsqlCorePlugin extends Plugin implements ExtensiblePlugin { public static final FeatureFlag AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG = new FeatureFlag("esql_aggregate_metric_double"); + public static final FeatureFlag DENSE_VECTOR_FEATURE_FLAG = new FeatureFlag("esql_dense_vector"); } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 596eda6f0a3f7..c8475286dce70 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java 
@@ -302,7 +302,9 @@ public enum DataType { */ PARTIAL_AGG(builder().esType("partial_agg").unknownSize()), - AGGREGATE_METRIC_DOUBLE(builder().esType("aggregate_metric_double").estimatedSize(Double.BYTES * 3 + Integer.BYTES)); + AGGREGATE_METRIC_DOUBLE(builder().esType("aggregate_metric_double").estimatedSize(Double.BYTES * 3 + Integer.BYTES)), + + DENSE_VECTOR(builder().esType("dense_vector").unknownSize()); /** * Types that are actively being built. These types are not returned @@ -311,7 +313,8 @@ public enum DataType { * check that sending them to a function produces a sane error message. */ public static final Map UNDER_CONSTRUCTION = Map.ofEntries( - Map.entry(AGGREGATE_METRIC_DOUBLE, EsqlCorePlugin.AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG) + Map.entry(AGGREGATE_METRIC_DOUBLE, EsqlCorePlugin.AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG), + Map.entry(DENSE_VECTOR, EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG) ); private final String typeName; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 477d6a7651074..ad4a1b89db40d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -837,7 +837,7 @@ public static Literal randomLiteral(DataType type) { throw new UncheckedIOException(e); } } - case UNSUPPORTED, OBJECT, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG -> throw new IllegalArgumentException( + case UNSUPPORTED, OBJECT, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG, DENSE_VECTOR -> throw new IllegalArgumentException( "can't make random values for [" + type.typeName() + "]" ); }, type); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 3551ceeb3c068..3aa24f05bce17 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -20,6 +20,7 @@ import java.util.Set; import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG; +import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG; /** * A {@link Set} of "capabilities" supported by the {@link RestEsqlQueryAction} @@ -1038,7 +1039,12 @@ public enum Cap { /** * Support for the SAMPLE command */ - SAMPLE(Build.current().isSnapshot()); + SAMPLE(Build.current().isSnapshot()), + + /** + * Support for dense_vector field type + */ + DENSE_VECTOR_SUPPORT(DENSE_VECTOR_FEATURE_FLAG); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java index 597b3021a1a13..19a61566adf67 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java @@ -183,7 +183,7 @@ protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Pa } }; case DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, SHORT, BYTE, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT, - PARTIAL_AGG -> throw new IllegalArgumentException("can't convert values of type [" + columnInfo.type() + "]"); + PARTIAL_AGG, DENSE_VECTOR -> throw new IllegalArgumentException("can't convert values of type [" + columnInfo.type() + "]"); }; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java index 4fd554582b56b..b8da39a376350 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java @@ -148,7 +148,7 @@ private static Object valueAt(DataType dataType, Block block, int offset, BytesR } } case SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, NULL, - PARTIAL_AGG -> throw EsqlIllegalArgumentException.illegalDataType(dataType); + PARTIAL_AGG, DENSE_VECTOR -> throw EsqlIllegalArgumentException.illegalDataType(dataType); }; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java index 411aff6f52ab3..c8005506be627 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java @@ -210,7 +210,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { .toEvaluator(toEvaluator, children()); case NULL -> EvalOperator.CONSTANT_NULL_FACTORY; case UNSUPPORTED, SHORT, BYTE, DATE_PERIOD, OBJECT, DOC_DATA_TYPE, SOURCE, TIME_DURATION, FLOAT, HALF_FLOAT, TSID_DATA_TYPE, - SCALED_FLOAT, PARTIAL_AGG, AGGREGATE_METRIC_DOUBLE -> throw new UnsupportedOperationException( + SCALED_FLOAT, PARTIAL_AGG, AGGREGATE_METRIC_DOUBLE, DENSE_VECTOR -> throw new UnsupportedOperationException( dataType() + " can’t be coalesced" ); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 8ef7d43b28d4b..9793670c6a265 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -433,7 +433,7 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte case GEO_POINT, CARTESIAN_POINT, GEO_SHAPE, CARTESIAN_SHAPE, COUNTER_LONG, COUNTER_INTEGER, COUNTER_DOUBLE, SOURCE, AGGREGATE_METRIC_DOUBLE -> TopNEncoder.DEFAULT_UNSORTABLE; // unsupported fields are encoded as BytesRef, we'll use the same encoder; all values should be null at this point - case PARTIAL_AGG, UNSUPPORTED -> TopNEncoder.UNSUPPORTED; + case PARTIAL_AGG, DENSE_VECTOR, UNSUPPORTED -> TopNEncoder.UNSUPPORTED; }; } List orders = topNExec.order().stream().map(order -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index df2da82e3bee0..05af2710a62a2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -299,8 +299,8 @@ public static ElementType toElementType(DataType dataType, MappedFieldType.Field case GEO_POINT, CARTESIAN_POINT -> fieldExtractPreference == DOC_VALUES ? ElementType.LONG : ElementType.BYTES_REF; case GEO_SHAPE, CARTESIAN_SHAPE -> fieldExtractPreference == EXTRACT_SPATIAL_BOUNDS ? 
ElementType.INT : ElementType.BYTES_REF; case PARTIAL_AGG, AGGREGATE_METRIC_DOUBLE -> ElementType.COMPOSITE; - case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT -> throw EsqlIllegalArgumentException - .illegalDataType(dataType); + case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT, DENSE_VECTOR -> + throw EsqlIllegalArgumentException.illegalDataType(dataType); }; } From 3c4c4010b06b58d64adaf7c53374fe8239b57afd Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 24 Apr 2025 13:18:33 +0200 Subject: [PATCH 02/64] Knn function minimal support --- .../xpack/esql/plugin/KnnFunctionIT.java | 85 ++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../esql/expression/ExpressionWritables.java | 10 ++ .../function/EsqlFunctionRegistry.java | 4 +- .../esql/expression/function/vector/Knn.java | 153 ++++++++++++++++++ .../xpack/esql/querydsl/query/KnnQuery.java | 50 ++++++ 6 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java new file mode 100644 index 0000000000000..680f66fcaea7b --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +public class KnnFunctionIT extends AbstractEsqlIntegTestCase { + + private final Map> indexedVectors = new HashMap<>(); + + public void testKnn() { + var query = """ + FROM test + | WHERE knn(vector, [1.0, 2.0, 3.0]) + | KEEP id, floats + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "floats")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + } + } + + @Before + public void setup() throws IOException { + var indexName = "test"; + var client = client().admin().indices(); + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("id") + .field("type", "integer") + .endObject() + .startObject("vector") + .field("type", "dense_vector") + .field("similarity", "l2_norm") + .endObject() + .startObject("floats") + .field("type", "float") + .endObject() + .endObject() + .endObject(); + + Settings.Builder settingsBuilder = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1); + + var CreateRequest = client.prepareCreate(indexName).setMapping(mapping).setSettings(settingsBuilder.build()); + assertAcked(CreateRequest); + + int 
numDocs = randomIntBetween(10, 100); + int numDims = 3; + IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; + float value = 0.0f; + for (int i = 0; i < numDocs; i++) { + List vector = new ArrayList<>(numDims); + for (int j = 0; j < numDims; j++) { + vector.add(value++); + } + docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "vector", vector); + indexedVectors.put(i, vector); + } + + indexRandom(true, docs); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 3aa24f05bce17..ac2316ce36d77 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1044,7 +1044,12 @@ public enum Cap { /** * Support for dense_vector field type */ - DENSE_VECTOR_SUPPORT(DENSE_VECTOR_FEATURE_FLAG); + DENSE_VECTOR_SUPPORT(DENSE_VECTOR_FEATURE_FLAG), + + /** + * Support knn function + */ + KNN_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java index 4ffa778b8287a..fe989f43dfc96 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.expression; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.core.expression.ExpressionCoreWritables; import 
org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateWritables; @@ -73,6 +74,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Trim; import org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLike; import org.elasticsearch.xpack.esql.expression.function.scalar.util.Delay; +import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.expression.predicate.logical.Not; import org.elasticsearch.xpack.esql.expression.predicate.nulls.IsNotNull; import org.elasticsearch.xpack.esql.expression.predicate.nulls.IsNull; @@ -105,6 +107,7 @@ public static List getNamedWriteables() { entries.addAll(binaryComparisons()); entries.addAll(fullText()); entries.addAll(unaryScalars()); + entries.addAll(vector()); return entries; } @@ -226,4 +229,11 @@ private static List binaryComparisons() { private static List fullText() { return FullTextWritables.getNamedWriteables(); } + + private static List vector() { + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + return List.of(Knn.ENTRY); + } + return List.of(); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0bd4e4bda7c5b..ea6e928618370 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -159,6 +159,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToUpper; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Trim; import org.elasticsearch.xpack.esql.expression.function.scalar.util.Delay; +import 
org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.parser.ParsingException; import org.elasticsearch.xpack.esql.session.Configuration; @@ -446,7 +447,8 @@ private static FunctionDefinition[][] snapshotFunctions() { def(MaxOverTime.class, uni(MaxOverTime::new), "max_over_time"), def(AvgOverTime.class, uni(AvgOverTime::new), "avg_over_time"), def(LastOverTime.class, LastOverTime::withUnresolvedTimestamp, "last_over_time"), - def(Term.class, bi(Term::new), "term") } }; + def(Term.class, bi(Term::new), "term"), + def(Knn.class, bi(Knn::new), "knn") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java new file mode 100644 index 0000000000000..20ef6a6417a8c --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -0,0 +1,153 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xpack.esql.capabilities.TranslationAware; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; +import org.elasticsearch.xpack.esql.core.expression.function.Function; +import org.elasticsearch.xpack.esql.core.querydsl.query.Query; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.Check; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; +import org.elasticsearch.xpack.esql.planner.TranslatorHandler; +import org.elasticsearch.xpack.esql.querydsl.query.KnnQuery; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; +import static 
org.elasticsearch.xpack.esql.expression.function.fulltext.Match.getNameFromFieldAttribute; + +public class Knn extends Function implements TranslationAware { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); + + private final Expression field; + private final Expression query; + + @FunctionInfo( + returnType = "boolean", + preview = true, + description = """ + Finds the k nearest vectors to a query vector, as measured by a similarity metric. + knn function finds nearest vectors through approximate search on indexed dense_vectors + """, + appliesTo = { + @FunctionAppliesTo( + lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT + ) } + ) + public Knn(Source source, Expression field, Expression query) { + super(source, List.of(field, query)); + this.field = field; + this.query = query; + } + + public Expression field() { + return field; + } + + public Expression query() { + return query; + } + + @Override + public DataType dataType() { + return DataType.BOOLEAN; + } + + @Override + protected final TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + return isNotNull(field(), sourceText(), FIRST).and(isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, "dense_vector")) + .and(TypeResolutions.isNumeric(query(), sourceText(), TypeResolutions.ParamOrdinal.SECOND)); + } + + @Override + public boolean translatable(LucenePushdownPredicates pushdownPredicates) { + return true; + } + + @Override + public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { + var fieldAttribute = Match.fieldAsFieldAttribute(field()); + + Check.notNull(fieldAttribute, "Match must have a field attribute as the first argument"); + String fieldName = getNameFromFieldAttribute(fieldAttribute); + @SuppressWarnings("unchecked") + List queryFolded = (List) query().fold(FoldContext.small() /* TODO 
remove me */); + float[] queryAsFloats = new float[queryFolded.size()]; + for (int i = 0; i < queryFolded.size(); i++) { + queryAsFloats[i] = queryFolded.get(i).floatValue(); + } + return new KnnQuery(source(), fieldName, queryAsFloats); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Knn(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Knn::new, field(), query()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + private static Knn readFrom(StreamInput in) throws IOException { + Source source = Source.readFrom((PlanStreamInput) in); + Expression field = in.readNamedWriteable(Expression.class); + Expression query = in.readNamedWriteable(Expression.class); + + return new Knn(source, field, query); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(field()); + out.writeNamedWriteable(query()); + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + if (super.equals(o) == false) return false; + Knn knn = (Knn) o; + return Objects.equals(field, knn.field) && Objects.equals(query, knn.query); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), field, query); + } + +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java new file mode 100644 index 0000000000000..98e91e2e80539 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.querydsl.query; + +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.xpack.esql.core.querydsl.query.Query; +import org.elasticsearch.xpack.esql.core.tree.Source; + +import java.util.Arrays; +import java.util.Objects; + +public class KnnQuery extends Query { + + private final String field; + private final float[] query; + + public KnnQuery(Source source, String field, float[] query) { + super(source); + this.field = field; + this.query = query; + } + + @Override + protected QueryBuilder asBuilder() { + return new KnnVectorQueryBuilder(field, query, null, null, null, null); + } + + @Override + protected String innerToString() { + return "knn(" + field + ", " + Arrays.toString(query) + ")"; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof KnnQuery knnQuery)) return false; + if (super.equals(o) == false) return false; + return Objects.equals(field, knnQuery.field) && Objects.deepEquals(query, knnQuery.query); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), field, Arrays.hashCode(query)); + } +} From 8317911592cc849bbb2cd04c8401b89228ea8b74 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 24 Apr 2025 14:46:59 +0200 Subject: [PATCH 03/64] Add scoring --- .../xpack/esql/plugin/KnnFunctionIT.java | 35 +++++++++++++++---- .../xpack/esql/planner/PlannerUtils.java | 4 ++- .../xpack/esql/querydsl/query/KnnQuery.java | 5 +++ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 680f66fcaea7b..0368a36c5a3de 100644 --- 
a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; import org.junit.Before; @@ -29,14 +30,34 @@ public class KnnFunctionIT extends AbstractEsqlIntegTestCase { public void testKnn() { var query = """ - FROM test - | WHERE knn(vector, [1.0, 2.0, 3.0]) - | KEEP id, floats + FROM test METADATA _score + | WHERE knn(vector, [1.0, 1.0, 1.0]) + | KEEP id, floats, _score, vector + | SORT _score DESC """; try (var resp = run(query)) { - assertColumnNames(resp.columns(), List.of("id", "floats")); - assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertColumnNames(resp.columns(), List.of("id", "floats", "_score", "vector")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "dense_vector")); + + List> valuesList = EsqlTestUtils.getValuesList(resp); + assertEquals(indexedVectors.size(), valuesList.size()); + for (int i = 0; i < valuesList.size(); i++) { + List row = valuesList.get(i); + // Vectors should be in order of ID, as they're less similar than the query vector as the ID increases + assertEquals(i, row.getFirst()); + @SuppressWarnings("unchecked") + // Vectors should be the same + List floats = (List)row.get(1); + for(int j = 0; j < floats.size(); j++) { + assertEquals(floats.get(j).floatValue(), indexedVectors.get(i).get(j), 0f); + } + var score = (Double) row.get(2); + assertNotNull(score); + assertTrue(score > 0.0); + // dense_vector is null for now + assertNull(row.get(3)); + } } } @@ -67,7 +88,7 @@ public void setup() throws IOException { var CreateRequest = 
client.prepareCreate(indexName).setMapping(mapping).setSettings(settingsBuilder.build()); assertAcked(CreateRequest); - int numDocs = randomIntBetween(10, 100); + int numDocs = 10; int numDims = 3; IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; float value = 0.0f; @@ -76,7 +97,7 @@ public void setup() throws IOException { for (int j = 0; j < numDims; j++) { vector.add(value++); } - docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "vector", vector); + docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "floats", vector, "vector", vector); indexedVectors.put(i, vector); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index 05af2710a62a2..e30b2d11d0e2e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -299,7 +299,9 @@ public static ElementType toElementType(DataType dataType, MappedFieldType.Field case GEO_POINT, CARTESIAN_POINT -> fieldExtractPreference == DOC_VALUES ? ElementType.LONG : ElementType.BYTES_REF; case GEO_SHAPE, CARTESIAN_SHAPE -> fieldExtractPreference == EXTRACT_SPATIAL_BOUNDS ? 
ElementType.INT : ElementType.BYTES_REF; case PARTIAL_AGG, AGGREGATE_METRIC_DOUBLE -> ElementType.COMPOSITE; - case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT, DENSE_VECTOR -> + // Can't throw IAE as this is used to estimate row size + case DENSE_VECTOR -> ElementType.NULL; + case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT -> throw EsqlIllegalArgumentException.illegalDataType(dataType); }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 98e91e2e80539..751fb445e1deb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -47,4 +47,9 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(super.hashCode(), field, Arrays.hashCode(query)); } + + @Override + public boolean scorable() { + return true; + } } From e7736be860ab1d3b5a5ccacb40d2111562cd88f2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 24 Apr 2025 13:17:22 +0000 Subject: [PATCH 04/64] [CI] Auto commit changes from spotless --- .../xpack/esql/plugin/KnnFunctionIT.java | 4 ++-- .../esql/expression/function/vector/Knn.java | 16 ++++------------ .../xpack/esql/planner/PlannerUtils.java | 4 ++-- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 0368a36c5a3de..47812e01b5dd5 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -48,8 +48,8 @@ public void testKnn() { assertEquals(i, row.getFirst()); @SuppressWarnings("unchecked") // Vectors should be the same - List floats = (List)row.get(1); - for(int j = 0; j < floats.size(); j++) { + List floats = (List) row.get(1); + for (int j = 0; j < floats.size(); j++) { assertEquals(floats.get(j).floatValue(), indexedVectors.get(i).get(j), 0f); } var score = (Double) row.get(2); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 20ef6a6417a8c..9d01fcf9704b5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -46,18 +46,10 @@ public class Knn extends Function implements TranslationAware { private final Expression field; private final Expression query; - @FunctionInfo( - returnType = "boolean", - preview = true, - description = """ - Finds the k nearest vectors to a query vector, as measured by a similarity metric. - knn function finds nearest vectors through approximate search on indexed dense_vectors - """, - appliesTo = { - @FunctionAppliesTo( - lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT - ) } - ) + @FunctionInfo(returnType = "boolean", preview = true, description = """ + Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
+ knn function finds nearest vectors through approximate search on indexed dense_vectors + """, appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) }) public Knn(Source source, Expression field, Expression query) { super(source, List.of(field, query)); this.field = field; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index e30b2d11d0e2e..1990bca5a1e21 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -301,8 +301,8 @@ public static ElementType toElementType(DataType dataType, MappedFieldType.Field case PARTIAL_AGG, AGGREGATE_METRIC_DOUBLE -> ElementType.COMPOSITE; // Can't throw IAE as this is used to estimate row size case DENSE_VECTOR -> ElementType.NULL; - case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT -> - throw EsqlIllegalArgumentException.illegalDataType(dataType); + case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT -> throw EsqlIllegalArgumentException + .illegalDataType(dataType); }; } From 95880482d60492eee24925dc8f3cf1240560cae8 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 24 Apr 2025 17:03:51 +0200 Subject: [PATCH 05/64] Add options --- .../xpack/esql/plugin/KnnFunctionIT.java | 19 ++++++- .../function/EsqlFunctionRegistry.java | 2 +- .../function/fulltext/FullTextFunction.java | 2 +- .../esql/expression/function/vector/Knn.java | 57 ++++++++++++++++--- .../xpack/esql/querydsl/query/KnnQuery.java | 29 ++++++++-- 5 files changed, 94 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 0368a36c5a3de..4cd5130a9359e 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -28,7 +28,7 @@ public class KnnFunctionIT extends AbstractEsqlIntegTestCase { private final Map> indexedVectors = new HashMap<>(); - public void testKnn() { + public void testKnnDefaults() { var query = """ FROM test METADATA _score | WHERE knn(vector, [1.0, 1.0, 1.0]) @@ -61,6 +61,23 @@ public void testKnn() { } } + public void testKnnOptions() { + var query = """ + FROM test METADATA _score + | WHERE knn(vector, [1.0, 1.0, 1.0], {"k": 5}) + | KEEP id, floats, _score, vector + | SORT _score DESC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "floats", "_score", "vector")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "dense_vector")); + + List> valuesList = EsqlTestUtils.getValuesList(resp); + assertEquals(5, valuesList.size()); + } + } + @Before public void setup() throws IOException { var indexName = "test"; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index ea6e928618370..eb10103f1abae 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -448,7 +448,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(AvgOverTime.class, uni(AvgOverTime::new), "avg_over_time"), def(LastOverTime.class, LastOverTime::withUnresolvedTimestamp, "last_over_time"), 
def(Term.class, bi(Term::new), "term"), - def(Knn.class, bi(Knn::new), "knn") } }; + def(Knn.class, tri(Knn::new), "knn") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index 2d86d7a604b36..f3940b607ccd0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -333,7 +333,7 @@ public ScoreOperator.ExpressionScorer.Factory toScorer(ToScorer toScorer) { return new LuceneQueryScoreEvaluator.Factory(shardConfigs); } - protected static void populateOptionsMap( + public static void populateOptionsMap( final MapExpression options, final Map optionsMap, final TypeResolutions.ParamOrdinal paramOrdinal, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 20ef6a6417a8c..f680c7da83001 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -11,8 +11,10 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.capabilities.TranslationAware; +import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.MapExpression; import 
org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.expression.function.Function; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; @@ -23,6 +25,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; @@ -30,21 +33,43 @@ import org.elasticsearch.xpack.esql.querydsl.query.KnnQuery; import java.io.IOException; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; +import static java.util.Map.entry; +import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; +import static org.elasticsearch.search.vectors.RescoreVectorBuilder.OVERSAMPLE_FIELD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; +import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; 
+import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.populateOptionsMap; import static org.elasticsearch.xpack.esql.expression.function.fulltext.Match.getNameFromFieldAttribute; -public class Knn extends Function implements TranslationAware { +public class Knn extends Function implements TranslationAware, OptionalArgument { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); private final Expression field; private final Expression query; + // TODO Options could be serialized via QueryBuilder in case we want to rewrite it in the coordinator node (for query text inference) + private final Expression options; + + public static final Map ALLOWED_OPTIONS = Map.ofEntries( + entry(K_FIELD.getPreferredName(), INTEGER), + entry(NUM_CANDS_FIELD.getPreferredName(), INTEGER), + entry(VECTOR_SIMILARITY_FIELD.getPreferredName(), FLOAT), + entry(BOOST_FIELD.getPreferredName(), FLOAT), + entry(OVERSAMPLE_FIELD.getPreferredName(), FLOAT) + ); @FunctionInfo( returnType = "boolean", @@ -58,10 +83,11 @@ public class Knn extends Function implements TranslationAware { lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT ) } ) - public Knn(Source source, Expression field, Expression query) { - super(source, List.of(field, query)); + public Knn(Source source, Expression field, Expression query, Expression options) { + super(source, options == null ? 
List.of(field, query) : List.of(field, query, options)); this.field = field; this.query = query; + this.options = options; } public Expression field() { @@ -72,6 +98,10 @@ public Expression query() { return query; } + public Expression options() { + return options; + } + @Override public DataType dataType() { return DataType.BOOLEAN; @@ -104,17 +134,28 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand for (int i = 0; i < queryFolded.size(); i++) { queryAsFloats[i] = queryFolded.get(i).floatValue(); } - return new KnnQuery(source(), fieldName, queryAsFloats); + + return new KnnQuery(source(), fieldName, queryAsFloats, queryOptions()); + } + + private Map queryOptions() throws InvalidArgumentException { + if (options() == null) { + return Map.of(); + } + + Map options = new HashMap<>(); + populateOptionsMap((MapExpression) options(), options, THIRD, sourceText(), ALLOWED_OPTIONS); + return options; } @Override public Expression replaceChildren(List newChildren) { - return new Knn(source(), newChildren.get(0), newChildren.get(1)); + return new Knn(source(), newChildren.get(0), newChildren.get(1), newChildren.size() > 2 ? 
newChildren.get(2) : null); } @Override protected NodeInfo info() { - return NodeInfo.create(this, Knn::new, field(), query()); + return NodeInfo.create(this, Knn::new, field(), query(), options()); } @Override @@ -126,8 +167,9 @@ private static Knn readFrom(StreamInput in) throws IOException { Source source = Source.readFrom((PlanStreamInput) in); Expression field = in.readNamedWriteable(Expression.class); Expression query = in.readNamedWriteable(Expression.class); + Expression options = in.readOptionalNamedWriteable(Expression.class); - return new Knn(source, field, query); + return new Knn(source, field, query, options); } @Override @@ -135,6 +177,7 @@ public void writeTo(StreamOutput out) throws IOException { source().writeTo(out); out.writeNamedWriteable(field()); out.writeNamedWriteable(query()); + out.writeOptionalNamedWriteable(options()); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 751fb445e1deb..742dfdda27221 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -9,43 +9,62 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.search.vectors.RescoreVectorBuilder; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.Source; import java.util.Arrays; +import java.util.Map; import java.util.Objects; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; + public class KnnQuery extends Query { private 
final String field; private final float[] query; + private final Map options; - public KnnQuery(Source source, String field, float[] query) { + public KnnQuery(Source source, String field, float[] query, Map options) { super(source); + assert options != null; this.field = field; this.query = query; + this.options = options; } @Override protected QueryBuilder asBuilder() { - return new KnnVectorQueryBuilder(field, query, null, null, null, null); + Integer k = (Integer) options.get(K_FIELD.getPreferredName()); + Integer numCands = (Integer) options.get(NUM_CANDS_FIELD.getPreferredName()); + RescoreVectorBuilder rescoreVectorBuilder = null; + Float oversample = (Float) options.get(RescoreVectorBuilder.OVERSAMPLE_FIELD.getPreferredName()); + if (oversample != null) { + rescoreVectorBuilder = new RescoreVectorBuilder(oversample); + } + Float vectorSimilarity = (Float) options.get(VECTOR_SIMILARITY_FIELD.getPreferredName()); + + return new KnnVectorQueryBuilder(field, query, k, numCands, rescoreVectorBuilder, vectorSimilarity); } @Override protected String innerToString() { - return "knn(" + field + ", " + Arrays.toString(query) + ")"; + return "knn(" + field + ", " + Arrays.toString(query) + " options={" + options + "}))"; } @Override public boolean equals(Object o) { if (!(o instanceof KnnQuery knnQuery)) return false; if (super.equals(o) == false) return false; - return Objects.equals(field, knnQuery.field) && Objects.deepEquals(query, knnQuery.query); + return Objects.equals(field, knnQuery.field) + && Objects.deepEquals(query, knnQuery.query) && Objects.equals(options, knnQuery.options); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), field, Arrays.hashCode(query)); + return Objects.hash(super.hashCode(), field, Arrays.hashCode(query), options); } @Override From 0f77374a5d9d4b295983ba9884412cff9145c783 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 28 Apr 2025 16:26:12 +0200 Subject: [PATCH 06/64] Make Knn a FullTextFunction --- 
.../esql/expression/function/vector/Knn.java | 47 +++++++++---------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index f680c7da83001..c8d2e5fe0bb63 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -10,13 +10,12 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.xpack.esql.capabilities.TranslationAware; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.core.expression.MapExpression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; -import org.elasticsearch.xpack.esql.core.expression.function.Function; import org.elasticsearch.xpack.esql.core.querydsl.query.Query; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -26,9 +25,9 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import 
org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; import org.elasticsearch.xpack.esql.planner.TranslatorHandler; import org.elasticsearch.xpack.esql.querydsl.query.KnnQuery; @@ -51,16 +50,13 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; -import static org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction.populateOptionsMap; import static org.elasticsearch.xpack.esql.expression.function.fulltext.Match.getNameFromFieldAttribute; -public class Knn extends Function implements TranslationAware, OptionalArgument { +public class Knn extends FullTextFunction implements OptionalArgument { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); private final Expression field; - private final Expression query; - // TODO Options could be serialized via QueryBuilder in case we want to rewrite it in the coordinator node (for query text inference) private final Expression options; public static final Map ALLOWED_OPTIONS = Map.ofEntries( @@ -84,20 +80,20 @@ public class Knn extends Function implements TranslationAware, OptionalArgument ) } ) public Knn(Source source, Expression field, Expression query, Expression options) { - super(source, options == null ? List.of(field, query) : List.of(field, query, options)); + this(source, field, query, options, null); + } + + public Knn(Source source, Expression field, Expression query, Expression options, QueryBuilder queryBuilder) { + super(source, query, options == null ? 
List.of(field, query) : List.of(field, query, options), queryBuilder); this.field = field; - this.query = query; this.options = options; } + public Expression field() { return field; } - public Expression query() { - return query; - } - public Expression options() { return options; } @@ -108,7 +104,7 @@ public DataType dataType() { } @Override - protected final TypeResolution resolveType() { + protected TypeResolution resolveParams() { if (childrenResolved() == false) { return new TypeResolution("Unresolved children"); } @@ -118,12 +114,7 @@ protected final TypeResolution resolveType() { } @Override - public boolean translatable(LucenePushdownPredicates pushdownPredicates) { - return true; - } - - @Override - public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { + protected Query translate(TranslatorHandler handler) { var fieldAttribute = Match.fieldAsFieldAttribute(field()); Check.notNull(fieldAttribute, "Match must have a field attribute as the first argument"); @@ -138,6 +129,11 @@ public Query asQuery(LucenePushdownPredicates pushdownPredicates, TranslatorHand return new KnnQuery(source(), fieldName, queryAsFloats, queryOptions()); } + @Override + public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { + return new Knn(source(), field(), query(), options(), queryBuilder); + } + private Map queryOptions() throws InvalidArgumentException { if (options() == null) { return Map.of(); @@ -167,9 +163,9 @@ private static Knn readFrom(StreamInput in) throws IOException { Source source = Source.readFrom((PlanStreamInput) in); Expression field = in.readNamedWriteable(Expression.class); Expression query = in.readNamedWriteable(Expression.class); - Expression options = in.readOptionalNamedWriteable(Expression.class); + QueryBuilder queryBuilder = in.readOptionalNamedWriteable(QueryBuilder.class); - return new Knn(source, field, query, options); + return new Knn(source, field, query, null, queryBuilder); } @Override @@ 
-177,7 +173,7 @@ public void writeTo(StreamOutput out) throws IOException { source().writeTo(out); out.writeNamedWriteable(field()); out.writeNamedWriteable(query()); - out.writeOptionalNamedWriteable(options()); + out.writeOptionalNamedWriteable(queryBuilder()); } @Override @@ -185,12 +181,13 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; if (super.equals(o) == false) return false; Knn knn = (Knn) o; - return Objects.equals(field, knn.field) && Objects.equals(query, knn.query); + return Objects.equals(field, knn.field) && Objects.equals(query(), knn.query()) + && Objects.equals(queryBuilder(), knn.queryBuilder()); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), field, query); + return Objects.hash(field(), query(), queryBuilder()); } } From 891f4fc47b92788715e912270d0ba1abdd6bd451 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 28 Apr 2025 16:42:58 +0200 Subject: [PATCH 07/64] make knn query not pushable --- .../compute/lucene/LuceneQueryEvaluator.java | 3 ++- .../xpack/esql/plugin/KnnFunctionIT.java | 18 ++++++++++++++++++ .../esql/expression/function/vector/Knn.java | 8 +++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java index b741534736fab..5b60d772d5db0 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java @@ -184,7 +184,8 @@ private class ShardState { private final List perSegmentState; ShardState(ShardConfig config) throws IOException { - weight = config.searcher.createWeight(config.query, scoreMode(), 1.0f); + Query rewritten = config.searcher.rewrite(config.query); + weight = 
config.searcher.createWeight(rewritten, scoreMode(), 1.0f); searcher = config.searcher; perSegmentState = new ArrayList<>(Collections.nCopies(searcher.getLeafContexts().size(), null)); } diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 4cd5130a9359e..6ca66c34e137d 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -78,6 +78,24 @@ public void testKnnOptions() { } } + public void testKnnNonPushedDown() { + var query = """ + FROM test METADATA _score + | WHERE knn(vector, [1.0, 1.0, 1.0], {"k": 5}) OR id % 2 == 0 + | KEEP id, floats, _score, vector + | SORT _score DESC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "floats", "_score", "vector")); + assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "dense_vector")); + + List> valuesList = EsqlTestUtils.getValuesList(resp); + // K = 5, 2 more for % operator, total 7 + assertEquals(7, valuesList.size()); + } + } + @Before public void setup() throws IOException { var indexName = "test"; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index c8d2e5fe0bb63..bc56825844464 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -146,7 +146,13 @@ private Map queryOptions() throws InvalidArgumentException { @Override public Expression replaceChildren(List newChildren) { - return new Knn(source(), 
newChildren.get(0), newChildren.get(1), newChildren.size() > 2 ? newChildren.get(2) : null); + return new Knn( + source(), + newChildren.get(0), + newChildren.get(1), + newChildren.size() > 2 ? newChildren.get(2) : null, + queryBuilder() + ); } @Override From fb2a3c70ef5f2ffea0b88a7ab1559b7bc5b09f28 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 8 Apr 2025 11:37:57 +0200 Subject: [PATCH 08/64] Add CSV tests and necessary infra for dense_vector field type --- .../org/elasticsearch/xpack/esql/CsvAssert.java | 5 +++++ .../org/elasticsearch/xpack/esql/CsvTestUtils.java | 3 +++ .../xpack/esql/CsvTestsDataLoader.java | 4 +++- .../src/main/resources/data/dense_vector.csv | 3 +++ .../src/main/resources/dense_vector.csv-spec | 13 +++++++++++++ .../src/main/resources/mapping-dense_vector.json | 11 +++++++++++ 6 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java index 3f8478fe713a3..4cd6343374977 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java @@ -35,6 +35,7 @@ import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.xpack.esql.CsvTestUtils.ExpectedResults; import static org.elasticsearch.xpack.esql.CsvTestUtils.Type; +import static org.elasticsearch.xpack.esql.CsvTestUtils.Type.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.CsvTestUtils.Type.UNSIGNED_LONG; import static 
org.elasticsearch.xpack.esql.CsvTestUtils.logMetaData; import static org.elasticsearch.xpack.esql.core.util.DateUtils.UTC_DATE_TIME_FORMATTER; @@ -145,6 +146,10 @@ private static void assertMetadata( // Type.asType translates all bytes references into keywords continue; } + if (blockType == Type.DOUBLE && expectedType == DENSE_VECTOR) { + // DENSE_VECTOR is internally represented as a double block + continue; + } if (blockType == Type.NULL) { // Null pages don't have any real type information beyond "it's all null, man" continue; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index 24db9f6931672..5330cd0bb5627 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -486,6 +486,7 @@ public enum Type { x -> x == null ? 
null : stringToAggregateMetricDoubleLiteral(x), AggregateMetricDoubleBlockBuilder.AggregateMetricDoubleLiteral.class ), + DENSE_VECTOR(Double::parseDouble, Double.class), UNSUPPORTED(Type::convertUnsupported, Void.class); private static Void convertUnsupported(String s) { @@ -528,6 +529,8 @@ private static Void convertUnsupported(String s) { LOOKUP.put("DATE", DATETIME); LOOKUP.put("DT", DATETIME); LOOKUP.put("V", VERSION); + + LOOKUP.put("DENSE_VECTOR", DENSE_VECTOR); } private final Function converter; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 2abe77fe08c89..3dd0b0d31d20a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -140,6 +140,7 @@ public class CsvTestsDataLoader { private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true); private static final TestDataset LOGS = new TestDataset("logs"); private static final TestDataset MV_TEXT = new TestDataset("mv_text"); + private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector"); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -198,7 +199,8 @@ public class CsvTestsDataLoader { Map.entry(BOOKS.indexName, BOOKS), Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT), Map.entry(LOGS.indexName, LOGS), - Map.entry(MV_TEXT.indexName, MV_TEXT) + Map.entry(MV_TEXT.indexName, MV_TEXT), + Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv new file mode 100644 index 0000000000000..db856b59c1348 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv @@ -0,0 +1,3 @@ +id:l, vector:dense_vector +0, [1.0, 2.0, 3.0] +1, [4.0, 5.0, 6.0] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec new file mode 100644 index 0000000000000..592dbfaae4cff --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -0,0 +1,13 @@ + +retrieveDenseVectorData +required_capability: dense_vector_field_type + +FROM dense_vector +| KEEP id, vector +| SORT id +; + +id:l | vector:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json new file mode 100644 index 0000000000000..572d9870d09da --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json @@ -0,0 +1,11 @@ +{ + "properties": { + "id": { + "type": "long" + }, + "vector": { + "type": "dense_vector", + "similarity": "l2_norm" + } + } +} From 8e9b280061f1d36f883e52d0d64a9bbdbd279db3 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 8 Apr 2025 13:10:47 +0200 Subject: [PATCH 09/64] Make CSV test loader to use numbers when there are multivalued numeric fields --- .../java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 3dd0b0d31d20a..bf1460178a63b 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -233,6 +233,7 @@ public class CsvTestsDataLoader { CITY_BOUNDARIES_ENRICH, CITY_AIRPORTS_ENRICH ); + public static final String NUMERIC_REGEX = "-?\\d+(\\.\\d+)?"; /** *

@@ -658,7 +659,8 @@ private static void loadCsvData(RestClient client, String indexName, URL resourc private static String quoteIfNecessary(String value) { boolean isQuoted = (value.startsWith("\"") && value.endsWith("\"")) || (value.startsWith("{") && value.endsWith("}")); - return isQuoted ? value : "\"" + value + "\""; + boolean isNumeric = value.matches(NUMERIC_REGEX); + return isQuoted || isNumeric ? value : "\"" + value + "\""; } private static void sendBulkRequest(String indexName, StringBuilder builder, RestClient client, Logger logger, List failures) From 0f58f245af30dc4f1432ff24db32624ae936b983 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 28 Apr 2025 20:24:20 +0200 Subject: [PATCH 10/64] Implicit casting --- .../xpack/esql/analysis/Analyzer.java | 24 ++++++++++++++++++- .../esql/expression/function/vector/Knn.java | 4 ++-- .../function/vector/VectorFunction.java | 16 +++++++++++++ .../xpack/esql/analysis/AnalyzerTests.java | 19 +++++++++++++++ 4 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 1e0577193cab2..1e651c14fdaf1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -65,6 +65,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToUnsignedLong; import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; +import org.elasticsearch.xpack.esql.expression.function.vector.VectorFunction; import 
org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; @@ -1365,9 +1366,11 @@ private static Expression cast(org.elasticsearch.xpack.esql.core.expression.func if (f instanceof EsqlArithmeticOperation || f instanceof BinaryComparison) { return processBinaryOperator((BinaryOperator) f); } + if (f instanceof VectorFunction vectorFunction) { + return processVectorFunction(f); + } return f; } - private static Expression processScalarOrGroupingFunction( org.elasticsearch.xpack.esql.core.expression.function.Function f, EsqlFunctionRegistry registry @@ -1564,6 +1567,25 @@ private static Expression castStringLiteral(Expression from, DataType target) { return unresolvedAttribute(from, target.toString(), e); } } + + private static Expression processVectorFunction(org.elasticsearch.xpack.esql.core.expression.function.Function vectorFunction) { + List args = vectorFunction.arguments(); + List newArgs = new ArrayList<>(); + for (Expression arg : args) { + if (arg.resolved() && arg.dataType().isNumeric() && arg.foldable()) { + Object folded = arg.fold(FoldContext.small() /* TODO remove me */); + if (folded instanceof List) { + Literal denseVector = new Literal(arg.source(), folded, DataType.DENSE_VECTOR); + newArgs.add(denseVector); + continue; + } + } + newArgs.add(arg); + } + + return vectorFunction.replaceChildren(newArgs); + } + } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index bc56825844464..3217a1a18a405 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -52,7 +52,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.expression.function.fulltext.Match.getNameFromFieldAttribute; -public class Knn extends FullTextFunction implements OptionalArgument { +public class Knn extends FullTextFunction implements OptionalArgument, VectorFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); @@ -110,7 +110,7 @@ protected TypeResolution resolveParams() { } return isNotNull(field(), sourceText(), FIRST).and(isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, "dense_vector")) - .and(TypeResolutions.isNumeric(query(), sourceText(), TypeResolutions.ParamOrdinal.SECOND)); + .and(isType(query(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.SECOND)); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java new file mode 100644 index 0000000000000..83245934f2b9f --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java @@ -0,0 +1,16 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +/** + * Marker interface for vector functions. 
Makes it possible to do implicit casting + * from multi values to dense_vector field types, so parameters are actually + * processed as dense_vectors in vector functions + */ +public interface VectorFunction { +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 5f9c14e2fda61..d20221df2ca28 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -49,6 +49,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring; +import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan; import org.elasticsearch.xpack.esql.index.EsIndex; @@ -2347,6 +2348,24 @@ public void testImplicitCasting() { assertThat(e.getMessage(), containsString("[+] has arguments with incompatible types [datetime] and [datetime]")); } + public void testDenseVectorImplicitCasting() { + Analyzer analyzer = analyzer(loadMapping("mapping-dense_vector.json", "vectors")); + + var plan = analyze(""" + from test | where knn(vector, [0.342, 0.164, 0.234]) + """, + "mapping-dense_vector.json"); + + var limit = as(plan, Limit.class); + var filter = as(limit.child(), Filter.class); + var knn = as(filter.condition(), Knn.class); + var field = knn.field(); + var queryVector = as(knn.query(), Literal.class); + assertEquals(DataType.DENSE_VECTOR, queryVector.dataType()); + assertThat(queryVector.value(), equalTo(List.of(0.342, 0.164, 0.234))); + } + + public void 
testRateRequiresCounterTypes() { assumeTrue("rate requires snapshot builds", Build.current().isSnapshot()); Analyzer analyzer = analyzer(tsdbIndexResolution()); From e92c92b7d020dedaf0878498c525e01147bc1615 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 6 May 2025 10:49:31 +0200 Subject: [PATCH 11/64] Format changes --- .../elasticsearch/xpack/esql/analysis/VerifierTests.java | 1 + .../xpack/esql/parser/StatementParserTests.java | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index e75cd5ea05a6c..604300f16cae9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1081,6 +1081,7 @@ public void testAggregateOnCounter() { + " found value [network.bytes_in] type [counter_long]" ) ); + } assertThat( error("FROM tests | STATS max(network.bytes_in)", tsdb), diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index bc08f14867f56..63fa7109e6bc4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -230,7 +230,14 @@ public void testRowCommandWithEscapedFieldName() { public void testCompositeCommand() { assertEquals( - new Filter(EMPTY, new Row(EMPTY, List.of(new Alias(EMPTY, "a", integer(1)))), TRUE), + new Filter(EMPTY, + new Row(EMPTY, + List.of( + new Alias(EMPTY, "a", integer(1)) + ) + ), + TRUE), + statement("row a = 1 | where true") ); } From e44745e152ee8f3ad27d494fed812aa259548ee5 Mon Sep 17 00:00:00 
2001 From: carlosdelest Date: Fri, 23 May 2025 21:59:07 +0200 Subject: [PATCH 12/64] Add testing, fix LuceneQueryEvaluator to pick docs.getPositionCount instead of the docs length to avoid different lengths with non-pushed functions when indexRandom is used --- .../compute/lucene/LuceneQueryEvaluator.java | 2 +- .../xpack/esql/plugin/KnnFunctionIT.java | 47 ++++++++++++------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java index 5b60d772d5db0..cff8cddd8eea3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneQueryEvaluator.java @@ -112,7 +112,7 @@ private Vector evalSingleSegmentNonDecreasing(DocVector docs) throws IOException int min = docs.docs().getInt(0); int max = docs.docs().getInt(docs.getPositionCount() - 1); int length = max - min + 1; - try (T scoreBuilder = createVectorBuilder(blockFactory, length)) { + try (T scoreBuilder = createVectorBuilder(blockFactory, docs.getPositionCount())) { if (length == docs.getPositionCount() && length > 1) { return segmentState.scoreDense(scoreBuilder, min, max); } diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index a654e791a526d..33d6a032efdfe 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import 
org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xpack.esql.EsqlTestUtils; @@ -18,8 +19,10 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -27,21 +30,26 @@ public class KnnFunctionIT extends AbstractEsqlIntegTestCase { private final Map> indexedVectors = new HashMap<>(); + private int numDocs; + private int numDims; public void testKnnDefaults() { - var query = """ + float[] queryVector = new float[numDims]; + Arrays.fill(queryVector, 1.0f); + + var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, [1.0, 1.0, 1.0]) + | WHERE knn(vector, %s) | KEEP id, floats, _score, vector | SORT _score DESC - """; + """, Arrays.toString(queryVector)); try (var resp = run(query)) { assertColumnNames(resp.columns(), List.of("id", "floats", "_score", "vector")); assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "dense_vector")); List> valuesList = EsqlTestUtils.getValuesList(resp); - assertEquals(indexedVectors.size(), valuesList.size()); + assertEquals(Math.min(indexedVectors.size(), 10), valuesList.size()); for (int i = 0; i < valuesList.size(); i++) { List row = valuesList.get(i); // Vectors should be in order of ID, as they're less similar than the query vector as the ID increases @@ -62,12 +70,15 @@ public void testKnnDefaults() { } public void testKnnOptions() { - var query = """ + float[] queryVector = new float[numDims]; + Arrays.fill(queryVector, 1.0f); + + var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, [1.0, 1.0, 
1.0], {"k": 5}) + | WHERE knn(vector, %s, {"k": 5}) | KEEP id, floats, _score, vector | SORT _score DESC - """; + """, Arrays.toString(queryVector)); try (var resp = run(query)) { assertColumnNames(resp.columns(), List.of("id", "floats", "_score", "vector")); @@ -79,20 +90,24 @@ public void testKnnOptions() { } public void testKnnNonPushedDown() { - var query = """ + float[] queryVector = new float[numDims]; + Arrays.fill(queryVector, 1.0f); + + // TODO we need to decide what to do when / if user uses k for limit, as no more than k results will be returned from knn query + var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, [1.0, 1.0, 1.0], {"k": 5}) OR id % 2 == 0 + | WHERE knn(vector, %s, {"k": 5}) OR id > 10 | KEEP id, floats, _score, vector | SORT _score DESC - """; + """, Arrays.toString(queryVector)); try (var resp = run(query)) { assertColumnNames(resp.columns(), List.of("id", "floats", "_score", "vector")); assertColumnTypes(resp.columns(), List.of("integer", "double", "double", "dense_vector")); List> valuesList = EsqlTestUtils.getValuesList(resp); - // K = 5, 2 more for % operator, total 7 - assertEquals(7, valuesList.size()); + // K = 5, 1 more for every id > 10 + assertEquals(5 + Math.max(0, numDocs - 10 - 1), valuesList.size()); } } @@ -120,11 +135,11 @@ public void setup() throws IOException { .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1); - var CreateRequest = client.prepareCreate(indexName).setMapping(mapping).setSettings(settingsBuilder.build()); - assertAcked(CreateRequest); + var createRequest = client.prepareCreate(indexName).setMapping(mapping).setSettings(settingsBuilder.build()); + assertAcked(createRequest); - int numDocs = 10; - int numDims = 3; + numDocs = randomIntBetween(10, 20); + numDims = randomIntBetween(3, 10); IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; float value = 0.0f; for (int i = 0; i < numDocs; i++) { From 
e1aecf03630835123360987a1d759196c849a007 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 28 May 2025 12:29:58 +0200 Subject: [PATCH 13/64] Fix merge --- .../org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java | 3 --- .../elasticsearch/xpack/esql/action/EsqlCapabilities.java | 7 ++++++- .../xpack/esql/action/PositionToXContent.java | 2 +- .../xpack/esql/action/ResponseValueUtils.java | 2 +- .../xpack/esql/expression/function/vector/Knn.java | 2 +- .../xpack/esql/planner/LocalExecutionPlanner.java | 2 +- .../elasticsearch/xpack/esql/analysis/VerifierTests.java | 1 - 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 33d6a032efdfe..b3076175867aa 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -10,7 +10,6 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xpack.esql.EsqlTestUtils; @@ -63,8 +62,6 @@ public void testKnnDefaults() { var score = (Double) row.get(2); assertNotNull(score); assertTrue(score > 0.0); - // dense_vector is null for now - assertNull(row.get(3)); } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index d628b0a601c13..ce33f89dcc420 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1139,7 +1139,12 @@ public enum Cap { /** * Dense vector field type support */ - DENSE_VECTOR_FIELD_TYPE(EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG); + DENSE_VECTOR_FIELD_TYPE(EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG), + + /** + * Support knn function + */ + KNN_FUNCTION(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java index b6b7e7e27352f..00f297efd0e53 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java @@ -191,7 +191,7 @@ protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Pa } }; case DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, SHORT, BYTE, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT, - PARTIAL_AGG, DENSE_VECTOR -> throw new IllegalArgumentException("can't convert values of type [" + columnInfo.type() + "]"); + PARTIAL_AGG -> throw new IllegalArgumentException("can't convert values of type [" + columnInfo.type() + "]"); }; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java index e8a7a0e0363c0..3a406de60ace7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java @@ -150,7 +150,7 @@ private static Object valueAt(DataType dataType, Block block, int offset, BytesR } case DENSE_VECTOR 
-> ((FloatBlock) block).getFloat(offset); case SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, NULL, - PARTIAL_AGG, DENSE_VECTOR -> throw EsqlIllegalArgumentException.illegalDataType(dataType); + PARTIAL_AGG -> throw EsqlIllegalArgumentException.illegalDataType(dataType); }; } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 3217a1a18a405..985552453a2cb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -110,7 +110,7 @@ protected TypeResolution resolveParams() { } return isNotNull(field(), sourceText(), FIRST).and(isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, "dense_vector")) - .and(isType(query(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.SECOND)); + .and(isType(query(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.SECOND, "dense_vector")); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 388c31510d682..277b65dd00708 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -473,7 +473,7 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte case GEO_POINT, CARTESIAN_POINT, GEO_SHAPE, CARTESIAN_SHAPE, COUNTER_LONG, COUNTER_INTEGER, COUNTER_DOUBLE, SOURCE, AGGREGATE_METRIC_DOUBLE, DENSE_VECTOR -> TopNEncoder.DEFAULT_UNSORTABLE; // unsupported 
fields are encoded as BytesRef, we'll use the same encoder; all values should be null at this point - case PARTIAL_AGG, DENSE_VECTOR, UNSUPPORTED -> TopNEncoder.UNSUPPORTED; + case PARTIAL_AGG, UNSUPPORTED -> TopNEncoder.UNSUPPORTED; }; } List orders = topNExec.order().stream().map(order -> { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 95d33362b6957..d76a355a6c9a9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1081,7 +1081,6 @@ public void testAggregateOnCounter() { + " found value [network.bytes_in] type [counter_long]" ) ); - } assertThat( error("FROM tests | STATS max(network.bytes_in)", tsdb), From 239cf1ea3c3affebbaf0487404c8bca894a973d2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 28 May 2025 10:36:58 +0000 Subject: [PATCH 14/64] [CI] Auto commit changes from spotless --- .../xpack/esql/analysis/Analyzer.java | 1 + .../esql/expression/function/vector/Knn.java | 20 ++++++------------- .../function/vector/VectorFunction.java | 3 +-- .../xpack/esql/querydsl/query/KnnQuery.java | 3 ++- .../xpack/esql/analysis/AnalyzerTests.java | 6 ++---- .../esql/parser/StatementParserTests.java | 8 +------- 6 files changed, 13 insertions(+), 28 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 1b74dceb457d5..fff3ba696dd0c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1397,6 +1397,7 @@ private static Expression 
cast(org.elasticsearch.xpack.esql.core.expression.func } return f; } + private static Expression processScalarOrGroupingFunction( org.elasticsearch.xpack.esql.core.expression.function.Function f, EsqlFunctionRegistry registry diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 985552453a2cb..b83cded567ba4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -67,18 +67,10 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun entry(OVERSAMPLE_FIELD.getPreferredName(), FLOAT) ); - @FunctionInfo( - returnType = "boolean", - preview = true, - description = """ - Finds the k nearest vectors to a query vector, as measured by a similarity metric. - knn function finds nearest vectors through approximate search on indexed dense_vectors - """, - appliesTo = { - @FunctionAppliesTo( - lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT - ) } - ) + @FunctionInfo(returnType = "boolean", preview = true, description = """ + Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
+ knn function finds nearest vectors through approximate search on indexed dense_vectors + """, appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) }) public Knn(Source source, Expression field, Expression query, Expression options) { this(source, field, query, options, null); } @@ -89,7 +81,6 @@ public Knn(Source source, Expression field, Expression query, Expression options this.options = options; } - public Expression field() { return field; } @@ -187,7 +178,8 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; if (super.equals(o) == false) return false; Knn knn = (Knn) o; - return Objects.equals(field, knn.field) && Objects.equals(query(), knn.query()) + return Objects.equals(field, knn.field) + && Objects.equals(query(), knn.query()) && Objects.equals(queryBuilder(), knn.queryBuilder()); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java index 83245934f2b9f..dc0be7a29fee0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java @@ -12,5 +12,4 @@ * from multi values to dense_vector field types, so parameters are actually * processed as dense_vectors in vector functions */ -public interface VectorFunction { -} +public interface VectorFunction {} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 742dfdda27221..4fb53713c1fa9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -59,7 +59,8 @@ public boolean equals(Object o) { if (!(o instanceof KnnQuery knnQuery)) return false; if (super.equals(o) == false) return false; return Objects.equals(field, knnQuery.field) - && Objects.deepEquals(query, knnQuery.query) && Objects.equals(options, knnQuery.options); + && Objects.deepEquals(query, knnQuery.query) + && Objects.equals(options, knnQuery.options); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 088efccb8a8d8..06ac6a061b8e1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -51,8 +51,8 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring; -import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.expression.function.vector.Knn; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan; import org.elasticsearch.xpack.esql.index.EsIndex; @@ -2360,8 +2360,7 @@ public void testDenseVectorImplicitCasting() { var plan = analyze(""" from test | where knn(vector, [0.342, 0.164, 0.234]) - """, - "mapping-dense_vector.json"); + """, "mapping-dense_vector.json"); var limit = as(plan, Limit.class); var filter = as(limit.child(), Filter.class); @@ -2372,7 +2371,6 @@ public void 
testDenseVectorImplicitCasting() { assertThat(queryVector.value(), equalTo(List.of(0.342, 0.164, 0.234))); } - public void testRateRequiresCounterTypes() { assumeTrue("rate requires snapshot builds", Build.current().isSnapshot()); Analyzer analyzer = analyzer(tsdbIndexResolution()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 1533db608fb58..ef28eb15cdbf8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -230,13 +230,7 @@ public void testRowCommandWithEscapedFieldName() { public void testCompositeCommand() { assertEquals( - new Filter(EMPTY, - new Row(EMPTY, - List.of( - new Alias(EMPTY, "a", integer(1)) - ) - ), - TRUE), + new Filter(EMPTY, new Row(EMPTY, List.of(new Alias(EMPTY, "a", integer(1)))), TRUE), statement("row a = 1 | where true") ); From 183724240116983ebcba1e96eaa752e6b6739c89 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 10:59:33 +0200 Subject: [PATCH 15/64] Spotless --- .../xpack/esql/analysis/Analyzer.java | 1 + .../esql/expression/function/vector/Knn.java | 21 ++++++------------- .../function/vector/VectorFunction.java | 3 +-- .../xpack/esql/querydsl/query/KnnQuery.java | 3 ++- .../xpack/esql/analysis/AnalyzerTests.java | 6 ++---- .../esql/parser/StatementParserTests.java | 8 +------ 6 files changed, 13 insertions(+), 29 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 1b74dceb457d5..fff3ba696dd0c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1397,6 +1397,7 @@ private static Expression cast(org.elasticsearch.xpack.esql.core.expression.func } return f; } + private static Expression processScalarOrGroupingFunction( org.elasticsearch.xpack.esql.core.expression.function.Function f, EsqlFunctionRegistry registry diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 985552453a2cb..4f5c87f47ff0b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -42,7 +42,6 @@ import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; -import static org.elasticsearch.search.vectors.RescoreVectorBuilder.OVERSAMPLE_FIELD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; @@ -67,18 +66,10 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun entry(OVERSAMPLE_FIELD.getPreferredName(), FLOAT) ); - @FunctionInfo( - returnType = "boolean", - preview = true, - description = """ - Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
- knn function finds nearest vectors through approximate search on indexed dense_vectors - """, - appliesTo = { - @FunctionAppliesTo( - lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT - ) } - ) + @FunctionInfo(returnType = "boolean", preview = true, description = """ + Finds the k nearest vectors to a query vector, as measured by a similarity metric. + knn function finds nearest vectors through approximate search on indexed dense_vectors + """, appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) }) public Knn(Source source, Expression field, Expression query, Expression options) { this(source, field, query, options, null); } @@ -89,7 +80,6 @@ public Knn(Source source, Expression field, Expression query, Expression options this.options = options; } - public Expression field() { return field; } @@ -187,7 +177,8 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; if (super.equals(o) == false) return false; Knn knn = (Knn) o; - return Objects.equals(field, knn.field) && Objects.equals(query(), knn.query()) + return Objects.equals(field, knn.field) + && Objects.equals(query(), knn.query()) && Objects.equals(queryBuilder(), knn.queryBuilder()); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java index 83245934f2b9f..dc0be7a29fee0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorFunction.java @@ -12,5 +12,4 @@ * from multi values to dense_vector field types, so parameters are actually * processed as dense_vectors in vector functions */ -public interface VectorFunction { -} +public interface VectorFunction {} diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 742dfdda27221..4fb53713c1fa9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -59,7 +59,8 @@ public boolean equals(Object o) { if (!(o instanceof KnnQuery knnQuery)) return false; if (super.equals(o) == false) return false; return Objects.equals(field, knnQuery.field) - && Objects.deepEquals(query, knnQuery.query) && Objects.equals(options, knnQuery.options); + && Objects.deepEquals(query, knnQuery.query) + && Objects.equals(options, knnQuery.options); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 088efccb8a8d8..06ac6a061b8e1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -51,8 +51,8 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring; -import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.expression.function.vector.Knn; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan; import org.elasticsearch.xpack.esql.index.EsIndex; @@ -2360,8 +2360,7 @@ public void 
testDenseVectorImplicitCasting() { var plan = analyze(""" from test | where knn(vector, [0.342, 0.164, 0.234]) - """, - "mapping-dense_vector.json"); + """, "mapping-dense_vector.json"); var limit = as(plan, Limit.class); var filter = as(limit.child(), Filter.class); @@ -2372,7 +2371,6 @@ public void testDenseVectorImplicitCasting() { assertThat(queryVector.value(), equalTo(List.of(0.342, 0.164, 0.234))); } - public void testRateRequiresCounterTypes() { assumeTrue("rate requires snapshot builds", Build.current().isSnapshot()); Analyzer analyzer = analyzer(tsdbIndexResolution()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 1533db608fb58..ef28eb15cdbf8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -230,13 +230,7 @@ public void testRowCommandWithEscapedFieldName() { public void testCompositeCommand() { assertEquals( - new Filter(EMPTY, - new Row(EMPTY, - List.of( - new Alias(EMPTY, "a", integer(1)) - ) - ), - TRUE), + new Filter(EMPTY, new Row(EMPTY, List.of(new Alias(EMPTY, "a", integer(1)))), TRUE), statement("row a = 1 | where true") ); From 7ae99099ac43219e7c14e189de709ab428c2edd1 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 12:55:49 +0200 Subject: [PATCH 16/64] Add test coverage for knn options --- .../src/main/resources/mapping-all-types.json | 3 +++ .../esql/expression/function/vector/Knn.java | 2 +- .../xpack/esql/querydsl/query/KnnQuery.java | 4 +++- .../LocalPhysicalPlanOptimizerTests.java | 24 +++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json index 17348adb6af4f..a7ef2f4840709 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json @@ -63,6 +63,9 @@ "semantic_text": { "type": "semantic_text", "inference_id": "foo_inference_id" + }, + "dense_vector": { + "type": "dense_vector" } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 4f5c87f47ff0b..ba713ebb7da81 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -63,7 +63,7 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun entry(NUM_CANDS_FIELD.getPreferredName(), INTEGER), entry(VECTOR_SIMILARITY_FIELD.getPreferredName(), FLOAT), entry(BOOST_FIELD.getPreferredName(), FLOAT), - entry(OVERSAMPLE_FIELD.getPreferredName(), FLOAT) + entry(KnnQuery.RESCORE_OVERSAMPLE_FIELD, FLOAT) ); @FunctionInfo(returnType = "boolean", preview = true, description = """ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 4fb53713c1fa9..889261d9fe9ff 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -27,6 +27,8 @@ public class KnnQuery extends Query { private final float[] query; private final Map options; + public static final String RESCORE_OVERSAMPLE_FIELD = "rescore_oversample"; + public KnnQuery(Source source, String 
field, float[] query, Map options) { super(source); assert options != null; @@ -40,7 +42,7 @@ protected QueryBuilder asBuilder() { Integer k = (Integer) options.get(K_FIELD.getPreferredName()); Integer numCands = (Integer) options.get(NUM_CANDS_FIELD.getPreferredName()); RescoreVectorBuilder rescoreVectorBuilder = null; - Float oversample = (Float) options.get(RescoreVectorBuilder.OVERSAMPLE_FIELD.getPreferredName()); + Float oversample = (Float) options.get(RESCORE_OVERSAMPLE_FIELD); if (oversample != null) { rescoreVectorBuilder = new RescoreVectorBuilder(oversample); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 02590ff680b08..8ba96f84c8834 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -30,6 +30,8 @@ import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; +import org.elasticsearch.search.vectors.RescoreVectorBuilder; import org.elasticsearch.test.VersionUtils; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.EsqlTestUtils; @@ -1904,6 +1906,28 @@ public void testMultiMatchOptionsPushDown() { assertThat(expectedQuery.toString(), is(planStr.get())); } + public void testKnnOptionsPushDown() { + String query = """ + from test + | where KNN(dense_vector, [0.1, 0.2, 0.3], { "k": 5, "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7 }) + """; + var analyzer = makeAnalyzer("mapping-all-types.json"); + var plan = plannerOptimizer.plan(query, IS_SV_STATS, 
analyzer); + + AtomicReference planStr = new AtomicReference<>(); + plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); + + var expectedQuery = new KnnVectorQueryBuilder( + "dense_vector", + new float[] { 0.1f, 0.2f, 0.3f }, + 5, + 10, + new RescoreVectorBuilder(7), + 0.001f + ); + assertThat(expectedQuery.toString(), is(planStr.get())); + } + /** * Expecting * LimitExec[1000[INTEGER]] From 204efda08319f28164a1df0c41ee728716cae646 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 14:11:20 +0200 Subject: [PATCH 17/64] Initial CSV tests --- .../xpack/esql/CsvTestsDataLoader.java | 4 +- .../src/main/resources/data/colors.csv | 140 ++++++++++++++++++ .../src/main/resources/knn.csv-spec | 58 ++++++++ .../src/main/resources/mapping-colors.json | 14 ++ .../esql/expression/function/vector/Knn.java | 2 +- .../elasticsearch/xpack/esql/CsvTests.java | 4 + 6 files changed, 220 insertions(+), 2 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 92fe597362bb0..c041fe55c32fc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -144,6 +144,7 @@ public class CsvTestsDataLoader { private static final TestDataset LOGS = new TestDataset("logs"); private static final TestDataset MV_TEXT = new TestDataset("mv_text"); private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector"); + private static final 
TestDataset COLORS = new TestDataset("colors"); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -204,7 +205,8 @@ public class CsvTestsDataLoader { Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT), Map.entry(LOGS.indexName, LOGS), Map.entry(MV_TEXT.indexName, MV_TEXT), - Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR) + Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR), + Map.entry(COLORS.indexName, COLORS) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv new file mode 100644 index 0000000000000..e63ec176c6231 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv @@ -0,0 +1,140 @@ +color:keyword,hex_code:keyword,rgb_vector:dense_vector +maroon, #800000, [128,0,0] +dark red, #8B0000, [139,0,0] +brown, #A52A2A, [165,42,42] +firebrick, #B22222, [178,34,34] +crimson, #DC143C, [220,20,60] +red, #FF0000, [255,0,0] +tomato, #FF6347, [255,99,71] +coral, #FF7F50, [255,127,80] +indian red, #CD5C5C, [205,92,92] +light coral, #F08080, [240,128,128] +dark salmon, #E9967A, [233,150,122] +salmon, #FA8072, [250,128,114] +light salmon, #FFA07A, [255,160,122] +orange red, #FF4500, [255,69,0] +dark orange, #FF8C00, [255,140,0] +orange, #FFA500, [255,165,0] +gold, #FFD700, [255,215,0] +dark golden rod, #B8860B, [184,134,11] +golden rod, #DAA520, [218,165,32] +pale golden rod, #EEE8AA, [238,232,170] +dark khaki, #BDB76B, [189,183,107] +khaki, #F0E68C, [240,230,140] +olive, #808000, [128,128,0] +yellow, #FFFF00, [255,255,0] +yellow green, #9ACD32, [154,205,50] +dark olive green, #556B2F, [85,107,47] +olive drab, #6B8E23, [107,142,35] +lawn green, #7CFC00, [124,252,0] +chartreuse, #7FFF00, [127,255,0] +green yellow, #ADFF2F, [173,255,47] +dark green, #006400, [0,100,0] 
+green, #008000, [0,128,0] +forest green, #228B22, [34,139,34] +lime, #00FF00, [0,255,0] +lime green, #32CD32, [50,205,50] +light green, #90EE90, [144,238,144] +pale green, #98FB98, [152,251,152] +dark sea green, #8FBC8F, [143,188,143] +medium spring green, #00FA9A, [0,250,154] +spring green, #00FF7F, [0,255,127] +sea green, #2E8B57, [46,139,87] +medium aqua marine, #66CDAA, [102,205,170] +medium sea green, #3CB371, [60,179,113] +light sea green, #20B2AA, [32,178,170] +dark slate gray, #2F4F4F, [47,79,79] +teal, #008080, [0,128,128] +dark cyan, #008B8B, [0,139,139] +aqua, #00FFFF, [0,255,255] +cyan, #00FFFF, [0,255,255] +light cyan, #E0FFFF, [224,255,255] +dark turquoise, #00CED1, [0,206,209] +turquoise, #40E0D0, [64,224,208] +medium turquoise, #48D1CC, [72,209,204] +pale turquoise, #AFEEEE, [175,238,238] +aqua marine, #7FFFD4, [127,255,212] +powder blue, #B0E0E6, [176,224,230] +cadet blue, #5F9EA0, [95,158,160] +steel blue, #4682B4, [70,130,180] +corn flower blue, #6495ED, [100,149,237] +deep sky blue, #00BFFF, [0,191,255] +dodger blue, #1E90FF, [30,144,255] +light blue, #ADD8E6, [173,216,230] +sky blue, #87CEEB, [135,206,235] +light sky blue, #87CEFA, [135,206,250] +midnight blue, #191970, [25,25,112] +navy, #000080, [0,0,128] +dark blue, #00008B, [0,0,139] +medium blue, #0000CD, [0,0,205] +blue, #0000FF, [0,0,255] +royal blue, #4169E1, [65,105,225] +blue violet, #8A2BE2, [138,43,226] +indigo, #4B0082, [75,0,130] +dark slate blue, #483D8B, [72,61,139] +slate blue, #6A5ACD, [106,90,205] +medium slate blue, #7B68EE, [123,104,238] +medium purple, #9370DB, [147,112,219] +dark magenta, #8B008B, [139,0,139] +dark violet, #9400D3, [148,0,211] +dark orchid, #9932CC, [153,50,204] +medium orchid, #BA55D3, [186,85,211] +purple, #800080, [128,0,128] +thistle, #D8BFD8, [216,191,216] +plum, #DDA0DD, [221,160,221] +violet, #EE82EE, [238,130,238] +magenta / fuchsia, #FF00FF, [255,0,255] +orchid, #DA70D6, [218,112,214] +medium violet red, #C71585, [199,21,133] +pale violet red, 
#DB7093, [219,112,147] +deep pink, #FF1493, [255,20,147] +hot pink, #FF69B4, [255,105,180] +light pink, #FFB6C1, [255,182,193] +pink, #FFC0CB, [255,192,203] +antique white, #FAEBD7, [250,235,215] +beige, #F5F5DC, [245,245,220] +bisque, #FFE4C4, [255,228,196] +blanched almond, #FFEBCD, [255,235,205] +wheat, #F5DEB3, [245,222,179] +corn silk, #FFF8DC, [255,248,220] +lemon chiffon, #FFFACD, [255,250,205] +light golden rod yellow, #FAFAD2, [250,250,210] +light yellow, #FFFFE0, [255,255,224] +saddle brown, #8B4513, [139,69,19] +sienna, #A0522D, [160,82,45] +chocolate, #D2691E, [210,105,30] +peru, #CD853F, [205,133,63] +sandy brown, #F4A460, [244,164,96] +burly wood, #DEB887, [222,184,135] +tan, #D2B48C, [210,180,140] +rosy brown, #BC8F8F, [188,143,143] +moccasin, #FFE4B5, [255,228,181] +navajo white, #FFDEAD, [255,222,173] +peach puff, #FFDAB9, [255,218,185] +misty rose, #FFE4E1, [255,228,225] +lavender blush, #FFF0F5, [255,240,245] +linen, #FAF0E6, [250,240,230] +old lace, #FDF5E6, [253,245,230] +papaya whip, #FFEFD5, [255,239,213] +sea shell, #FFF5EE, [255,245,238] +mint cream, #F5FFFA, [245,255,250] +slate gray, #708090, [112,128,144] +light slate gray, #778899, [119,136,153] +light steel blue, #B0C4DE, [176,196,222] +lavender, #E6E6FA, [230,230,250] +floral white, #FFFAF0, [255,250,240] +alice blue, #F0F8FF, [240,248,255] +ghost white, #F8F8FF, [248,248,255] +honeydew, #F0FFF0, [240,255,240] +ivory, #FFFFF0, [255,255,240] +azure, #F0FFFF, [240,255,255] +snow, #FFFAFA, [255,250,250] +black, #000000, [0,0,0] +dim gray / dim grey, #696969, [105,105,105] +gray / grey, #808080, [128,128,128] +dark gray / dark grey, #A9A9A9, [169,169,169] +silver, #C0C0C0, [192,192,192] +light gray / light grey, #D3D3D3, [211,211,211] +gainsboro, #DCDCDC, [220,220,220] +white smoke, #F5F5F5, [245,245,245] +white, #FFFFFF, [255,255,255] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec new 
file mode 100644 index 0000000000000..321a77fd5f145 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec @@ -0,0 +1,58 @@ +knnSearch +required_capability: knn_function + +from colors metadata _score +| where knn(rgb_vector, [0, 120, 0]) +| sort _score desc +| keep color, rgb_vector, _score +; + +color:keyword | rgb_vector:dense_vector | _score:double +green | [0.0, 128.0, 0.0] | 0.017461242154240608 +dark green | [0.0, 100.0, 0.0] | 0.002344448585063219 +forest green | [34.0, 139.0, 34.0] | 8.307271637022495E-4 +dark olive green | [85.0, 107.0, 47.0] | 1.564669219078496E-4 +sea green | [46.0, 139.0, 87.0] | 1.4884951815474778E-4 +dark slate gray | [47.0, 79.0, 79.0] | 1.3984512770548463E-4 +olive drab | [107.0, 142.0, 35.0] | 1.0368906077928841E-4 +lime green | [50.0, 205.0, 50.0] | 1.0350035154260695E-4 +black | [0.0, 0.0, 0.0] | 8.751000859774649E-5 +olive | [128.0, 128.0, 0.0] | 7.608329178765416E-5 +; + +knnSearchWithKOption +required_capability: knn_function + +from colors metadata _score +| where knn(rgb_vector, [0,255,255], {"k": 5}) +| sort _score desc +| keep color, rgb_vector, _score +; + +color:keyword | rgb_vector:dense_vector | _score:double +cyan | [0.0, 255.0, 255.0] | 1.0 +aqua | [0.0, 255.0, 255.0] | 1.0 +deep sky blue | [0.0, 191.0, 255.0] | 2.6103827985934913E-4 +dark turquoise | [0.0, 206.0, 209.0] | 2.463386917952448E-4 +turquoise | [64.0, 224.0, 208.0] | 1.8581181939225644E-4 +; + +knnSearchWithSimilarityOption +required_capability: knn_function + +from colors metadata _score +| where knn(rgb_vector, [255,192,203], {"similarity": 40}) +| sort _score desc +| keep color, rgb_vector, _score +; + +color:keyword | rgb_vector:dense_vector | _score:double +pink | [255.0, 192.0, 203.0] | 1.0 +light pink | [255.0, 182.0, 193.0] | 0.004639764316380024 +peach puff | [255.0, 218.0, 185.0] | 9.59702767431736E-4 +bisque | [255.0, 228.0, 196.0] | 7.536620832979679E-4 +thistle | [216.0, 191.0, 216.0] | 6.913584074936807E-4 
+wheat | [245.0, 222.0, 179.0] | 6.44357583951205E-4 +; + + diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json new file mode 100644 index 0000000000000..99c203ddb8661 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json @@ -0,0 +1,14 @@ +{ + "properties": { + "color": { + "type": "keyword" + }, + "hex_code": { + "type": "keyword" + }, + "rgb_vector": { + "type": "dense_vector", + "similarity": "l2_norm" + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index ba713ebb7da81..5f35653b56197 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -110,7 +110,7 @@ protected Query translate(TranslatorHandler handler) { Check.notNull(fieldAttribute, "Match must have a field attribute as the first argument"); String fieldName = getNameFromFieldAttribute(fieldAttribute); @SuppressWarnings("unchecked") - List queryFolded = (List) query().fold(FoldContext.small() /* TODO remove me */); + List queryFolded = (List) query().fold(FoldContext.small() /* TODO remove me */); float[] queryAsFloats = new float[queryFolded.size()]; for (int i = 0; i < queryFolded.size(); i++) { queryAsFloats[i] = queryFolded.get(i).floatValue(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index bf25feb9db553..4ab823ceddebc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -288,6 
+288,10 @@ public final void test() throws Throwable { "can't use KQL function in csv tests", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KQL_FUNCTION.capabilityName()) ); + assumeFalse( + "can't use KNN function in csv tests", + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KNN_FUNCTION.capabilityName()) + ); assumeFalse( "lookup join disabled for csv tests", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V12.capabilityName()) From 1dd6008585b133e61e2c59fe835937dabeefc82e Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 14:37:04 +0200 Subject: [PATCH 18/64] Add boosting support --- .../elasticsearch/xpack/esql/querydsl/query/KnnQuery.java | 8 +++++++- .../esql/optimizer/LocalPhysicalPlanOptimizerTests.java | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 889261d9fe9ff..cbd8a4ca3d789 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -17,6 +17,7 @@ import java.util.Map; import java.util.Objects; +import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; @@ -48,7 +49,12 @@ protected QueryBuilder asBuilder() { } Float vectorSimilarity = (Float) options.get(VECTOR_SIMILARITY_FIELD.getPreferredName()); - return new KnnVectorQueryBuilder(field, query, k, numCands, rescoreVectorBuilder, vectorSimilarity); + KnnVectorQueryBuilder queryBuilder = new 
KnnVectorQueryBuilder(field, query, k, numCands, rescoreVectorBuilder, vectorSimilarity); + Number boost = (Number) options.get(BOOST_FIELD.getPreferredName()); + if (boost != null) { + queryBuilder.boost(boost.floatValue()); + } + return queryBuilder; } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 8ba96f84c8834..3c8367d707dc5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1909,7 +1909,7 @@ public void testMultiMatchOptionsPushDown() { public void testKnnOptionsPushDown() { String query = """ from test - | where KNN(dense_vector, [0.1, 0.2, 0.3], { "k": 5, "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7 }) + | where KNN(dense_vector, [0.1, 0.2, 0.3], { "k": 5, "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7, "boost": 3.5 }) """; var analyzer = makeAnalyzer("mapping-all-types.json"); var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); @@ -1924,7 +1924,7 @@ public void testKnnOptionsPushDown() { 10, new RescoreVectorBuilder(7), 0.001f - ); + ).boost(3.5f); assertThat(expectedQuery.toString(), is(planStr.get())); } From e4f31fcc69d55ff6581e6996103ac2fa7dd32b85 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 14:46:30 +0200 Subject: [PATCH 19/64] Add CSV tests --- .../src/main/resources/data/colors.csv | 2 +- .../src/main/resources/knn.csv-spec | 26 ++++++++++++++++--- .../src/main/resources/mapping-colors.json | 2 +- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv index e63ec176c6231..3b64514937425 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv @@ -1,4 +1,4 @@ -color:keyword,hex_code:keyword,rgb_vector:dense_vector +color:text,hex_code:keyword,rgb_vector:dense_vector maroon, #800000, [128,0,0] dark red, #8B0000, [139,0,0] brown, #A52A2A, [165,42,42] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec index 321a77fd5f145..e80d2a49ea1b9 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec @@ -7,7 +7,7 @@ from colors metadata _score | keep color, rgb_vector, _score ; -color:keyword | rgb_vector:dense_vector | _score:double +color:text | rgb_vector:dense_vector | _score:double green | [0.0, 128.0, 0.0] | 0.017461242154240608 dark green | [0.0, 100.0, 0.0] | 0.002344448585063219 forest green | [34.0, 139.0, 34.0] | 8.307271637022495E-4 @@ -29,7 +29,7 @@ from colors metadata _score | keep color, rgb_vector, _score ; -color:keyword | rgb_vector:dense_vector | _score:double +color:text | rgb_vector:dense_vector | _score:double cyan | [0.0, 255.0, 255.0] | 1.0 aqua | [0.0, 255.0, 255.0] | 1.0 deep sky blue | [0.0, 191.0, 255.0] | 2.6103827985934913E-4 @@ -46,7 +46,7 @@ from colors metadata _score | keep color, rgb_vector, _score ; -color:keyword | rgb_vector:dense_vector | _score:double +color:text | rgb_vector:dense_vector | _score:double pink | [255.0, 192.0, 203.0] | 1.0 light pink | [255.0, 182.0, 193.0] | 0.004639764316380024 peach puff | [255.0, 218.0, 185.0] | 9.59702767431736E-4 @@ -55,4 +55,24 @@ thistle | [216.0, 191.0, 216.0] | 6.913584074936807E-4 wheat | [245.0, 222.0, 179.0] | 6.44357583951205E-4 ; +knnHybridSearch +required_capability: 
knn_function + +from colors metadata _score +| where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 5}) +| sort _score desc +| keep color, rgb_vector, _score +; + +color:text | rgb_vector:dense_vector| _score:double +violet | [238.0, 130.0, 238.0] | 13.985496520996094 +blue violet | [138.0, 43.0, 226.0] | 3.132192373275757 +dark violet | [148.0, 0.0, 211.0] | 3.132192373275757 +medium violet red | [199.0, 21.0, 133.0] | 2.5798425674438477 +pale violet red | [219.0, 112.0, 147.0] | 2.5798425674438477 +orchid | [218.0, 112.0, 214.0] | 0.008259013295173645 +plum | [221.0, 160.0, 221.0] | 0.006797885522246361 +hot pink | [255.0, 105.0, 180.0] | 0.002509034238755703 +thistle | [216.0, 191.0, 216.0] | 0.0022455058060586452 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json index 99c203ddb8661..1ec79607d2011 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json @@ -1,7 +1,7 @@ { "properties": { "color": { - "type": "keyword" + "type": "text" }, "hex_code": { "type": "keyword" From 0d5a66a1a5c5b558c52a2ec55c9e12a80b026dbb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 29 May 2025 12:57:38 +0000 Subject: [PATCH 20/64] [CI] Auto commit changes from spotless --- .../esql/optimizer/LocalPhysicalPlanOptimizerTests.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 3c8367d707dc5..f5a758efacddf 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1907,10 +1907,11 @@ public void testMultiMatchOptionsPushDown() { } public void testKnnOptionsPushDown() { - String query = """ - from test - | where KNN(dense_vector, [0.1, 0.2, 0.3], { "k": 5, "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7, "boost": 3.5 }) - """; + String query = + """ + from test + | where KNN(dense_vector, [0.1, 0.2, 0.3], { "k": 5, "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7, "boost": 3.5 }) + """; var analyzer = makeAnalyzer("mapping-all-types.json"); var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); From ad344637da25e8651a0dc7b1b2864f4c77aaf175 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 17:26:46 +0200 Subject: [PATCH 21/64] Add CSV tests --- .../src/main/resources/data/colors.csv | 279 +++++++++--------- .../src/main/resources/knn.csv-spec | 219 +++++++++++--- .../src/main/resources/mapping-colors.json | 3 + 3 files changed, 323 insertions(+), 178 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv index 3b64514937425..7922ff5d2ccb1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv @@ -1,140 +1,139 @@ -color:text,hex_code:keyword,rgb_vector:dense_vector -maroon, #800000, [128,0,0] -dark red, #8B0000, [139,0,0] -brown, #A52A2A, [165,42,42] -firebrick, #B22222, [178,34,34] -crimson, #DC143C, [220,20,60] -red, #FF0000, [255,0,0] -tomato, #FF6347, [255,99,71] -coral, #FF7F50, [255,127,80] -indian red, #CD5C5C, [205,92,92] -light coral, #F08080, [240,128,128] -dark salmon, #E9967A, [233,150,122] -salmon, #FA8072, [250,128,114] -light salmon, #FFA07A, [255,160,122] -orange red, #FF4500, [255,69,0] -dark orange, #FF8C00, [255,140,0] 
-orange, #FFA500, [255,165,0] -gold, #FFD700, [255,215,0] -dark golden rod, #B8860B, [184,134,11] -golden rod, #DAA520, [218,165,32] -pale golden rod, #EEE8AA, [238,232,170] -dark khaki, #BDB76B, [189,183,107] -khaki, #F0E68C, [240,230,140] -olive, #808000, [128,128,0] -yellow, #FFFF00, [255,255,0] -yellow green, #9ACD32, [154,205,50] -dark olive green, #556B2F, [85,107,47] -olive drab, #6B8E23, [107,142,35] -lawn green, #7CFC00, [124,252,0] -chartreuse, #7FFF00, [127,255,0] -green yellow, #ADFF2F, [173,255,47] -dark green, #006400, [0,100,0] -green, #008000, [0,128,0] -forest green, #228B22, [34,139,34] -lime, #00FF00, [0,255,0] -lime green, #32CD32, [50,205,50] -light green, #90EE90, [144,238,144] -pale green, #98FB98, [152,251,152] -dark sea green, #8FBC8F, [143,188,143] -medium spring green, #00FA9A, [0,250,154] -spring green, #00FF7F, [0,255,127] -sea green, #2E8B57, [46,139,87] -medium aqua marine, #66CDAA, [102,205,170] -medium sea green, #3CB371, [60,179,113] -light sea green, #20B2AA, [32,178,170] -dark slate gray, #2F4F4F, [47,79,79] -teal, #008080, [0,128,128] -dark cyan, #008B8B, [0,139,139] -aqua, #00FFFF, [0,255,255] -cyan, #00FFFF, [0,255,255] -light cyan, #E0FFFF, [224,255,255] -dark turquoise, #00CED1, [0,206,209] -turquoise, #40E0D0, [64,224,208] -medium turquoise, #48D1CC, [72,209,204] -pale turquoise, #AFEEEE, [175,238,238] -aqua marine, #7FFFD4, [127,255,212] -powder blue, #B0E0E6, [176,224,230] -cadet blue, #5F9EA0, [95,158,160] -steel blue, #4682B4, [70,130,180] -corn flower blue, #6495ED, [100,149,237] -deep sky blue, #00BFFF, [0,191,255] -dodger blue, #1E90FF, [30,144,255] -light blue, #ADD8E6, [173,216,230] -sky blue, #87CEEB, [135,206,235] -light sky blue, #87CEFA, [135,206,250] -midnight blue, #191970, [25,25,112] -navy, #000080, [0,0,128] -dark blue, #00008B, [0,0,139] -medium blue, #0000CD, [0,0,205] -blue, #0000FF, [0,0,255] -royal blue, #4169E1, [65,105,225] -blue violet, #8A2BE2, [138,43,226] -indigo, #4B0082, [75,0,130] -dark slate 
blue, #483D8B, [72,61,139] -slate blue, #6A5ACD, [106,90,205] -medium slate blue, #7B68EE, [123,104,238] -medium purple, #9370DB, [147,112,219] -dark magenta, #8B008B, [139,0,139] -dark violet, #9400D3, [148,0,211] -dark orchid, #9932CC, [153,50,204] -medium orchid, #BA55D3, [186,85,211] -purple, #800080, [128,0,128] -thistle, #D8BFD8, [216,191,216] -plum, #DDA0DD, [221,160,221] -violet, #EE82EE, [238,130,238] -magenta / fuchsia, #FF00FF, [255,0,255] -orchid, #DA70D6, [218,112,214] -medium violet red, #C71585, [199,21,133] -pale violet red, #DB7093, [219,112,147] -deep pink, #FF1493, [255,20,147] -hot pink, #FF69B4, [255,105,180] -light pink, #FFB6C1, [255,182,193] -pink, #FFC0CB, [255,192,203] -antique white, #FAEBD7, [250,235,215] -beige, #F5F5DC, [245,245,220] -bisque, #FFE4C4, [255,228,196] -blanched almond, #FFEBCD, [255,235,205] -wheat, #F5DEB3, [245,222,179] -corn silk, #FFF8DC, [255,248,220] -lemon chiffon, #FFFACD, [255,250,205] -light golden rod yellow, #FAFAD2, [250,250,210] -light yellow, #FFFFE0, [255,255,224] -saddle brown, #8B4513, [139,69,19] -sienna, #A0522D, [160,82,45] -chocolate, #D2691E, [210,105,30] -peru, #CD853F, [205,133,63] -sandy brown, #F4A460, [244,164,96] -burly wood, #DEB887, [222,184,135] -tan, #D2B48C, [210,180,140] -rosy brown, #BC8F8F, [188,143,143] -moccasin, #FFE4B5, [255,228,181] -navajo white, #FFDEAD, [255,222,173] -peach puff, #FFDAB9, [255,218,185] -misty rose, #FFE4E1, [255,228,225] -lavender blush, #FFF0F5, [255,240,245] -linen, #FAF0E6, [250,240,230] -old lace, #FDF5E6, [253,245,230] -papaya whip, #FFEFD5, [255,239,213] -sea shell, #FFF5EE, [255,245,238] -mint cream, #F5FFFA, [245,255,250] -slate gray, #708090, [112,128,144] -light slate gray, #778899, [119,136,153] -light steel blue, #B0C4DE, [176,196,222] -lavender, #E6E6FA, [230,230,250] -floral white, #FFFAF0, [255,250,240] -alice blue, #F0F8FF, [240,248,255] -ghost white, #F8F8FF, [248,248,255] -honeydew, #F0FFF0, [240,255,240] -ivory, #FFFFF0, [255,255,240] -azure, 
#F0FFFF, [240,255,255] -snow, #FFFAFA, [255,250,250] -black, #000000, [0,0,0] -dim gray / dim grey, #696969, [105,105,105] -gray / grey, #808080, [128,128,128] -dark gray / dark grey, #A9A9A9, [169,169,169] -silver, #C0C0C0, [192,192,192] -light gray / light grey, #D3D3D3, [211,211,211] -gainsboro, #DCDCDC, [220,220,220] -white smoke, #F5F5F5, [245,245,245] -white, #FFFFFF, [255,255,255] +color:text,hex_code:keyword,rgb_vector:dense_vector,primary:boolean +maroon, #800000, [128,0,0], false +dark red, #8B0000, [139,0,0], false +brown, #A52A2A, [165,42,42], false +firebrick, #B22222, [178,34,34], false +crimson, #DC143C, [220,20,60], false +red, #FF0000, [255,0,0], true +tomato, #FF6347, [255,99,71], false +coral, #FF7F50, [255,127,80], false +indian red, #CD5C5C, [205,92,92], false +light coral, #F08080, [240,128,128], false +dark salmon, #E9967A, [233,150,122], false +salmon, #FA8072, [250,128,114], false +light salmon, #FFA07A, [255,160,122], false +orange red, #FF4500, [255,69,0], false +dark orange, #FF8C00, [255,140,0], false +orange, #FFA500, [255,165,0], false +gold, #FFD700, [255,215,0], false +dark golden rod, #B8860B, [184,134,11], false +golden rod, #DAA520, [218,165,32], false +pale golden rod, #EEE8AA, [238,232,170], false +dark khaki, #BDB76B, [189,183,107], false +khaki, #F0E68C, [240,230,140], false +olive, #808000, [128,128,0], false +yellow, #FFFF00, [255,255,0], true +yellow green, #9ACD32, [154,205,50], false +dark olive green, #556B2F, [85,107,47], false +olive drab, #6B8E23, [107,142,35], false +lawn green, #7CFC00, [124,252,0], false +chartreuse, #7FFF00, [127,255,0], false +green yellow, #ADFF2F, [173,255,47], false +dark green, #006400, [0,100,0], false +green, #008000, [0,128,0], true +forest green, #228B22, [34,139,34], false +lime, #00FF00, [0,255,0], false +lime green, #32CD32, [50,205,50], false +light green, #90EE90, [144,238,144], false +pale green, #98FB98, [152,251,152], false +dark sea green, #8FBC8F, [143,188,143], false +medium 
spring green, #00FA9A, [0,250,154], false +spring green, #00FF7F, [0,255,127], false +sea green, #2E8B57, [46,139,87], false +medium aqua marine, #66CDAA, [102,205,170], false +medium sea green, #3CB371, [60,179,113], false +light sea green, #20B2AA, [32,178,170], false +dark slate gray, #2F4F4F, [47,79,79], false +teal, #008080, [0,128,128], false +dark cyan, #008B8B, [0,139,139], false +cyan, #00FFFF, [0,255,255], true +light cyan, #E0FFFF, [224,255,255], false +dark turquoise, #00CED1, [0,206,209], false +turquoise, #40E0D0, [64,224,208], false +medium turquoise, #48D1CC, [72,209,204], false +pale turquoise, #AFEEEE, [175,238,238], false +aqua marine, #7FFFD4, [127,255,212], false +powder blue, #B0E0E6, [176,224,230], false +cadet blue, #5F9EA0, [95,158,160], false +steel blue, #4682B4, [70,130,180], false +corn flower blue, #6495ED, [100,149,237], false +deep sky blue, #00BFFF, [0,191,255], false +dodger blue, #1E90FF, [30,144,255], false +light blue, #ADD8E6, [173,216,230], false +sky blue, #87CEEB, [135,206,235], false +light sky blue, #87CEFA, [135,206,250], false +midnight blue, #191970, [25,25,112], false +navy, #000080, [0,0,128], false +dark blue, #00008B, [0,0,139], false +medium blue, #0000CD, [0,0,205], false +blue, #0000FF, [0,0,255], true +royal blue, #4169E1, [65,105,225], false +blue violet, #8A2BE2, [138,43,226], false +indigo, #4B0082, [75,0,130], false +dark slate blue, #483D8B, [72,61,139], false +slate blue, #6A5ACD, [106,90,205], false +medium slate blue, #7B68EE, [123,104,238], false +medium purple, #9370DB, [147,112,219], false +dark magenta, #8B008B, [139,0,139], false +dark violet, #9400D3, [148,0,211], false +dark orchid, #9932CC, [153,50,204], false +medium orchid, #BA55D3, [186,85,211], false +purple, #800080, [128,0,128], false +thistle, #D8BFD8, [216,191,216], false +plum, #DDA0DD, [221,160,221], false +violet, #EE82EE, [238,130,238], false +magenta, #FF00FF, [255,0,255], true +orchid, #DA70D6, [218,112,214], false +medium violet 
red, #C71585, [199,21,133], false +pale violet red, #DB7093, [219,112,147], false +deep pink, #FF1493, [255,20,147], false +hot pink, #FF69B4, [255,105,180], false +light pink, #FFB6C1, [255,182,193], false +pink, #FFC0CB, [255,192,203], false +antique white, #FAEBD7, [250,235,215], false +beige, #F5F5DC, [245,245,220], false +bisque, #FFE4C4, [255,228,196], false +blanched almond, #FFEBCD, [255,235,205], false +wheat, #F5DEB3, [245,222,179], false +corn silk, #FFF8DC, [255,248,220], false +lemon chiffon, #FFFACD, [255,250,205], false +light golden rod yellow, #FAFAD2, [250,250,210], false +light yellow, #FFFFE0, [255,255,224], false +saddle brown, #8B4513, [139,69,19], false +sienna, #A0522D, [160,82,45], false +chocolate, #D2691E, [210,105,30], false +peru, #CD853F, [205,133,63], false +sandy brown, #F4A460, [244,164,96], false +burly wood, #DEB887, [222,184,135], false +tan, #D2B48C, [210,180,140], false +rosy brown, #BC8F8F, [188,143,143], false +moccasin, #FFE4B5, [255,228,181], false +navajo white, #FFDEAD, [255,222,173], false +peach puff, #FFDAB9, [255,218,185], false +misty rose, #FFE4E1, [255,228,225], false +lavender blush, #FFF0F5, [255,240,245], false +linen, #FAF0E6, [250,240,230], false +old lace, #FDF5E6, [253,245,230], false +papaya whip, #FFEFD5, [255,239,213], false +sea shell, #FFF5EE, [255,245,238], false +mint cream, #F5FFFA, [245,255,250], false +slate gray, #708090, [112,128,144], false +light slate gray, #778899, [119,136,153], false +light steel blue, #B0C4DE, [176,196,222], false +lavender, #E6E6FA, [230,230,250], false +floral white, #FFFAF0, [255,250,240], false +alice blue, #F0F8FF, [240,248,255], false +ghost white, #F8F8FF, [248,248,255], false +honeydew, #F0FFF0, [240,255,240], false +ivory, #FFFFF0, [255,255,240], false +azure, #F0FFFF, [240,255,255], false +snow, #FFFAFA, [255,250,250], false +black, #000000, [0,0,0], true +dim gray, #696969, [105,105,105], false +gray, #808080, [128,128,128], true +dark gray, #A9A9A9, 
[169,169,169], false +silver, #C0C0C0, [192,192,192], false +light gray, #D3D3D3, [211,211,211], false +gainsboro, #DCDCDC, [220,220,220], false +white smoke, #F5F5F5, [245,245,245], false +white, #FFFFFF, [255,255,255], true diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec index e80d2a49ea1b9..c8a4a23ab1806 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec @@ -4,37 +4,38 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [0, 120, 0]) | sort _score desc -| keep color, rgb_vector, _score +| eval round_score = round(_score, 4) +| keep color, rgb_vector, round_score ; -color:text | rgb_vector:dense_vector | _score:double -green | [0.0, 128.0, 0.0] | 0.017461242154240608 -dark green | [0.0, 100.0, 0.0] | 0.002344448585063219 -forest green | [34.0, 139.0, 34.0] | 8.307271637022495E-4 -dark olive green | [85.0, 107.0, 47.0] | 1.564669219078496E-4 -sea green | [46.0, 139.0, 87.0] | 1.4884951815474778E-4 -dark slate gray | [47.0, 79.0, 79.0] | 1.3984512770548463E-4 -olive drab | [107.0, 142.0, 35.0] | 1.0368906077928841E-4 -lime green | [50.0, 205.0, 50.0] | 1.0350035154260695E-4 -black | [0.0, 0.0, 0.0] | 8.751000859774649E-5 -olive | [128.0, 128.0, 0.0] | 7.608329178765416E-5 +color:text | rgb_vector:dense_vector | round_score:double +green | [0.0, 128.0, 0.0] | 0.0127 +dark green | [0.0, 100.0, 0.0] | 0.0027 +forest green | [34.0, 139.0, 34.0] | 0.0023 +dark olive green | [85.0, 107.0, 47.0] | 3.0E-4 +sea green | [46.0, 139.0, 87.0] | 2.0E-4 +dark slate gray | [47.0, 79.0, 79.0] | 2.0E-4 +olive drab | [107.0, 142.0, 35.0] | 1.0E-4 +lime green | [50.0, 205.0, 50.0] | 1.0E-4 +black | [0.0, 0.0, 0.0] | 1.0E-4 +olive | [128.0, 128.0, 0.0] | 1.0E-4 ; knnSearchWithKOption required_capability: knn_function from colors metadata _score -| 
where knn(rgb_vector, [0,255,255], {"k": 5}) +| where knn(rgb_vector, [0,255,255], {"k": 4}) | sort _score desc -| keep color, rgb_vector, _score +| eval round_score = round(_score, 4) +| keep color, rgb_vector, round_score ; -color:text | rgb_vector:dense_vector | _score:double -cyan | [0.0, 255.0, 255.0] | 1.0 -aqua | [0.0, 255.0, 255.0] | 1.0 -deep sky blue | [0.0, 191.0, 255.0] | 2.6103827985934913E-4 -dark turquoise | [0.0, 206.0, 209.0] | 2.463386917952448E-4 -turquoise | [64.0, 224.0, 208.0] | 1.8581181939225644E-4 +color:text | rgb_vector:dense_vector | round_score:double +cyan | [0.0, 255.0, 255.0] | 1.0 +deep sky blue | [0.0, 191.0, 255.0] | 3.0E-4 +dark turquoise | [0.0, 206.0, 209.0] | 3.0E-4 +turquoise | [64.0, 224.0, 208.0] | 3.0E-4 ; knnSearchWithSimilarityOption @@ -43,16 +44,16 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [255,192,203], {"similarity": 40}) | sort _score desc -| keep color, rgb_vector, _score +| keep color, rgb_vector ; -color:text | rgb_vector:dense_vector | _score:double -pink | [255.0, 192.0, 203.0] | 1.0 -light pink | [255.0, 182.0, 193.0] | 0.004639764316380024 -peach puff | [255.0, 218.0, 185.0] | 9.59702767431736E-4 -bisque | [255.0, 228.0, 196.0] | 7.536620832979679E-4 -thistle | [216.0, 191.0, 216.0] | 6.913584074936807E-4 -wheat | [245.0, 222.0, 179.0] | 6.44357583951205E-4 +color:text | rgb_vector:dense_vector +pink | [255.0, 192.0, 203.0] +light pink | [255.0, 182.0, 193.0] +peach puff | [255.0, 218.0, 185.0] +bisque | [255.0, 228.0, 196.0] +thistle | [216.0, 191.0, 216.0] +wheat | [245.0, 222.0, 179.0] ; knnHybridSearch @@ -61,18 +62,160 @@ required_capability: knn_function from colors metadata _score | where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 5}) | sort _score desc +| eval round_score = round(_score, 4) +| keep color, rgb_vector, round_score +; + +color:text | rgb_vector:dense_vector | round_score:double +violet | [238.0, 130.0, 
238.0] | 13.9457 +blue violet | [138.0, 43.0, 226.0] | 3.0871 +dark violet | [148.0, 0.0, 211.0] | 3.0871 +medium violet red | [199.0, 21.0, 133.0] | 2.5355 +pale violet red | [219.0, 112.0, 147.0] | 2.5355 +orchid | [218.0, 112.0, 214.0] | 0.0083 +plum | [221.0, 160.0, 221.0] | 0.0071 +hot pink | [255.0, 105.0, 180.0] | 0.0024 +thistle | [216.0, 191.0, 216.0] | 0.0021 +; + +knnWithMultipleFunctions +required_capability: knn_function + +from colors metadata _score +| where knn(rgb_vector, [128,128,0]) and match(color, "olive") +| sort _score desc +| eval round_score = round(_score, 4) +| keep color, rgb_vector, round_score +; + +color:text | rgb_vector:dense_vector | round_score:double +olive | [128.0, 128.0, 0.0] | 5.4979 +olive drab | [107.0, 142.0, 35.0] | 3.5206 +dark olive green | [85.0, 107.0, 47.0] | 2.8906 +; + +knnAfterKeep +required_capability: knn_function + +from colors metadata _score +| keep rgb_vector, _score +| where knn(rgb_vector, [128,128,0]) +| eval round_score = round(_score, 4) +| sort round_score desc +| keep rgb_vector, round_score +| limit 5 +; + +rgb_vector:dense_vector | round_score:double +[128.0, 128.0, 0.0] | 1.0 +[107.0, 142.0, 35.0] | 0.0014 +[85.0, 107.0, 47.0] | 4.0E-4 +[139.0, 69.0, 19.0] | 3.0E-4 +[184.0, 134.0, 11.0] | 3.0E-4 +; + +knnAfterDrop +required_capability: knn_function + +from colors metadata _score +| drop color +| where knn(rgb_vector, [128,128,0]) +| eval round_score = round(_score, 4) +| keep rgb_vector, round_score +| limit 5 +; + +rgb_vector:dense_vector | round_score:double +[184.0, 134.0, 11.0] | 3.0E-4 +[128.0, 128.0, 0.0] | 1.0 +[154.0, 205.0, 50.0] | 1.0E-4 +[85.0, 107.0, 47.0] | 4.0E-4 +[107.0, 142.0, 35.0] | 0.0014 +; + +knnAfterEval +required_capability: knn_function + +from colors metadata _score +| eval composed_name = locate(color, " ") > 0 +| where knn(rgb_vector, [128,128,0]) +| sort _score, color desc +| keep color, composed_name +; + +color:text | composed_name:boolean +peru | false +yellow green | 
true +chocolate | false +dim gray | true +saddle brown | true +sienna | false +dark golden rod | true +dark olive green | true +olive drab | true +olive | false +; + +knnWithConjunction +required_capability: knn_function + +# TODO We need kNN prefiltering here so we get more candidates that pass the filter +from colors metadata _score +| where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*" +| keep color, hex_code, rgb_vector +; +ignoreOrder:true + +color:text | hex_code: keyword | rgb_vector:dense_vector +light yellow | #FFFFE0 | [255.0, 255.0, 224.0] +lavender blush | #FFF0F5 | [255.0, 240.0, 245.0] +sea shell | #FFF5EE | [255.0, 245.0, 238.0] +floral white | #FFFAF0 | [255.0, 250.0, 240.0] +ivory | #FFFFF0 | [255.0, 255.0, 240.0] +snow | #FFFAFA | [255.0, 250.0, 250.0] +white | #FFFFFF | [255.0, 255.0, 255.0] +; + +knnWithDisjunctionAndFiltersConjunction +required_capability: knn_function + +# TODO We need kNN prefiltering here so we get more candidates that pass the filter +from colors metadata _score +| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true | keep color, rgb_vector, _score ; -color:text | rgb_vector:dense_vector| _score:double -violet | [238.0, 130.0, 238.0] | 13.985496520996094 -blue violet | [138.0, 43.0, 226.0] | 3.132192373275757 -dark violet | [148.0, 0.0, 211.0] | 3.132192373275757 -medium violet red | [199.0, 21.0, 133.0] | 2.5798425674438477 -pale violet red | [219.0, 112.0, 147.0] | 2.5798425674438477 -orchid | [218.0, 112.0, 214.0] | 0.008259013295173645 -plum | [221.0, 160.0, 221.0] | 0.006797885522246361 -hot pink | [255.0, 105.0, 180.0] | 0.002509034238755703 -thistle | [216.0, 191.0, 216.0] | 0.0022455058060586452 +color:text | rgb_vector:dense_vector | _score:double +cyan | [0.0, 255.0, 255.0] | 1.0 +blue | [0.0, 0.0, 255.0] | 9.922293975250795E-5 ; +knnWithDisjunctionAndConjunction +required_capability: knn_function +required_capability: full_text_functions_disjunctions + +# TODO We 
need kNN prefiltering here so we get more candidates that pass the filter +from colors metadata _score +| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0]) +| keep color, rgb_vector, _score +; + +color:text | rgb_vector:dense_vector | _score:double +medium spring green | [0.0, 250.0, 154.0] | 1.6871128173079342E-4 +; + +knnWithNonPushableConjunction +required_capability: knn_function + +from colors metadata _score +| eval composed_name = locate(color, " ") > 0 +| where knn(rgb_vector, [128,128,0]) and composed_name == false +| keep color, composed_name, _score +; + +color:text | composed_name: boolean | _score: double +olive | false | 1.0 +sienna | false | 2.9301168979145586E-4 +chocolate | false | 1.4101568376645446E-4 +peru | false | 1.3746823242399842E-4 +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json index 1ec79607d2011..e5c5e7d65fd1e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json @@ -9,6 +9,9 @@ "rgb_vector": { "type": "dense_vector", "similarity": "l2_norm" + }, + "primary": { + "type": "boolean" } } } From 26f48e7bccc0c0c1d17a4973d2f3d49b15202540 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 18:11:53 +0200 Subject: [PATCH 22/64] Add CSV tests --- .../src/main/resources/knn.csv-spec | 123 +++++++++++++++++- 1 file changed, 116 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec index c8a4a23ab1806..48d1faed98a42 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec @@ -209,13 +209,122 @@ required_capability: knn_function from colors 
metadata _score | eval composed_name = locate(color, " ") > 0 -| where knn(rgb_vector, [128,128,0]) and composed_name == false -| keep color, composed_name, _score +| where knn(rgb_vector, [128,128,0]) and composed_name == false +| eval round_score = round(_score, 4) +| keep color, composed_name, round_score ; -color:text | composed_name: boolean | _score: double -olive | false | 1.0 -sienna | false | 2.9301168979145586E-4 -chocolate | false | 1.4101568376645446E-4 -peru | false | 1.3746823242399842E-4 +color:text | composed_name:boolean | round_score:double +olive | false | 1.0 +sienna | false | 3.0E-4 +chocolate | false | 1.0E-4 +peru | false | 1.0E-4 ; + +testKnnWithNonPushableDisjunctions +required_capability: knn_function + +from colors metadata _score +| where knn(rgb_vector, [128,128,0], {"k": 5}) or length(color) > 17 +| sort _score desc +| eval round_score = round(_score, 4) +| keep color, round_score +; + +color:text | round_score: double +olive | 1.0 +olive drab | 0.0014 +dark olive green | 4.0E-4 +dark golden rod | 3.0E-4 +sienna | 3.0E-4 +medium aqua marine | 0.0 +medium spring green | 0.0 +light golden rod yellow | 0.0 +; + +testKnnWithNonPushableDisjunctionsOnComplexExpressions +required_capability: knn_function + +from colors metadata _score +| where (knn(rgb_vector, [128,128,0]) and length(color) > 12) or (knn(rgb_vector, [128,0,128]) and primary == false) +| sort _score desc +| eval round_score = round(_score, 4) +| keep color, primary, round_score +; + +color: text | primary: boolean | round_score: double +purple | false | 1.0 +dark magenta | false | 0.0045 +dark olive green | false | 4.0E-4 +indigo | false | 4.0E-4 +dark golden rod | false | 3.0E-4 +dim gray | false | 3.0E-4 +dark slate blue | false | 2.0E-4 +medium violet red | false | 2.0E-4 +dark orchid | false | 1.0E-4 +dark violet | false | 1.0E-4 +brown | false | 1.0E-4 +blue violet | false | 1.0E-4 +; + +testKnnInStatsNonPushable +required_capability: knn_function + +from colors +| where 
length(color) < 10 +| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 40}) +; + +c: long +11 +; + + +testKnnInStatsPushableAndNonPushable +required_capability: knn_function +required_capability: full_text_functions_in_stats_where + +from colors metadata _score +| stats c = count(*) where (knn(rgb_vector, [0,255,255], {"k": 40}) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0], {"k": 40}) +; + +c:long +21 +; + +testKnnInStatsWithGrouping +from colors +| where length(color) < 10 +| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 40}) by primary +; + +c: long | primary: boolean +9 | false +2 | true +; + +testKnnInStatsPushable +required_capability: knn_function +required_capability: full_text_functions_in_stats_where + +from colors +| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 40}) +; + +# No surprises, gets the number of top k +c:long +40 +; + +testKnnInStatsWithNonPushableDisjunctions +required_capability: knn_function +required_capability: full_text_functions_in_stats_where + +FROM colors +| STATS c = count(*) where knn(rgb_vector, [128,128,0], {"k": 5}) or length(color) > 17 +; + +c:long +8 +; + From e7452dd819a6ce6ebbcc066e65ed067c8063e975 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 18:23:02 +0200 Subject: [PATCH 23/64] Add CSV tests --- .../{knn.csv-spec => knn-function.csv-spec} | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) rename x-pack/plugin/esql/qa/testFixtures/src/main/resources/{knn.csv-spec => knn-function.csv-spec} (88%) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec similarity index 88% rename from x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec rename to x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 48d1faed98a42..2eb2dfaf9659f 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -1,41 +1,45 @@ knnSearch required_capability: knn_function +// tag::knn-function[] from colors metadata _score | where knn(rgb_vector, [0, 120, 0]) | sort _score desc -| eval round_score = round(_score, 4) -| keep color, rgb_vector, round_score +// end::knn-function[] +| keep color, rgb_vector ; -color:text | rgb_vector:dense_vector | round_score:double -green | [0.0, 128.0, 0.0] | 0.0127 -dark green | [0.0, 100.0, 0.0] | 0.0027 -forest green | [34.0, 139.0, 34.0] | 0.0023 -dark olive green | [85.0, 107.0, 47.0] | 3.0E-4 -sea green | [46.0, 139.0, 87.0] | 2.0E-4 -dark slate gray | [47.0, 79.0, 79.0] | 2.0E-4 -olive drab | [107.0, 142.0, 35.0] | 1.0E-4 -lime green | [50.0, 205.0, 50.0] | 1.0E-4 -black | [0.0, 0.0, 0.0] | 1.0E-4 -olive | [128.0, 128.0, 0.0] | 1.0E-4 +// tag::knn-function-result[] +color:text | rgb_vector:dense_vector +green | [0.0, 128.0, 0.0] +dark green | [0.0, 100.0, 0.0] +forest green | [34.0, 139.0, 34.0] +dark olive green | [85.0, 107.0, 47.0] +sea green | [46.0, 139.0, 87.0] +dark slate gray | [47.0, 79.0, 79.0] +olive drab | [107.0, 142.0, 35.0] +lime green | [50.0, 205.0, 50.0] +black | [0.0, 0.0, 0.0] +olive | [128.0, 128.0, 0.0] +// end::knn-function-result[] ; knnSearchWithKOption required_capability: knn_function +// tag::knn-function-options[] from colors metadata _score | where knn(rgb_vector, [0,255,255], {"k": 4}) | sort _score desc -| eval round_score = round(_score, 4) -| keep color, rgb_vector, round_score +// end::knn-function-options[] +| keep color, rgb_vector ; -color:text | rgb_vector:dense_vector | round_score:double -cyan | [0.0, 255.0, 255.0] | 1.0 -deep sky blue | [0.0, 191.0, 255.0] | 3.0E-4 -dark turquoise | [0.0, 206.0, 209.0] | 3.0E-4 -turquoise | [64.0, 224.0, 208.0] | 3.0E-4 +color:text | rgb_vector:dense_vector +cyan | [0.0, 255.0, 255.0] +deep sky blue | [0.0, 
191.0, 255.0] +dark turquoise | [0.0, 206.0, 209.0] +turquoise | [64.0, 224.0, 208.0] ; knnSearchWithSimilarityOption From 22efe273ca7d9c53b04ab940192a60c5d478e4a5 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 29 May 2025 18:36:49 +0200 Subject: [PATCH 24/64] Add Knn doc annotations --- .../esql/expression/function/vector/Knn.java | 73 +++++++++++++++++-- .../function/fulltext/KnnTests.java | 34 +++++++++ 2 files changed, 102 insertions(+), 5 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 5f35653b56197..6084aeabee831 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -21,10 +21,13 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Check; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.MapParam; import org.elasticsearch.xpack.esql.expression.function.OptionalArgument; +import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextFunction; import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; @@ -66,11 +69,71 @@ public class Knn extends 
FullTextFunction implements OptionalArgument, VectorFun entry(KnnQuery.RESCORE_OVERSAMPLE_FIELD, FLOAT) ); - @FunctionInfo(returnType = "boolean", preview = true, description = """ - Finds the k nearest vectors to a query vector, as measured by a similarity metric. - knn function finds nearest vectors through approximate search on indexed dense_vectors - """, appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) }) - public Knn(Source source, Expression field, Expression query, Expression options) { + @FunctionInfo( + returnType = "boolean", + preview = true, + description = """ + Finds the k nearest vectors to a query vector, as measured by a similarity metric. + knn function finds nearest vectors through approximate search on indexed dense_vectors + """, + examples = { + @Example(file = "knn-function", tag = "knn-function"), + @Example(file = "knn-function", tag = "knn-function-options"), }, + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) } + ) + public Knn( + Source source, + @Param(name = "field", type = { "dense_vector" }, description = "Field that the query will target.") Expression field, + @Param( + name = "query", + type = { "dense_vector" }, + description = "Vector value to find top nearest neighbours for." + ) Expression query, + @MapParam( + name = "options", + params = { + @MapParam.MapParamEntry( + name = "boost", + type = "float", + valueHint = { "2.5" }, + description = "Floating point number used to decrease or increase the relevance scores of the query. " + + "Defaults to 1.0." + ), + @MapParam.MapParamEntry( + name = "k", + type = "integer", + valueHint = { "10" }, + description = "The number of nearest neighbors to return from each shard. " + + "Elasticsearch collects k results from each shard, then merges them to find the global top results. " + + "This value must be less than or equal to num_candidates. Defaults to 10." 
+ ), + @MapParam.MapParamEntry( + name = "num_candidates", + type = "integer", + valueHint = { "10" }, + description = "The number of nearest neighbor candidates to consider per shard while doing knn search. " + + "Cannot exceed 10,000. Increasing num_candidates tends to improve the accuracy of the final results. " + + "Defaults to 1.5 * k" + ), + @MapParam.MapParamEntry( + name = "similarity", + type = "double", + valueHint = { "0.01" }, + description = "The minimum similarity required for a document to be considered a match. " + + "The similarity value calculated relates to the raw similarity used, not the document score" + ), + @MapParam.MapParamEntry( + name = "rescore_oversample", + type = "double", + valueHint = { "3.5" }, + description = "Applies the specified oversampling for rescoring quantized vectors. " + + "See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details." + ), }, + description = "(Optional) kNN additional options as <>." + + " See <> for more information.", + optional = true + ) Expression options + ) { this(source, field, query, options, null); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java new file mode 100644 index 0000000000000..9a8d1f817b629 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -0,0 +1,34 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.fulltext; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.List; +import java.util.function.Supplier; + +public class KnnTests extends NoneFieldFullTextFunctionTestCase { + public KnnTests(@Name("TestCase") Supplier testCaseSupplier) { + super(testCaseSupplier); + } + + @ParametersFactory + public static Iterable parameters() { + return generateParameters(); + } + + @Override + protected Expression build(Source source, List args) { + return new Kql(source, args.get(0)); + } +} From 7f5ddde62f08fe2dd4c6747f5be2863efab81293 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 30 May 2025 10:22:12 +0200 Subject: [PATCH 25/64] Add first version of KnnTests and generated docs --- .../esql/images/functions/knn.svg | 1 + .../esql/kibana/definition/functions/knn.json | 13 +++ .../esql/kibana/docs/functions/knn.md | 10 ++ .../function/fulltext/FullTextFunction.java | 3 +- .../esql/expression/function/vector/Knn.java | 19 ++-- .../xpack/esql/SerializationTestUtils.java | 2 + .../function/fulltext/KnnTests.java | 104 +++++++++++++++++- .../function/fulltext/MatchTests.java | 2 +- 8 files changed, 138 insertions(+), 16 deletions(-) create mode 100644 docs/reference/query-languages/esql/images/functions/knn.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/knn.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/knn.md diff --git a/docs/reference/query-languages/esql/images/functions/knn.svg b/docs/reference/query-languages/esql/images/functions/knn.svg new file mode 100644 index 0000000000000..75a104a7cdcfa --- /dev/null +++ 
b/docs/reference/query-languages/esql/images/functions/knn.svg @@ -0,0 +1 @@ +KNN(field,query,options) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/knn.json b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json new file mode 100644 index 0000000000000..48d3e582eec58 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json @@ -0,0 +1,13 @@ +{ + "comment" : "This is generated by ESQL’s AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "knn", + "description" : "Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors.", + "signatures" : [ ], + "examples" : [ + "from colors metadata _score\n| where knn(rgb_vector, [0, 120, 0])\n| sort _score desc", + "from colors metadata _score\n| where knn(rgb_vector, [0,255,255], {\"k\": 4})\n| sort _score desc" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/knn.md b/docs/reference/query-languages/esql/kibana/docs/functions/knn.md new file mode 100644 index 0000000000000..45d1f294ea0a8 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/knn.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +### KNN +Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors. 
+ +```esql +from colors metadata _score +| where knn(rgb_vector, [0, 120, 0]) +| sort _score desc +``` diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index b18cbc1572d90..aeb97617d3cc3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -152,7 +152,8 @@ public boolean equals(Object obj) { return false; } - return Objects.equals(queryBuilder, ((FullTextFunction) obj).queryBuilder); + return Objects.equals(queryBuilder, ((FullTextFunction) obj).queryBuilder) + && Objects.equals(query, ((FullTextFunction) obj).query); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 6084aeabee831..782753d62d1db 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -72,10 +72,8 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun @FunctionInfo( returnType = "boolean", preview = true, - description = """ - Finds the k nearest vectors to a query vector, as measured by a similarity metric. - knn function finds nearest vectors through approximate search on indexed dense_vectors - """, + description = "Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
" + + "knn function finds nearest vectors through approximate search on indexed dense_vectors.", examples = { @Example(file = "knn-function", tag = "knn-function"), @Example(file = "knn-function", tag = "knn-function-options"), }, @@ -96,7 +94,7 @@ public Knn( name = "boost", type = "float", valueHint = { "2.5" }, - description = "Floating point number used to decrease or increase the relevance scores of the query. " + description = "Floating point number used to decrease or increase the relevance scores of the query." + "Defaults to 1.0." ), @MapParam.MapParamEntry( @@ -120,7 +118,7 @@ public Knn( type = "double", valueHint = { "0.01" }, description = "The minimum similarity required for a document to be considered a match. " - + "The similarity value calculated relates to the raw similarity used, not the document score" + + "The similarity value calculated relates to the raw similarity used, not the document score." ), @MapParam.MapParamEntry( name = "rescore_oversample", @@ -237,12 +235,13 @@ public void writeTo(StreamOutput out) throws IOException { @Override public boolean equals(Object o) { + // Knn does not serialize options, as they get included in the query builder. 
We need to override equals and hashcode to + // ignore options when comparing two Knn functions if (o == null || getClass() != o.getClass()) return false; - if (super.equals(o) == false) return false; Knn knn = (Knn) o; - return Objects.equals(field, knn.field) - && Objects.equals(query(), knn.query()) - && Objects.equals(queryBuilder(), knn.queryBuilder()); + return Objects.equals(field(), knn.field()) + && Objects.equals(query(), knn.query()) + && Objects.equals(queryBuilder(), knn.queryBuilder()); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java index 8e396e4753f09..e55a1b039258e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/SerializationTestUtils.java @@ -24,6 +24,7 @@ import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.index.query.WildcardQueryBuilder; +import org.elasticsearch.search.vectors.KnnVectorQueryBuilder; import org.elasticsearch.test.EqualsHashCodeTestUtils; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.expression.ExpressionWritables; @@ -111,6 +112,7 @@ public static NamedWriteableRegistry writableRegistry() { entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, WildcardQueryBuilder.NAME, WildcardQueryBuilder::new)); entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, RegexpQueryBuilder.NAME, RegexpQueryBuilder::new)); entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, ExistsQueryBuilder.NAME, ExistsQueryBuilder::new)); + entries.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, KnnVectorQueryBuilder.NAME, KnnVectorQueryBuilder::new)); entries.add(SingleValueQuery.ENTRY); 
entries.addAll(ExpressionWritables.getNamedWriteables()); entries.addAll(PlanWritables.getNamedWriteables()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index 9a8d1f817b629..39218968b5163 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -10,25 +10,121 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.MapExpression; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.elasticsearch.xpack.esql.expression.function.vector.Knn; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; +import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; +import java.util.ArrayList; import java.util.List; import java.util.function.Supplier; -public class KnnTests extends NoneFieldFullTextFunctionTestCase { +import static org.elasticsearch.xpack.esql.SerializationTestUtils.serializeDeserialize; +import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; +import static org.elasticsearch.xpack.esql.planner.TranslatorHandler.TRANSLATOR_HANDLER; +import static org.hamcrest.Matchers.equalTo; + +public class KnnTests extends AbstractFunctionTestCase { + public KnnTests(@Name("TestCase") Supplier testCaseSupplier) { - super(testCaseSupplier); + this.testCase = testCaseSupplier.get(); } @ParametersFactory public static Iterable parameters() { - return generateParameters(); + return parameterSuppliersFromTypedData(addFunctionNamedParams(testCaseSuppliers())); + } + + private static List testCaseSuppliers() { + List suppliers = new ArrayList<>(); + + suppliers.add( + TestCaseSupplier.testCaseSupplier( + new TestCaseSupplier.TypedDataSupplier("dense_vector field", KnnTests::randomDenseVector, DENSE_VECTOR), + new TestCaseSupplier.TypedDataSupplier("query", KnnTests::randomDenseVector, DENSE_VECTOR, true), + (d1, d2) -> equalTo("string"), + BOOLEAN, + (o1, o2) -> true + ) + ); + + return suppliers; + } + + private static List randomDenseVector() { + int dimensions = randomIntBetween(64, 128); + List vector = new ArrayList<>(); + for (int i = 0; i < dimensions; i++) { + vector.add(randomFloat()); + } + return vector; + } + + /** + * Adds function named parameters to all the test case suppliers provided + */ + private static List addFunctionNamedParams(List suppliers) { + // TODO get to a common class with MatchTests + List result = new ArrayList<>(); + for (TestCaseSupplier supplier : suppliers) { + List dataTypes = new ArrayList<>(supplier.types()); + dataTypes.add(UNSUPPORTED); + result.add(new TestCaseSupplier(supplier.name() + ", options", dataTypes, () -> { + List values = new ArrayList<>(supplier.get().getData()); + values.add( + new TestCaseSupplier.TypedData( + new MapExpression( + Source.EMPTY, + List.of( + new Literal(Source.EMPTY, randomAlphaOfLength(10), KEYWORD) + ) + ), + 
UNSUPPORTED, + "options" + ).forceLiteral() + ); + + return new TestCaseSupplier.TestCase(values, equalTo("KnnEvaluator"), BOOLEAN, equalTo(true)); + })); + } + return result; } @Override protected Expression build(Source source, List args) { - return new Kql(source, args.get(0)); + Knn knn = new Knn(source, args.get(0), args.get(1), args.size() > 2 ? args.get(2) : null); + // We need to add the QueryBuilder to the match expression, as it is used to implement equals() and hashCode() and + // thus test the serialization methods. But we can only do this if the parameters make sense . + if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) { + QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, knn).toQueryBuilder(); + knn = (Knn) knn.replaceQueryBuilder(queryBuilder); + } + return knn; + } + + /** + * Copy of the overridden method that doesn't check for children size, as the {@code options} child isn't serialized in Match. + */ + @Override + protected Expression serializeDeserializeExpression(Expression expression) { + Expression newExpression = serializeDeserialize( + expression, + PlanStreamOutput::writeNamedWriteable, + in -> in.readNamedWriteable(Expression.class), + testCase.getConfiguration() // The configuration query should be == to the source text of the function for this to work + ); + // Fields use synthetic sources, which can't be serialized. So we use the originals instead. 
+ return newExpression.replaceChildren(expression.children()); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchTests.java index 6993f7583dd02..301cbd6844afe 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchTests.java @@ -82,7 +82,7 @@ protected Expression build(Source source, List args) { // thus test the serialization methods. But we can only do this if the parameters make sense . if (args.get(0) instanceof FieldAttribute && args.get(1).foldable()) { QueryBuilder queryBuilder = TRANSLATOR_HANDLER.asQuery(LucenePushdownPredicates.DEFAULT, match).toQueryBuilder(); - match.replaceQueryBuilder(queryBuilder); + match = (Match) match.replaceQueryBuilder(queryBuilder); } return match; } From 66f849657057850dab8e1d562f0af01101f12b07 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 30 May 2025 13:48:41 +0200 Subject: [PATCH 26/64] Add verifier tests --- .../esql/expression/function/vector/Knn.java | 55 +++- .../xpack/esql/analysis/VerifierTests.java | 237 +++--------------- 2 files changed, 86 insertions(+), 206 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 782753d62d1db..58a83948c4e47 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -46,8 +46,11 @@ import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.NUM_CANDS_FIELD; import static 
org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isMapExpression; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNullAndFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; @@ -72,8 +75,8 @@ public class Knn extends FullTextFunction implements OptionalArgument, VectorFun @FunctionInfo( returnType = "boolean", preview = true, - description = "Finds the k nearest vectors to a query vector, as measured by a similarity metric. " + - "knn function finds nearest vectors through approximate search on indexed dense_vectors.", + description = "Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
" + + "knn function finds nearest vectors through approximate search on indexed dense_vectors.", examples = { @Example(file = "knn-function", tag = "knn-function"), @Example(file = "knn-function", tag = "knn-function-options"), }, @@ -156,12 +159,48 @@ public DataType dataType() { @Override protected TypeResolution resolveParams() { - if (childrenResolved() == false) { - return new TypeResolution("Unresolved children"); + return resolveField().and(resolveQuery()).and(resolveOptions()); + } + + private TypeResolution resolveField() { + return isNotNull(field(), sourceText(), FIRST).and(isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, "dense_vector")); + } + + private TypeResolution resolveQuery() { + return isType(query(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.SECOND, "dense_vector").and( + isNotNullAndFoldable(query(), sourceText(), SECOND) + ); + } + + private TypeResolution resolveOptions() { + if (options() != null) { + TypeResolution resolution = isNotNull(options(), sourceText(), THIRD); + if (resolution.unresolved()) { + return resolution; + } + // MapExpression does not have a DataType associated with it + resolution = isMapExpression(options(), sourceText(), THIRD); + if (resolution.unresolved()) { + return resolution; + } + + try { + knnQueryOptions(); + } catch (InvalidArgumentException e) { + return new TypeResolution(e.getMessage()); + } + } + return TypeResolution.TYPE_RESOLVED; + } + + private Map knnQueryOptions() throws InvalidArgumentException { + if (options() == null) { + return Map.of(); } - return isNotNull(field(), sourceText(), FIRST).and(isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, "dense_vector")) - .and(isType(query(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.SECOND, "dense_vector")); + Map matchOptions = new HashMap<>(); + populateOptionsMap((MapExpression) options(), matchOptions, THIRD, sourceText(), ALLOWED_OPTIONS); + return matchOptions; } 
@Override @@ -240,8 +279,8 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; Knn knn = (Knn) o; return Objects.equals(field(), knn.field()) - && Objects.equals(query(), knn.query()) - && Objects.equals(queryBuilder(), knn.queryBuilder()); + && Objects.equals(query(), knn.query()) + && Objects.equals(queryBuilder(), knn.queryBuilder()); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d76a355a6c9a9..f67aa63739610 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.expression.function.fulltext.MultiMatch; import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; +import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.IndexResolution; import org.elasticsearch.xpack.esql.parser.EsqlParser; @@ -54,7 +55,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.IP; import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; -import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.hamcrest.Matchers.containsString; @@ -2173,231 +2173,72 @@ public void testLookupJoinDataTypeMismatch() { } public void testMatchOptions() { - // Check positive cases - query("FROM test | WHERE match(first_name, \"Jean\", 
{\"analyzer\": \"standard\"})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"boost\": 2.1})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"fuzziness\": 2})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"fuzziness\": \"AUTO\"})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"fuzzy_transpositions\": false})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"lenient\": false})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"max_expansions\": 10})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"minimum_should_match\": \"2\"})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"operator\": \"AND\"})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"prefix_length\": 2})"); - query("FROM test | WHERE match(first_name, \"Jean\", {\"auto_generate_synonyms_phrase_query\": true})"); - - // Check all data types for available options - DataType[] optionTypes = new DataType[] { INTEGER, LONG, FLOAT, DOUBLE, KEYWORD, BOOLEAN }; - for (Map.Entry allowedOptions : Match.ALLOWED_OPTIONS.entrySet()) { - String optionName = allowedOptions.getKey(); - DataType optionType = allowedOptions.getValue(); - // Check every possible type for the option - we'll try to convert it to the expected type - for (DataType currentType : optionTypes) { - String optionValue = switch (currentType) { - case BOOLEAN -> String.valueOf(randomBoolean()); - case INTEGER -> String.valueOf(randomIntBetween(0, 100000)); - case LONG -> String.valueOf(randomLong()); - case FLOAT -> String.valueOf(randomFloat()); - case DOUBLE -> String.valueOf(randomDouble()); - case KEYWORD -> randomAlphaOfLength(10); - default -> throw new IllegalArgumentException("Unsupported option type: " + currentType); - }; - String queryOptionValue = optionValue; - if (currentType == KEYWORD) { - queryOptionValue = "\"" + optionValue + "\""; - } - - String query = "FROM test | WHERE match(first_name, \"Jean\", {\"" + optionName + 
"\": " + queryOptionValue + "})"; - try { - // Check conversion is possible - DataTypeConverter.convert(optionValue, optionType); - // If no exception was thrown, conversion is possible and should be done - query(query); - } catch (InvalidArgumentException e) { - // Conversion is not possible, query should fail - assertEquals( - "1:19: Invalid option [" - + optionName - + "] in [match(first_name, \"Jean\", {\"" - + optionName - + "\": " - + queryOptionValue - + "})], cannot cast [" - + optionValue - + "] to [" - + optionType.typeName() - + "]", - error(query) - ); - } - } - } + checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(first_name, \"Jean\", {\"%s\": %s})", defaultAnalyzer); + } - assertThat( - error("FROM test | WHERE match(first_name, \"Jean\", {\"unknown_option\": true})"), - containsString( - "1:19: Invalid option [unknown_option] in [match(first_name, \"Jean\", {\"unknown_option\": true})]," + " expected one of " - ) + public void testMultiMatchOptions() { + checkOptionDataTypes( + MultiMatch.OPTIONS, + "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})", + defaultAnalyzer ); } public void testQueryStringOptions() { - // Check positive cases - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"analyzer\": \"standard\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"allow_leading_wildcard\": false})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"analyze_wildcard\": false})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"auto_generate_synonyms_phrase_query\": true})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"boost\": 2.1})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"default_field\": \"field1\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"default_operator\": \"AND\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"enable_position_increments\": false})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", 
{\"fuzziness\": 2})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"fuzziness\": \"AUTO\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"fuzzy_prefix_length\": 5})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"fuzzy_transpositions\": false})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"lenient\": false})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"max_determinized_states\": 10})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"minimum_should_match\": \"2\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"quote_analyzer\": \"qnalyzer_1\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"quote_field_suffix\": \"q_suffix\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"phrase_slop\": 10})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"rewrite\": \"r1\"})"); - query("FROM test | WHERE QSTR(\"first_name: Jean\", {\"time_zone\": \"time_zone\"})"); - - // Check all data types for available options - DataType[] optionTypes = new DataType[] { INTEGER, LONG, FLOAT, DOUBLE, KEYWORD, BOOLEAN }; - for (Map.Entry allowedOptions : QueryString.ALLOWED_OPTIONS.entrySet()) { - String optionName = allowedOptions.getKey(); - DataType optionType = allowedOptions.getValue(); - // Check every possible type for the option - we'll try to convert it to the expected type - for (DataType currentType : optionTypes) { - String optionValue = switch (currentType) { - case BOOLEAN -> String.valueOf(randomBoolean()); - case INTEGER -> String.valueOf(randomIntBetween(0, 100000)); - case LONG -> String.valueOf(randomLong()); - case FLOAT -> String.valueOf(randomFloat()); - case DOUBLE -> String.valueOf(randomDouble()); - case KEYWORD -> randomAlphaOfLength(10); - default -> throw new IllegalArgumentException("Unsupported option type: " + currentType); - }; - String queryOptionValue = optionValue; - if (currentType == KEYWORD) { - queryOptionValue = "\"" + optionValue 
+ "\""; - } - - String query = "FROM test | WHERE QSTR(\"first_name: Jean\", {\"" + optionName + "\": " + queryOptionValue + "})"; - try { - // Check conversion is possible - DataTypeConverter.convert(optionValue, optionType); - // If no exception was thrown, conversion is possible and should be done - query(query); - } catch (InvalidArgumentException e) { - // Conversion is not possible, query should fail - assertEquals( - "1:19: Invalid option [" - + optionName - + "] in [QSTR(\"first_name: Jean\", {\"" - + optionName - + "\": " - + queryOptionValue - + "})], cannot cast [" - + optionValue - + "] to [" - + optionType.typeName() - + "]", - error(query) - ); - } - } - } + checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"first_name: Jean\", {\"%s\": %s})", defaultAnalyzer); + } - assertThat( - error("FROM test | WHERE QSTR(\"first_name: Jean\", {\"unknown_option\": true})"), - containsString( - "1:20: Invalid option [unknown_option] in [QSTR(\"first_name: Jean\", {\"unknown_option\": true})]," + " expected one of " - ) - ); + public void testKnnOptions() { + Analyzer analyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-colors.json", "colors")); + checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM colors | WHERE KNN(rgb_vector, [0.1, 0.2, 0.3], {\"%s\": %s})", analyzer); } - public void testMultiMatchOptions() { - // Check positive cases - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name)"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, {\"analyzer\": \"standard\"})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"analyzer\": \"standard\"})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"slop\": 10})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"auto_generate_synonyms_phrase_query\": true})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"fuzziness\": 2})"); - query("FROM test | WHERE 
MULTI_MATCH(\"Jean\", first_name, last_name, {\"fuzzy_transpositions\": false})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"lenient\": false})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"max_expansions\": 10})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"minimum_should_match\": \"2\"})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"operator\": \"AND\"})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"prefix_length\": 2})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"tie_breaker\": 1.0})"); - query("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"type\": \"best_fields\"})"); - - // Check all data types for available options + /** + * Check all data types for available options. When conversion is not possible, checks that it's an error + */ + private void checkOptionDataTypes(Map allowedOptionsMap, String queryTemplate, Analyzer analyzer) { DataType[] optionTypes = new DataType[] { INTEGER, LONG, FLOAT, DOUBLE, KEYWORD, BOOLEAN }; - for (Map.Entry allowedOptions : MultiMatch.OPTIONS.entrySet()) { + for (Map.Entry allowedOptions : allowedOptionsMap.entrySet()) { String optionName = allowedOptions.getKey(); DataType optionType = allowedOptions.getValue(); + // Check every possible type for the option - we'll try to convert it to the expected type for (DataType currentType : optionTypes) { - String optionValue = switch (currentType) { - case BOOLEAN -> String.valueOf(randomBoolean()); - case INTEGER -> String.valueOf(randomIntBetween(0, 100000)); - case LONG -> String.valueOf(randomLong()); - case FLOAT -> String.valueOf(randomFloat()); - case DOUBLE -> String.valueOf(randomDouble()); - case KEYWORD -> randomAlphaOfLength(10); - default -> throw new IllegalArgumentException("Unsupported option type: " + currentType); - }; + String optionValue = 
exampleValueForType(currentType); String queryOptionValue = optionValue; if (currentType == KEYWORD) { queryOptionValue = "\"" + optionValue + "\""; } - String query = "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"" - + optionName - + "\": " - + queryOptionValue - + "})"; + String query = String.format(Locale.ROOT, queryTemplate, optionName, queryOptionValue); try { // Check conversion is possible DataTypeConverter.convert(optionValue, optionType); // If no exception was thrown, conversion is possible and should be done - query(query); + query(query, analyzer); } catch (InvalidArgumentException e) { // Conversion is not possible, query should fail - assertEquals( - "1:19: Invalid option [" - + optionName - + "] in [MULTI_MATCH(\"Jean\", first_name, last_name, {\"" - + optionName - + "\": " - + queryOptionValue - + "})], cannot " - + (optionType == OBJECT ? "convert from" : "cast") - + " [" - + optionValue - + "]" - + (optionType == OBJECT ? (", type [keyword]") : "") - + " to [" - + optionType.typeName() - + "]", - error(query) - ); + String error = error(query, analyzer); + assertThat(error, containsString("Invalid option [" + optionName + "]")); + assertThat(error, containsString("cannot cast [" + optionValue + "] to [" + optionType.typeName() + "]")); } } } - assertThat( - error("FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"unknown_option\": true})"), - containsString( - "1:19: Invalid option [unknown_option] in [MULTI_MATCH(\"Jean\", first_name, last_name, " - + "{\"unknown_option\": true})], expected one of " - ) - ); + String errorQuery = String.format(Locale.ROOT, queryTemplate, "unknown_option", "\"any_value\""); + assertThat(error(errorQuery, analyzer), containsString("Invalid option [unknown_option]")); + } + + private static String exampleValueForType(DataType currentType) { + return switch (currentType) { + case BOOLEAN -> String.valueOf(randomBoolean()); + case INTEGER -> String.valueOf(randomIntBetween(0, 
100000)); + case LONG -> String.valueOf(randomLong()); + case FLOAT -> String.valueOf(randomFloat()); + case DOUBLE -> String.valueOf(randomDouble()); + case KEYWORD -> randomAlphaOfLength(10); + default -> throw new IllegalArgumentException("Unsupported option type: " + currentType); + }; } public void testMultiMatchFunctionIsNotNullable() { From f756e85eed3a21071e8dd1b0a32f747727f16fda Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 30 May 2025 13:48:45 +0200 Subject: [PATCH 27/64] Spotless --- .../expression/function/fulltext/FullTextFunction.java | 3 +-- .../xpack/esql/expression/function/fulltext/KnnTests.java | 7 +------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index aeb97617d3cc3..ca93aab3740a2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -152,8 +152,7 @@ public boolean equals(Object obj) { return false; } - return Objects.equals(queryBuilder, ((FullTextFunction) obj).queryBuilder) - && Objects.equals(query, ((FullTextFunction) obj).query); + return Objects.equals(queryBuilder, ((FullTextFunction) obj).queryBuilder) && Objects.equals(query, ((FullTextFunction) obj).query); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index 39218968b5163..ac31b3fab23d8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -84,12 +84,7 @@ private static List addFunctionNamedParams(List values = new ArrayList<>(supplier.get().getData()); values.add( new TestCaseSupplier.TypedData( - new MapExpression( - Source.EMPTY, - List.of( - new Literal(Source.EMPTY, randomAlphaOfLength(10), KEYWORD) - ) - ), + new MapExpression(Source.EMPTY, List.of(new Literal(Source.EMPTY, randomAlphaOfLength(10), KEYWORD))), UNSUPPORTED, "options" ).forceLiteral() From 34968ad5f53863a5672cd4f65630e756d9042b6a Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 30 May 2025 14:06:49 +0200 Subject: [PATCH 28/64] Add verifier tests --- .../src/main/resources/dense_vector.csv-spec | 2 +- .../main/resources/mapping-dense_vector.json | 6 ++ .../xpack/esql/analysis/VerifierTests.java | 60 ++++++++++--------- 3 files changed, 38 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec index 7736e69d77890..d1494b101b1c8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -35,7 +35,7 @@ required_capability: dense_vector_field_type FROM dense_vector | EVAL v = vector | RENAME v AS new_vector -| DROP vector +| DROP vector, first_name, last_name | SORT id ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json index 572d9870d09da..0160b1fd264fb 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json @@ -3,6 +3,12 @@ "id": { "type": "long" }, + "first_name": { + "type": "text" + }, + "last_name": { + "type": "text" 
+ }, "vector": { "type": "dense_vector", "similarity": "l2_norm" diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index f67aa63739610..6c1b7028d0d17 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1426,27 +1426,27 @@ public void testKqlFunctionArgNotNullOrConstant() throws Exception { } public void testQueryStringWithDisjunctions() { - checkWithDisjunctions("QSTR", "qstr(\"first_name: Anna\")", "function"); + checkWithDisjunctions("qstr(\"first_name: Anna\")"); } public void testKqlFunctionWithDisjunctions() { - checkWithDisjunctions("KQL", "kql(\"first_name: Anna\")", "function"); + checkWithDisjunctions("kql(\"first_name: Anna\")"); } public void testMatchFunctionWithDisjunctions() { - checkWithDisjunctions("MATCH", "match(first_name, \"Anna\")", "function"); + checkWithDisjunctions("match(first_name, \"Anna\")"); } public void testTermFunctionWithDisjunctions() { assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - checkWithDisjunctions("Term", "term(first_name, \"Anna\")", "function"); + checkWithDisjunctions("term(first_name, \"Anna\")"); } public void testMatchOperatorWithDisjunctions() { - checkWithDisjunctions(":", "first_name : \"Anna\"", "operator"); + checkWithDisjunctions("first_name : \"Anna\""); } - private void checkWithDisjunctions(String functionName, String functionInvocation, String functionType) { + private void checkWithDisjunctions(String functionInvocation) { query("from test | where " + functionInvocation + " or length(first_name) > 12"); query( "from test | where (" @@ -1457,50 +1457,52 @@ private void checkWithDisjunctions(String functionName, String functionInvocatio } public void 
testFullTextFunctionsDisjunctions() { - checkWithFullTextFunctionsDisjunctions("match(last_name, \"Smith\")"); - checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)"); - checkWithFullTextFunctionsDisjunctions("last_name : \"Smith\""); - checkWithFullTextFunctionsDisjunctions("qstr(\"last_name: Smith\")"); - checkWithFullTextFunctionsDisjunctions("kql(\"last_name: Smith\")"); + checkWithFullTextFunctionsDisjunctions("match(last_name, \"Smith\")", defaultAnalyzer); + checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)", defaultAnalyzer); + checkWithFullTextFunctionsDisjunctions("last_name : \"Smith\"", defaultAnalyzer); + checkWithFullTextFunctionsDisjunctions("qstr(\"last_name: Smith\")", defaultAnalyzer); + checkWithFullTextFunctionsDisjunctions("kql(\"last_name: Smith\")", defaultAnalyzer); + Analyzer analyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-dense_vector.json", "test")); + checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3])", analyzer); } - private void checkWithFullTextFunctionsDisjunctions(String functionInvocation) { + private void checkWithFullTextFunctionsDisjunctions(String functionInvocation, Analyzer analyzer) { // Disjunctions with non-pushable functions - scoring - query("from test | where " + functionInvocation + " or length(first_name) > 10"); - query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)"); + query("from test | where " + functionInvocation + " or length(first_name) > 10", analyzer); + query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)", analyzer); query( "from test | where (" + functionInvocation + " and length(first_name) > 0) or (match(last_name, \"Anneke\") and length(first_name) > 10)" - ); + , analyzer); // Disjunctions with non-pushable functions - no scoring - query("from test | where " + functionInvocation + " or 
length(first_name) > 10"); - query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)"); + query("from test | where " + functionInvocation + " or length(first_name) > 10", analyzer); + query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)", analyzer); query( "from test | where (" + functionInvocation + " and length(first_name) > 0) or (match(last_name, \"Anneke\") and length(first_name) > 10)" - ); + , analyzer); // Disjunctions with full text functions - no scoring - query("from test | where " + functionInvocation + " or match(first_name, \"Anna\")"); - query("from test | where " + functionInvocation + " or not match(first_name, \"Anna\")"); - query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10"); - query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")"); - query("from test | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))"); + query("from test | where " + functionInvocation + " or match(first_name, \"Anna\")", analyzer); + query("from test | where " + functionInvocation + " or not match(first_name, \"Anna\")", analyzer); + query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10", analyzer); + query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")", analyzer); + query("from test | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))", analyzer); // Disjunctions with full text functions - scoring - query("from test metadata _score | where " + functionInvocation + " or match(first_name, \"Anna\")"); - query("from test metadata _score | where " + functionInvocation + " or not match(first_name, \"Anna\")"); - query("from 
test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10"); + query("from test metadata _score | where " + functionInvocation + " or match(first_name, \"Anna\")", analyzer); + query("from test metadata _score | where " + functionInvocation + " or not match(first_name, \"Anna\")", analyzer); + query("from test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10", analyzer); query( "from test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")" - ); + , analyzer); query( "from test metadata _score | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))" - ); + , analyzer); } @@ -2271,7 +2273,7 @@ public void testMultiMatchFunctionNotAllowedAfterCommands() throws Exception { } public void testMultiMatchFunctionWithDisjunctions() { - checkWithDisjunctions("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); + checkWithDisjunctions("multi_match(\"Anna\", first_name, last_name)"); } public void testMultiMatchFunctionWithNonBooleanFunctions() { From 77011c1647b22d548406a3ebcebe269d367e7d61 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 30 May 2025 14:30:28 +0200 Subject: [PATCH 29/64] Add verifier tests, revert some changes to mappings --- .../src/main/resources/dense_vector.csv-spec | 2 +- .../src/main/resources/mapping-default.json | 4 + .../main/resources/mapping-dense_vector.json | 6 - .../xpack/esql/analysis/VerifierTests.java | 110 +++++++----------- 4 files changed, 46 insertions(+), 76 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec index d1494b101b1c8..7736e69d77890 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -35,7 +35,7 @@ required_capability: dense_vector_field_type FROM dense_vector | EVAL v = vector | RENAME v AS new_vector -| DROP vector, first_name, last_name +| DROP vector | SORT id ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json index 61603776f3f3d..fcb3269131b0e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json @@ -75,6 +75,10 @@ "type" : "keyword" } } + }, + "vector": { + "type": "dense_vector", + "similarity": "l2_norm" } } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json index 0160b1fd264fb..572d9870d09da 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json @@ -3,12 +3,6 @@ "id": { "type": "long" }, - "first_name": { - "type": "text" - }, - "last_name": { - "type": "text" - }, "vector": { "type": "dense_vector", "similarity": "l2_norm" diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 6c1b7028d0d17..0ee9402cc7135 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1391,6 +1391,10 @@ public void testMatchOperatornOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere(":", "first_name:\"Anna\"", "operator"); } + public void testKnnFunctionOnlyAllowedInWhere() throws Exception 
{ + checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [1, 2, 3])", "function"); + } + private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, String functionInvocation, String functionType) throws Exception { assertEquals( @@ -1425,84 +1429,57 @@ public void testKqlFunctionArgNotNullOrConstant() throws Exception { // Other value types are tested in KqlFunctionTests } - public void testQueryStringWithDisjunctions() { - checkWithDisjunctions("qstr(\"first_name: Anna\")"); - } - - public void testKqlFunctionWithDisjunctions() { - checkWithDisjunctions("kql(\"first_name: Anna\")"); - } - - public void testMatchFunctionWithDisjunctions() { - checkWithDisjunctions("match(first_name, \"Anna\")"); - } - - public void testTermFunctionWithDisjunctions() { - assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - checkWithDisjunctions("term(first_name, \"Anna\")"); - } - - public void testMatchOperatorWithDisjunctions() { - checkWithDisjunctions("first_name : \"Anna\""); - } - - private void checkWithDisjunctions(String functionInvocation) { - query("from test | where " + functionInvocation + " or length(first_name) > 12"); - query( - "from test | where (" - + functionInvocation - + " or first_name is not null) or (length(first_name) > 12 and match(last_name, \"Smith\"))" - ); - query("from test | where " + functionInvocation + " or (last_name is not null and first_name is null)"); - } - public void testFullTextFunctionsDisjunctions() { - checkWithFullTextFunctionsDisjunctions("match(last_name, \"Smith\")", defaultAnalyzer); - checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)", defaultAnalyzer); - checkWithFullTextFunctionsDisjunctions("last_name : \"Smith\"", defaultAnalyzer); - checkWithFullTextFunctionsDisjunctions("qstr(\"last_name: Smith\")", defaultAnalyzer); - checkWithFullTextFunctionsDisjunctions("kql(\"last_name: Smith\")", defaultAnalyzer); - Analyzer 
analyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-dense_vector.json", "test")); - checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3])", analyzer); + checkWithFullTextFunctionsDisjunctions("match(last_name, \"Smith\")"); + checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)"); + checkWithFullTextFunctionsDisjunctions("last_name : \"Smith\""); + checkWithFullTextFunctionsDisjunctions("qstr(\"last_name: Smith\")"); + checkWithFullTextFunctionsDisjunctions("kql(\"last_name: Smith\")"); + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + checkWithFullTextFunctionsDisjunctions("term(last_name, \"Smith\")"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3])"); + } } - private void checkWithFullTextFunctionsDisjunctions(String functionInvocation, Analyzer analyzer) { + private void checkWithFullTextFunctionsDisjunctions(String functionInvocation) { // Disjunctions with non-pushable functions - scoring - query("from test | where " + functionInvocation + " or length(first_name) > 10", analyzer); - query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)", analyzer); + query("from test | where " + functionInvocation + " or length(first_name) > 10"); + query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)"); query( "from test | where (" + functionInvocation + " and length(first_name) > 0) or (match(last_name, \"Anneke\") and length(first_name) > 10)" - , analyzer); + ); // Disjunctions with non-pushable functions - no scoring - query("from test | where " + functionInvocation + " or length(first_name) > 10", analyzer); - query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)", analyzer); + query("from test | where " + functionInvocation + " or length(first_name) > 10"); + 
query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)"); query( "from test | where (" + functionInvocation + " and length(first_name) > 0) or (match(last_name, \"Anneke\") and length(first_name) > 10)" - , analyzer); + ); // Disjunctions with full text functions - no scoring - query("from test | where " + functionInvocation + " or match(first_name, \"Anna\")", analyzer); - query("from test | where " + functionInvocation + " or not match(first_name, \"Anna\")", analyzer); - query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10", analyzer); - query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")", analyzer); - query("from test | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))", analyzer); + query("from test | where " + functionInvocation + " or match(first_name, \"Anna\")"); + query("from test | where " + functionInvocation + " or not match(first_name, \"Anna\")"); + query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10"); + query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")"); + query("from test | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))"); // Disjunctions with full text functions - scoring - query("from test metadata _score | where " + functionInvocation + " or match(first_name, \"Anna\")", analyzer); - query("from test metadata _score | where " + functionInvocation + " or not match(first_name, \"Anna\")", analyzer); - query("from test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10", analyzer); + query("from test metadata _score | where " + functionInvocation + " or match(first_name, 
\"Anna\")"); + query("from test metadata _score | where " + functionInvocation + " or not match(first_name, \"Anna\")"); + query("from test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10"); query( "from test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")" - , analyzer); + ); query( "from test metadata _score | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))" - , analyzer); + ); } @@ -2175,30 +2152,29 @@ public void testLookupJoinDataTypeMismatch() { } public void testMatchOptions() { - checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(first_name, \"Jean\", {\"%s\": %s})", defaultAnalyzer); + checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(first_name, \"Jean\", {\"%s\": %s})"); } public void testMultiMatchOptions() { checkOptionDataTypes( MultiMatch.OPTIONS, - "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})", - defaultAnalyzer + "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})" ); } public void testQueryStringOptions() { - checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"first_name: Jean\", {\"%s\": %s})", defaultAnalyzer); + checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"first_name: Jean\", {\"%s\": %s})"); } public void testKnnOptions() { - Analyzer analyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-colors.json", "colors")); - checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM colors | WHERE KNN(rgb_vector, [0.1, 0.2, 0.3], {\"%s\": %s})", analyzer); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()); + checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], {\"%s\": %s})"); } /** * Check all data types for available options. 
When conversion is not possible, checks that it's an error */ - private void checkOptionDataTypes(Map allowedOptionsMap, String queryTemplate, Analyzer analyzer) { + private void checkOptionDataTypes(Map allowedOptionsMap, String queryTemplate) { DataType[] optionTypes = new DataType[] { INTEGER, LONG, FLOAT, DOUBLE, KEYWORD, BOOLEAN }; for (Map.Entry allowedOptions : allowedOptionsMap.entrySet()) { String optionName = allowedOptions.getKey(); @@ -2217,10 +2193,10 @@ private void checkOptionDataTypes(Map allowedOptionsMap, Strin // Check conversion is possible DataTypeConverter.convert(optionValue, optionType); // If no exception was thrown, conversion is possible and should be done - query(query, analyzer); + query(query); } catch (InvalidArgumentException e) { // Conversion is not possible, query should fail - String error = error(query, analyzer); + String error = error(query); assertThat(error, containsString("Invalid option [" + optionName + "]")); assertThat(error, containsString("cannot cast [" + optionValue + "] to [" + optionType.typeName() + "]")); } @@ -2228,7 +2204,7 @@ private void checkOptionDataTypes(Map allowedOptionsMap, Strin } String errorQuery = String.format(Locale.ROOT, queryTemplate, "unknown_option", "\"any_value\""); - assertThat(error(errorQuery, analyzer), containsString("Invalid option [unknown_option]")); + assertThat(error(errorQuery), containsString("Invalid option [unknown_option]")); } private static String exampleValueForType(DataType currentType) { @@ -2272,10 +2248,6 @@ public void testMultiMatchFunctionNotAllowedAfterCommands() throws Exception { ); } - public void testMultiMatchFunctionWithDisjunctions() { - checkWithDisjunctions("multi_match(\"Anna\", first_name, last_name)"); - } - public void testMultiMatchFunctionWithNonBooleanFunctions() { checkFullTextFunctionsWithNonBooleanFunctions("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); } From d60c8e51fd8a26549347e559572befaba9fbc740 Mon Sep 17 
00:00:00 2001 From: carlosdelest Date: Fri, 30 May 2025 14:36:14 +0200 Subject: [PATCH 30/64] Refactor verifier tests --- .../xpack/esql/analysis/VerifierTests.java | 122 +++++++----------- 1 file changed, 45 insertions(+), 77 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 0ee9402cc7135..6b80b2d7e4ad1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1259,112 +1259,83 @@ public void testMatchFunctionIsNotNullable() { } public void testQueryStringFunctionsNotAllowedAfterCommands() throws Exception { - // Source commands - assertEquals("1:13: [QSTR] function cannot be used after SHOW", error("show info | where qstr(\"8.16.0\")")); - assertEquals("1:17: [QSTR] function cannot be used after ROW", error("row a= \"Anna\" | where qstr(\"Anna\")")); + testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands("QSTR", "qstr(\"field_name: Anna\")"); + } - // Processing commands - assertEquals( - "1:43: [QSTR] function cannot be used after DISSECT", - error("from test | dissect first_name \"%{foo}\" | where qstr(\"Connection\")") - ); - assertEquals("1:27: [QSTR] function cannot be used after DROP", error("from test | drop emp_no | where qstr(\"Anna\")")); - assertEquals( - "1:71: [QSTR] function cannot be used after ENRICH", - error("from test | enrich languages on languages with lang = language_name | where qstr(\"Anna\")") - ); - assertEquals("1:26: [QSTR] function cannot be used after EVAL", error("from test | eval z = 2 | where qstr(\"Anna\")")); - assertEquals( - "1:44: [QSTR] function cannot be used after GROK", - error("from test | grok last_name \"%{WORD:foo}\" | where qstr(\"Anna\")") - ); - assertEquals("1:27: [QSTR] function cannot be used after 
KEEP", error("from test | keep emp_no | where qstr(\"Anna\")")); - assertEquals("1:24: [QSTR] function cannot be used after LIMIT", error("from test | limit 10 | where qstr(\"Anna\")")); - assertEquals( - "1:35: [QSTR] function cannot be used after MV_EXPAND", - error("from test | mv_expand last_name | where qstr(\"Anna\")") - ); - assertEquals( - "1:45: [QSTR] function cannot be used after RENAME", - error("from test | rename last_name as full_name | where qstr(\"Anna\")") - ); + public void testKqlFunctionsNotAllowedAfterCommands() throws Exception { + testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands("KQL", "kql(\"field_name: Anna\")"); + } + + public void testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands(String functionName, String functionInvocation) throws Exception { + // Source commands + assertEquals("1:13: [" + functionName + "] function cannot be used after SHOW", error("show info | where " + functionInvocation)); assertEquals( - "1:52: [QSTR] function cannot be used after STATS", - error("from test | STATS c = COUNT(emp_no) BY languages | where qstr(\"Anna\")") + "1:17: [" + functionName + "] function cannot be used after ROW", + error("row a= \"Anna\" | where " + functionInvocation) ); - // Some combination of processing commands + // Processing commands assertEquals( - "1:38: [QSTR] function cannot be used after LIMIT", - error("from test | keep emp_no | limit 10 | where qstr(\"Anna\")") + "1:43: [" + functionName + "] function cannot be used after DISSECT", + error("from test | dissect first_name \"%{foo}\" | where " + functionInvocation) ); assertEquals( - "1:46: [QSTR] function cannot be used after MV_EXPAND", - error("from test | limit 10 | mv_expand last_name | where qstr(\"Anna\")") + "1:27: [" + functionName + "] function cannot be used after DROP", + error("from test | drop emp_no | where " + functionInvocation) ); assertEquals( - "1:52: [QSTR] function cannot be used after KEEP", - error("from test | mv_expand last_name | keep 
last_name | where qstr(\"Anna\")") + "1:71: [" + functionName + "] function cannot be used after ENRICH", + error("from test | enrich languages on languages with lang = language_name | where " + functionInvocation) ); assertEquals( - "1:77: [QSTR] function cannot be used after RENAME", - error("from test | STATS c = COUNT(emp_no) BY languages | rename c as total_emps | where qstr(\"Anna\")") + "1:26: [" + functionName + "] function cannot be used after EVAL", + error("from test | eval z = 2 | where " + functionInvocation) ); assertEquals( - "1:54: [QSTR] function cannot be used after KEEP", - error("from test | rename last_name as name | keep emp_no | where qstr(\"Anna\")") + "1:44: [" + functionName + "] function cannot be used after GROK", + error("from test | grok last_name \"%{WORD:foo}\" | where " + functionInvocation) ); - } - - public void testKqlFunctionsNotAllowedAfterCommands() throws Exception { - // Source commands - assertEquals("1:13: [KQL] function cannot be used after SHOW", error("show info | where kql(\"8.16.0\")")); - assertEquals("1:17: [KQL] function cannot be used after ROW", error("row a= \"Anna\" | where kql(\"Anna\")")); - - // Processing commands assertEquals( - "1:43: [KQL] function cannot be used after DISSECT", - error("from test | dissect first_name \"%{foo}\" | where kql(\"Connection\")") + "1:27: [" + functionName + "] function cannot be used after KEEP", + error("from test | keep emp_no | where " + functionInvocation) ); - assertEquals("1:27: [KQL] function cannot be used after DROP", error("from test | drop emp_no | where kql(\"Anna\")")); assertEquals( - "1:71: [KQL] function cannot be used after ENRICH", - error("from test | enrich languages on languages with lang = language_name | where kql(\"Anna\")") + "1:24: [" + functionName + "] function cannot be used after LIMIT", + error("from test | limit 10 | where " + functionInvocation) ); - assertEquals("1:26: [KQL] function cannot be used after EVAL", error("from test | eval z = 2 
| where kql(\"Anna\")")); assertEquals( - "1:44: [KQL] function cannot be used after GROK", - error("from test | grok last_name \"%{WORD:foo}\" | where kql(\"Anna\")") + "1:35: [" + functionName + "] function cannot be used after MV_EXPAND", + error("from test | mv_expand last_name | where " + functionInvocation) ); - assertEquals("1:27: [KQL] function cannot be used after KEEP", error("from test | keep emp_no | where kql(\"Anna\")")); - assertEquals("1:24: [KQL] function cannot be used after LIMIT", error("from test | limit 10 | where kql(\"Anna\")")); - assertEquals("1:35: [KQL] function cannot be used after MV_EXPAND", error("from test | mv_expand last_name | where kql(\"Anna\")")); assertEquals( - "1:45: [KQL] function cannot be used after RENAME", - error("from test | rename last_name as full_name | where kql(\"Anna\")") + "1:45: [" + functionName + "] function cannot be used after RENAME", + error("from test | rename last_name as full_name | where " + functionInvocation) ); assertEquals( - "1:52: [KQL] function cannot be used after STATS", - error("from test | STATS c = COUNT(emp_no) BY languages | where kql(\"Anna\")") + "1:52: [" + functionName + "] function cannot be used after STATS", + error("from test | STATS c = COUNT(emp_no) BY languages | where " + functionInvocation) ); // Some combination of processing commands - assertEquals("1:38: [KQL] function cannot be used after LIMIT", error("from test | keep emp_no | limit 10 | where kql(\"Anna\")")); assertEquals( - "1:46: [KQL] function cannot be used after MV_EXPAND", - error("from test | limit 10 | mv_expand last_name | where kql(\"Anna\")") + "1:38: [" + functionName + "] function cannot be used after LIMIT", + error("from test | keep emp_no | limit 10 | where " + functionInvocation) ); assertEquals( - "1:52: [KQL] function cannot be used after KEEP", - error("from test | mv_expand last_name | keep last_name | where kql(\"Anna\")") + "1:46: [" + functionName + "] function cannot be used after 
MV_EXPAND", + error("from test | limit 10 | mv_expand last_name | where " + functionInvocation) ); assertEquals( - "1:77: [KQL] function cannot be used after RENAME", - error("from test | STATS c = COUNT(emp_no) BY languages | rename c as total_emps | where kql(\"Anna\")") + "1:52: [" + functionName + "] function cannot be used after KEEP", + error("from test | mv_expand last_name | keep last_name | where " + functionInvocation) ); assertEquals( - "1:54: [KQL] function cannot be used after DROP", - error("from test | rename last_name as name | drop emp_no | where kql(\"Anna\")") + "1:77: [" + functionName + "] function cannot be used after RENAME", + error("from test | STATS c = COUNT(emp_no) BY languages | rename c as total_emps | where " + functionInvocation) + ); + assertEquals( + "1:54: [" + functionName + "] function cannot be used after DROP", + error("from test | rename last_name as name | drop emp_no | where " + functionInvocation) ); } @@ -2156,10 +2127,7 @@ public void testMatchOptions() { } public void testMultiMatchOptions() { - checkOptionDataTypes( - MultiMatch.OPTIONS, - "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})" - ); + checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})"); } public void testQueryStringOptions() { From eacb9a087c7827c507aa48b0323d855b33e2805a Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 30 May 2025 15:03:35 +0200 Subject: [PATCH 31/64] Refactor verifier tests --- .../xpack/esql/analysis/VerifierTests.java | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 6b80b2d7e4ad1..a420177c17625 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1239,22 +1239,31 @@ public void testMatchFunctionAndOperatorHaveCorrectErrorMessages() throws Except assertEquals("1:24: [:] operator cannot be used after LIMIT", error("from test | limit 10 | where first_name : \"Anna\"")); } + public void testFieldBasedFullTextFunctions() { + testFieldBasedWithNonIndexedColumn("MATCH", " match(text, \"cat\")", "function"); + testFieldBasedWithNonIndexedColumn(":", " text : \"cat\"", "operator"); + testFieldBasedWithNonIndexedColumn("MultiMatch", " multi_match(\"cat\", text)", "function"); + } + // These should pass eventually once we lift some restrictions on match function - public void testMatchWithNonIndexedColumnCurrentlyUnsupported() { - assertEquals( - "1:67: [MATCH] function cannot operate on [initial], which is not a field from an index mapping", - error("from test | eval initial = substring(first_name, 1) | where match(initial, \"A\")") + public void testFieldBasedWithNonIndexedColumn(String functionName, String functionInvocation, String functionType) { + assertThat( + error("from test | eval text = substring(first_name, 1) | where" + functionInvocation), + containsString("[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping") ); - assertEquals( - "1:67: [MATCH] function cannot operate on [text], which is not a field from an index mapping", - error("from test | eval text=concat(first_name, last_name) | where match(text, \"cat\")") + assertThat( + error("from test | eval text=concat(first_name, last_name) | where" + functionInvocation), + containsString("[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping") ); - } - - public void testMatchFunctionIsNotNullable() { - assertEquals( - "1:48: [MATCH] function cannot operate on [text::keyword], which is not a field from an index mapping", - error("row n = null | 
eval text = n + 5 | where match(text::keyword, \"Anna\")") + var keywordInvocation = functionInvocation.replace("text", "text::keyword"); + String keywordError = error("row n = null | eval text = n + 5 | where " + keywordInvocation); + assertThat( + keywordError, + containsString("[" + functionName + "] " + functionType + " cannot operate on") + ); + assertThat( + keywordError, + containsString("which is not a field from an index mapping") ); } From 03a329a12d647214295ae7a0b0ee8e13ce649ad9 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 2 Jun 2025 13:11:05 +0200 Subject: [PATCH 32/64] Fix some tests for multiple shards --- .../src/main/resources/knn-function.csv-spec | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 2eb2dfaf9659f..0cabc83b12647 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -276,11 +276,11 @@ required_capability: knn_function from colors | where length(color) < 10 -| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 40}) +| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 140}) ; c: long -11 +59 ; @@ -289,22 +289,22 @@ required_capability: knn_function required_capability: full_text_functions_in_stats_where from colors metadata _score -| stats c = count(*) where (knn(rgb_vector, [0,255,255], {"k": 40}) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0], {"k": 40}) +| stats c = count(*) where (knn(rgb_vector, [0,255,255], {"k": 140}) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0], {"k": 40}) ; c:long -21 +40 ; testKnnInStatsWithGrouping from colors | where length(color) < 10 -| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 40}) by primary +| stats c = 
count(*) where knn(rgb_vector, [128,128,255], {"k": 140}) by primary ; c: long | primary: boolean -9 | false -2 | true +50 | false +9 | true ; testKnnInStatsPushable @@ -319,16 +319,3 @@ from colors c:long 40 ; - -testKnnInStatsWithNonPushableDisjunctions -required_capability: knn_function -required_capability: full_text_functions_in_stats_where - -FROM colors -| STATS c = count(*) where knn(rgb_vector, [128,128,0], {"k": 5}) or length(color) > 17 -; - -c:long -8 -; - From fbe8b6c7f786d1b2f9b4bdf754318dca1fcc953d Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 2 Jun 2025 17:00:53 +0200 Subject: [PATCH 33/64] Simplify tests --- .../function/fulltext/FullTextFunction.java | 18 +------- .../xpack/esql/analysis/VerifierTests.java | 45 ++++++++----------- 2 files changed, 20 insertions(+), 43 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index ca93aab3740a2..696e545296b90 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -204,23 +204,7 @@ private static void checkFullTextQueryFunctions(LogicalPlan plan, Failures failu checkCommandsBeforeExpression( plan, condition, - Match.class, - lp -> (lp instanceof Limit == false) && (lp instanceof Aggregate == false), - m -> "[" + m.functionName() + "] " + m.functionType(), - failures - ); - checkCommandsBeforeExpression( - plan, - condition, - MultiMatch.class, - lp -> (lp instanceof Limit == false) && (lp instanceof Aggregate == false), - m -> "[" + m.functionName() + "] " + m.functionType(), - failures - ); - checkCommandsBeforeExpression( - plan, - condition, - Term.class, + FullTextFunction.class, lp -> (lp instanceof 
Limit == false) && (lp instanceof Aggregate == false), m -> "[" + m.functionName() + "] " + m.functionType(), failures diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index a420177c17625..afa2d52fabba3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1215,44 +1215,37 @@ public void testMatchInsideEval() throws Exception { ); } - public void testMatchFunctionNotAllowedAfterCommands() throws Exception { - assertEquals( - "1:24: [MATCH] function cannot be used after LIMIT", - error("from test | limit 10 | where match(first_name, \"Anna\")") - ); - assertEquals( - "1:47: [MATCH] function cannot be used after STATS", - error("from test | STATS c = AVG(salary) BY gender | where match(gender, \"F\")") - ); + public void testFieldBasedFullTextFunctions() throws Exception { + testFieldBasedWithNonIndexedColumn("MATCH", "match(text, \"cat\")", "function"); + testFieldBasedWithNonIndexedColumn(":", "text : \"cat\"", "operator"); + testFieldBasedWithNonIndexedColumn("MultiMatch", "multi_match(\"cat\", text)", "function"); + + testFieldBasedFunctionNotAllowedAfterCommands("MATCH", "function", "match(first_name, \"Anna\")"); + testFieldBasedFunctionNotAllowedAfterCommands(":", "operator", "first_name : \"Anna\""); + testFieldBasedFunctionNotAllowedAfterCommands("MultiMatch", "function", "multi_match(\"Anna\", first_name)"); + testFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3])"); } - public void testMatchFunctionAndOperatorHaveCorrectErrorMessages() throws Exception { - assertEquals( - "1:24: [MATCH] function cannot be used after LIMIT", - error("from test | limit 10 | where match(first_name, \"Anna\")") + public void 
testFieldBasedFunctionNotAllowedAfterCommands(String functionName, String functionType, String functionInvocation) throws Exception { + assertThat( + error("from test | limit 10 | where " + functionInvocation), + containsString("[" + functionName + "] " + functionType + " cannot be used after LIMIT") ); - assertEquals( - "1:24: [MATCH] function cannot be used after LIMIT", - error("from test | limit 10 | where match ( first_name, \"Anna\" ) ") + String fieldName = "KNN".equals(functionName) ? "vector" : "first_name"; + assertThat( + error("from test | STATS c = COUNT(emp_no) BY " + fieldName + " | where " + functionInvocation), + containsString("[" + functionName + "] " + functionType + " cannot be used after STATS") ); - assertEquals("1:24: [:] operator cannot be used after LIMIT", error("from test | limit 10 | where first_name:\"Anna\"")); - assertEquals("1:24: [:] operator cannot be used after LIMIT", error("from test | limit 10 | where first_name : \"Anna\"")); - } - - public void testFieldBasedFullTextFunctions() { - testFieldBasedWithNonIndexedColumn("MATCH", " match(text, \"cat\")", "function"); - testFieldBasedWithNonIndexedColumn(":", " text : \"cat\"", "operator"); - testFieldBasedWithNonIndexedColumn("MultiMatch", " multi_match(\"cat\", text)", "function"); } // These should pass eventually once we lift some restrictions on match function public void testFieldBasedWithNonIndexedColumn(String functionName, String functionInvocation, String functionType) { assertThat( - error("from test | eval text = substring(first_name, 1) | where" + functionInvocation), + error("from test | eval text = substring(first_name, 1) | where " + functionInvocation), containsString("[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping") ); assertThat( - error("from test | eval text=concat(first_name, last_name) | where" + functionInvocation), + error("from test | eval text=concat(first_name, last_name) | where " + 
functionInvocation), containsString("[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping") ); var keywordInvocation = functionInvocation.replace("text", "text::keyword"); From 6ea4995c10bf878f471981388c07ccdef89ed1be Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 2 Jun 2025 17:24:00 +0200 Subject: [PATCH 34/64] Simplify tests --- .../xpack/esql/analysis/VerifierTests.java | 161 ++++++------------ 1 file changed, 49 insertions(+), 112 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index afa2d52fabba3..7b8602457a4da 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1341,31 +1341,17 @@ public void testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands(String fun ); } - public void testQueryStringFunctionOnlyAllowedInWhere() throws Exception { - assertEquals("1:9: [QSTR] function is only supported in WHERE and STATS commands", error("row a = qstr(\"Anna\")")); - checkFullTextFunctionsOnlyAllowedInWhere("QSTR", "qstr(\"Anna\")", "function"); - } - - public void testKqlFunctionOnlyAllowedInWhere() throws Exception { - assertEquals("1:9: [KQL] function is only supported in WHERE and STATS commands", error("row a = kql(\"Anna\")")); - checkFullTextFunctionsOnlyAllowedInWhere("KQL", "kql(\"Anna\")", "function"); - } - - public void testMatchFunctionOnlyAllowedInWhere() throws Exception { + public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere("MATCH", "match(first_name, \"Anna\")", "function"); - } - - public void testTermFunctionOnlyAllowedInWhere() throws Exception { - assumeTrue("term function capability not available", 
EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - checkFullTextFunctionsOnlyAllowedInWhere("Term", "term(first_name, \"Anna\")", "function"); - } - - public void testMatchOperatornOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere(":", "first_name:\"Anna\"", "operator"); - } - - public void testKnnFunctionOnlyAllowedInWhere() throws Exception { - checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [1, 2, 3])", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("QSTR", "qstr(\"Anna\")", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("KQL", "kql(\"Anna\")", "function"); + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [1, 2, 3])", "function"); + } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + checkFullTextFunctionsOnlyAllowedInWhere("Term", "term(first_name, \"Anna\")", "function"); + } } private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, String functionInvocation, String functionType) @@ -1382,6 +1368,12 @@ private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, Strin "1:47: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", error("from test | stats max_salary = max(salary) by " + functionInvocation) ); + if( "KQL".equals(functionName) || "QSTR".equals(functionName)) { + assertEquals( + "1:9: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", + error("row a = " + functionInvocation) + ); + } } public void testQueryStringFunctionArgNotNullOrConstant() throws Exception { @@ -1456,25 +1448,20 @@ private void checkWithFullTextFunctionsDisjunctions(String functionInvocation) { } - public void testQueryStringFunctionWithNonBooleanFunctions() { - checkFullTextFunctionsWithNonBooleanFunctions("QSTR", "qstr(\"first_name: Anna\")", "function"); - } - - public void testKqlFunctionWithNonBooleanFunctions() { - 
checkFullTextFunctionsWithNonBooleanFunctions("KQL", "kql(\"first_name: Anna\")", "function"); - } - - public void testMatchFunctionWithNonBooleanFunctions() { + public void testFullTextFunctionsWithNonBooleanFunctions() { checkFullTextFunctionsWithNonBooleanFunctions("MATCH", "match(first_name, \"Anna\")", "function"); - } - - public void testTermFunctionWithNonBooleanFunctions() { - assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(first_name, \"Anna\")", "function"); - } - - public void testMatchOperatorWithNonBooleanFunctions() { checkFullTextFunctionsWithNonBooleanFunctions(":", "first_name:\"Anna\"", "operator"); + checkFullTextFunctionsWithNonBooleanFunctions("QSTR", "qstr(\"first_name: Anna\")", "function"); + checkFullTextFunctionsWithNonBooleanFunctions("KQL", "kql(\"first_name: Anna\")", "function"); + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + checkFullTextFunctionsWithNonBooleanFunctions("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3])", "function"); + } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(first_name, \"Anna\")", "function"); + } } private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, String functionInvocation, String functionType) { @@ -1545,18 +1532,6 @@ public void testMatchFunctionArgNotConstant() throws Exception { // Other value types are tested in QueryStringFunctionTests } - // These should pass eventually once we lift some restrictions on match function - public void testMatchFunctionCurrentlyUnsupportedBehaviour() throws Exception { - assertEquals( - "1:68: Unknown column [first_name]", - error("from test | stats max_salary = max(salary) by 
emp_no | where match(first_name, \"Anna\")") - ); - assertEquals( - "1:62: Unknown column [first_name]", - error("from test | stats max_salary = max(salary) by emp_no | where first_name : \"Anna\"") - ); - } - public void testMatchFunctionNullArgs() throws Exception { assertEquals( "1:19: first argument of [match(null, \"query\")] cannot be null, received [null]", @@ -1586,15 +1561,6 @@ public void testTermFunctionArgNotConstant() throws Exception { // Other value types are tested in QueryStringFunctionTests } - // These should pass eventually once we lift some restrictions on match function - public void testTermFunctionCurrentlyUnsupportedBehaviour() throws Exception { - assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - assertEquals( - "1:67: Unknown column [first_name]", - error("from test | stats max_salary = max(salary) by emp_no | where term(first_name, \"Anna\")") - ); - } - public void testTermFunctionNullArgs() throws Exception { assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); assertEquals( @@ -2124,21 +2090,15 @@ public void testLookupJoinDataTypeMismatch() { ); } - public void testMatchOptions() { + public void testFullTextFunctionOptions() { checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(first_name, \"Jean\", {\"%s\": %s})"); - } - - public void testMultiMatchOptions() { - checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})"); - } - - public void testQueryStringOptions() { checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"first_name: Jean\", {\"%s\": %s})"); - } - - public void testKnnOptions() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()); - checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], {\"%s\": %s})"); + if 
(EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()){ + checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], {\"%s\": %s})"); + } } /** @@ -2189,39 +2149,6 @@ private static String exampleValueForType(DataType currentType) { }; } - public void testMultiMatchFunctionIsNotNullable() { - assertEquals( - "1:62: [MultiMatch] function cannot operate on [text::keyword], which is not a field from an index mapping", - error("row n = null | eval text = n + 5 | where multi_match(\"Anna\", text::keyword)") - ); - } - - public void testMultiMatchWithNonIndexedColumnCurrentlyUnsupported() { - assertEquals( - "1:78: [MultiMatch] function cannot operate on [initial], which is not a field from an index mapping", - error("from test | eval initial = substring(first_name, 1) | where multi_match(\"A\", initial)") - ); - assertEquals( - "1:80: [MultiMatch] function cannot operate on [text], which is not a field from an index mapping", - error("from test | eval text=concat(first_name, last_name) | where multi_match(\"cat\", text)") - ); - } - - public void testMultiMatchFunctionNotAllowedAfterCommands() throws Exception { - assertEquals( - "1:24: [MultiMatch] function cannot be used after LIMIT", - error("from test | limit 10 | where multi_match(\"Anna\", first_name)") - ); - assertEquals( - "1:47: [MultiMatch] function cannot be used after STATS", - error("from test | STATS c = AVG(salary) BY gender | where multi_match(\"F\", gender)") - ); - } - - public void testMultiMatchFunctionWithNonBooleanFunctions() { - checkFullTextFunctionsWithNonBooleanFunctions("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); - } - public void testMultiMatchFunctionArgNotConstant() throws Exception { assertEquals( "1:19: second argument of [match(first_name, 
first_name)] must be a constant, received [first_name]", @@ -2231,14 +2158,24 @@ public void testMultiMatchFunctionArgNotConstant() throws Exception { "1:59: second argument of [match(first_name, query)] must be a constant, received [query]", error("from test | eval query = concat(\"first\", \" name\") | where match(first_name, query)") ); - // Other value types are tested in QueryStringFunctionTests } - // Should pass eventually once we lift some restrictions on the multi-match function. + // Should pass eventually once we lift some restrictions on full text search functions. public void testMultiMatchFunctionCurrentlyUnsupportedBehaviour() throws Exception { - assertEquals( - "1:82: Unknown column [first_name]\nline 1:94: Unknown column [last_name]", - error("from test | stats max_salary = max(salary) by emp_no | where multi_match(\"Anna\", first_name, last_name)") + testFullTextFunctionsCurrentlyUnsupportedBehaviour("match(first_name, \"Anna\")"); + testFullTextFunctionsCurrentlyUnsupportedBehaviour("first_name : \"Anna\""); + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + testFullTextFunctionsCurrentlyUnsupportedBehaviour("multi_match(\"Anna\", first_name)"); + } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + testFullTextFunctionsCurrentlyUnsupportedBehaviour("term(first_name, \"Anna\")"); + } + } + + private void testFullTextFunctionsCurrentlyUnsupportedBehaviour(String functionInvocation) throws Exception { + assertThat( + error("from test | stats max_salary = max(salary) by emp_no | where " + functionInvocation), + containsString("Unknown column [first_name]") ); } From d28f2ea5ddb2e58e57d78884ffa40ef060c01413 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 2 Jun 2025 17:50:16 +0200 Subject: [PATCH 35/64] Simplify tests --- .../function/fulltext/MultiMatch.java | 2 +- .../xpack/esql/analysis/VerifierTests.java | 181 +++++++----------- 2 files changed, 74 insertions(+), 109 deletions(-) diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java index 5014263ba755b..2c398c7f6c6f1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MultiMatch.java @@ -379,7 +379,7 @@ public String functionName() { private TypeResolution resolveFields() { return fields.stream() .map( - (Expression field) -> isNotNull(field, sourceText(), FIRST).and( + (Expression field) -> isNotNull(field, sourceText(), SECOND).and( isType( field, FIELD_DATA_TYPES::contains, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 7b8602457a4da..1230b8539f005 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1226,7 +1226,8 @@ public void testFieldBasedFullTextFunctions() throws Exception { testFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3])"); } - public void testFieldBasedFunctionNotAllowedAfterCommands(String functionName, String functionType, String functionInvocation) throws Exception { + public void testFieldBasedFunctionNotAllowedAfterCommands(String functionName, String functionType, String functionInvocation) + throws Exception { assertThat( error("from test | limit 10 | where " + functionInvocation), containsString("[" + functionName + "] " + functionType + " cannot be used after LIMIT") @@ -1242,22 +1243,20 @@ public void testFieldBasedFunctionNotAllowedAfterCommands(String functionName, S public void 
testFieldBasedWithNonIndexedColumn(String functionName, String functionInvocation, String functionType) { assertThat( error("from test | eval text = substring(first_name, 1) | where " + functionInvocation), - containsString("[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping") + containsString( + "[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping" + ) ); assertThat( error("from test | eval text=concat(first_name, last_name) | where " + functionInvocation), - containsString("[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping") + containsString( + "[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping" + ) ); var keywordInvocation = functionInvocation.replace("text", "text::keyword"); String keywordError = error("row n = null | eval text = n + 5 | where " + keywordInvocation); - assertThat( - keywordError, - containsString("[" + functionName + "] " + functionType + " cannot operate on") - ); - assertThat( - keywordError, - containsString("which is not a field from an index mapping") - ); + assertThat(keywordError, containsString("[" + functionName + "] " + functionType + " cannot operate on")); + assertThat(keywordError, containsString("which is not a field from an index mapping")); } public void testQueryStringFunctionsNotAllowedAfterCommands() throws Exception { @@ -1346,12 +1345,15 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere(":", "first_name:\"Anna\"", "operator"); checkFullTextFunctionsOnlyAllowedInWhere("QSTR", "qstr(\"Anna\")", "function"); checkFullTextFunctionsOnlyAllowedInWhere("KQL", "kql(\"Anna\")", "function"); - if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { - checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [1, 2, 3])", 
"function"); - } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("Term", "term(first_name, \"Anna\")", "function"); } + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [1, 2, 3])", "function"); + } } private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, String functionInvocation, String functionType) @@ -1368,7 +1370,7 @@ private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, Strin "1:47: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", error("from test | stats max_salary = max(salary) by " + functionInvocation) ); - if( "KQL".equals(functionName) || "QSTR".equals(functionName)) { + if ("KQL".equals(functionName) || "QSTR".equals(functionName)) { assertEquals( "1:9: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", error("row a = " + functionInvocation) @@ -1376,24 +1378,6 @@ private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, Strin } } - public void testQueryStringFunctionArgNotNullOrConstant() throws Exception { - assertEquals( - "1:19: first argument of [qstr(first_name)] must be a constant, received [first_name]", - error("from test | where qstr(first_name)") - ); - assertEquals("1:19: first argument of [qstr(null)] cannot be null, received [null]", error("from test | where qstr(null)")); - // Other value types are tested in QueryStringFunctionTests - } - - public void testKqlFunctionArgNotNullOrConstant() throws Exception { - assertEquals( - "1:19: argument of [kql(first_name)] must be a constant, received [first_name]", - error("from test | where kql(first_name)") - ); - assertEquals("1:19: argument of [kql(null)] 
cannot be null, received [null]", error("from test | where kql(null)")); - // Other value types are tested in KqlFunctionTests - } - public void testFullTextFunctionsDisjunctions() { checkWithFullTextFunctionsDisjunctions("match(last_name, \"Smith\")"); checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)"); @@ -1520,62 +1504,22 @@ private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, ); } - public void testMatchFunctionArgNotConstant() throws Exception { - assertEquals( - "1:19: second argument of [match(first_name, first_name)] must be a constant, received [first_name]", - error("from test | where match(first_name, first_name)") - ); - assertEquals( - "1:59: second argument of [match(first_name, query)] must be a constant, received [query]", - error("from test | eval query = concat(\"first\", \" name\") | where match(first_name, query)") - ); - // Other value types are tested in QueryStringFunctionTests - } - - public void testMatchFunctionNullArgs() throws Exception { - assertEquals( - "1:19: first argument of [match(null, \"query\")] cannot be null, received [null]", - error("from test | where match(null, \"query\")") - ); - assertEquals( - "1:19: second argument of [match(first_name, null)] cannot be null, received [null]", - error("from test | where match(first_name, null)") - ); - } - - public void testMatchTargetsExistingField() throws Exception { - assertEquals("1:39: Unknown column [first_name]", error("from test | keep emp_no | where match(first_name, \"Anna\")")); - assertEquals("1:33: Unknown column [first_name]", error("from test | keep emp_no | where first_name : \"Anna\"")); - } - - public void testTermFunctionArgNotConstant() throws Exception { - assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - assertEquals( - "1:19: second argument of [term(first_name, first_name)] must be a constant, received [first_name]", - error("from test | where 
term(first_name, first_name)") - ); - assertEquals( - "1:59: second argument of [term(first_name, query)] must be a constant, received [query]", - error("from test | eval query = concat(\"first\", \" name\") | where term(first_name, query)") - ); - // Other value types are tested in QueryStringFunctionTests - } - - public void testTermFunctionNullArgs() throws Exception { - assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - assertEquals( - "1:19: first argument of [term(null, \"query\")] cannot be null, received [null]", - error("from test | where term(null, \"query\")") - ); - assertEquals( - "1:19: second argument of [term(first_name, null)] cannot be null, received [null]", - error("from test | where term(first_name, null)") - ); + public void testFullTextFunctionsTargetsExistingField() throws Exception { + testFullTextFunctionTargetsExistingField("match(first_name, \"Anna\")"); + testFullTextFunctionTargetsExistingField("first_name : \"Anna\""); + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + testFullTextFunctionTargetsExistingField("multi_match(\"Anna\", first_name)"); + } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + testFullTextFunctionTargetsExistingField("term(fist_name, \"Anna\")"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + testFullTextFunctionTargetsExistingField("knn(vector, [0, 1, 2])"); + } } - public void testTermTargetsExistingField() throws Exception { - assumeTrue("term function capability not available", EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()); - assertEquals("1:38: Unknown column [first_name]", error("from test | keep emp_no | where term(first_name, \"Anna\")")); + private void testFullTextFunctionTargetsExistingField(String functionInvocation) throws Exception { + assertThat(error("from test | keep emp_no | where " + functionInvocation), containsString("Unknown column")); } public void testConditionalFunctionsWithMixedNumericTypes() { @@ 
-2093,7 +2037,7 @@ public void testLookupJoinDataTypeMismatch() { public void testFullTextFunctionOptions() { checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(first_name, \"Jean\", {\"%s\": %s})"); checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"first_name: Jean\", {\"%s\": %s})"); - if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()){ + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { @@ -2161,7 +2105,7 @@ public void testMultiMatchFunctionArgNotConstant() throws Exception { } // Should pass eventually once we lift some restrictions on full text search functions. - public void testMultiMatchFunctionCurrentlyUnsupportedBehaviour() throws Exception { + public void testFullTextFunctionCurrentlyUnsupportedBehaviour() throws Exception { testFullTextFunctionsCurrentlyUnsupportedBehaviour("match(first_name, \"Anna\")"); testFullTextFunctionsCurrentlyUnsupportedBehaviour("first_name : \"Anna\""); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { @@ -2179,30 +2123,51 @@ private void testFullTextFunctionsCurrentlyUnsupportedBehaviour(String functionI ); } - public void testMultiMatchFunctionNullArgs() throws Exception { - assertEquals( - "1:19: first argument of [multi_match(\"query\", null)] cannot be null, received [null]", - error("from test | where multi_match(\"query\", null)") - ); - assertEquals( - "1:19: first argument of [multi_match(null, first_name)] cannot be null, received [null]", - error("from test | where multi_match(null, first_name)") - ); + public void testFullTextFunctionsNullArgs() throws Exception { + testFullTextFunctionNullArgs("match(null, \"query\")", "first"); + testFullTextFunctionNullArgs("match(first_name, null)", "second"); + testFullTextFunctionNullArgs("qstr(null)", ""); + 
testFullTextFunctionNullArgs("kql(null)", ""); + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + testFullTextFunctionNullArgs("multi_match(null, first_name)", "first"); + testFullTextFunctionNullArgs("multi_match(\"query\", null)", "second"); + } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + testFullTextFunctionNullArgs("term(null, \"query\")", "first"); + testFullTextFunctionNullArgs("term(first_name, null)", "second"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + testFullTextFunctionNullArgs("knn(null, [0, 1, 2])", "first"); + testFullTextFunctionNullArgs("knn(vector, null)", "second"); + } } - public void testMultiMatchTargetsExistingField() throws Exception { - assertEquals( - "1:53: Unknown column [first_name]\nline 1:65: Unknown column [last_name]", - error("from test | keep emp_no | where multi_match(\"Anna\", first_name, last_name)") + private void testFullTextFunctionNullArgs(String functionInvocation, String argOrdinal) throws Exception { + assertThat( + error("from test | where " + functionInvocation), + containsString(argOrdinal + " argument of [" + functionInvocation + "] cannot be null, received [null]") ); } - public void testMultiMatchInsideEval() throws Exception { - assumeTrue("MultiMatch operator is available just for snapshots", Build.current().isSnapshot()); - assertEquals( - "1:36: [MultiMatch] function is only supported in WHERE and STATS commands\n" - + "line 1:55: [MultiMatch] function cannot operate on [title], which is not a field from an index mapping", - error("row title = \"brown fox\" | eval x = multi_match(\"fox\", title)") + public void testFullTextFunctionsConstantQuery() throws Exception { + testFullTextFunctionsConstantQuery("match(first_name, last_name)", "second"); + testFullTextFunctionsConstantQuery("qstr(first_name)", ""); + testFullTextFunctionsConstantQuery("kql(first_name)", ""); + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + 
testFullTextFunctionsConstantQuery("multi_match(first_name, first_name)", "first"); + } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + testFullTextFunctionsConstantQuery("term(first_name, last_name)", "second"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + testFullTextFunctionsConstantQuery("knn(vector, vector)", "second"); + } + } + + private void testFullTextFunctionsConstantQuery(String functionInvocation, String argOrdinal) throws Exception { + assertThat( + error("from test | where " + functionInvocation), + containsString(argOrdinal + " argument of [" + functionInvocation + "] must be a constant") ); } From 9caed8676bf92b2ccf2fc77e2ab5369dc6dc8e3e Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 08:30:01 +0200 Subject: [PATCH 36/64] Simplify tests --- .../xpack/esql/analysis/VerifierTests.java | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 1230b8539f005..dd021387a4aa4 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2093,17 +2093,6 @@ private static String exampleValueForType(DataType currentType) { }; } - public void testMultiMatchFunctionArgNotConstant() throws Exception { - assertEquals( - "1:19: second argument of [match(first_name, first_name)] must be a constant, received [first_name]", - error("from test | where match(first_name, first_name)") - ); - assertEquals( - "1:59: second argument of [match(first_name, query)] must be a constant, received [query]", - error("from test | eval query = concat(\"first\", \" name\") | where match(first_name, query)") - ); - } - // Should pass eventually once we lift some restrictions on full text search 
functions. public void testFullTextFunctionCurrentlyUnsupportedBehaviour() throws Exception { testFullTextFunctionsCurrentlyUnsupportedBehaviour("match(first_name, \"Anna\")"); @@ -2155,6 +2144,7 @@ public void testFullTextFunctionsConstantQuery() throws Exception { testFullTextFunctionsConstantQuery("kql(first_name)", ""); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { testFullTextFunctionsConstantQuery("multi_match(first_name, first_name)", "first"); + testFullTextFunctionsConstantQuery("multi_match(concat(first_name, \"world\"), first_name)", "first"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionsConstantQuery("term(first_name, last_name)", "second"); From e8d8c257f5e62289a12c7f737975ebbfd7e0ac7f Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 08:57:24 +0200 Subject: [PATCH 37/64] Add capabilities checks --- .../xpack/esql/analysis/VerifierTests.java | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index dd021387a4aa4..43d13e6c5529d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1217,13 +1217,22 @@ public void testMatchInsideEval() throws Exception { public void testFieldBasedFullTextFunctions() throws Exception { testFieldBasedWithNonIndexedColumn("MATCH", "match(text, \"cat\")", "function"); - testFieldBasedWithNonIndexedColumn(":", "text : \"cat\"", "operator"); - testFieldBasedWithNonIndexedColumn("MultiMatch", "multi_match(\"cat\", text)", "function"); - testFieldBasedFunctionNotAllowedAfterCommands("MATCH", "function", "match(first_name, \"Anna\")"); + + testFieldBasedWithNonIndexedColumn(":", "text : \"cat\"", "operator"); 
testFieldBasedFunctionNotAllowedAfterCommands(":", "operator", "first_name : \"Anna\""); - testFieldBasedFunctionNotAllowedAfterCommands("MultiMatch", "function", "multi_match(\"Anna\", first_name)"); - testFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3])"); + + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + testFieldBasedWithNonIndexedColumn("MultiMatch", "multi_match(\"cat\", text)", "function"); + testFieldBasedFunctionNotAllowedAfterCommands("MultiMatch", "function", "multi_match(\"Anna\", first_name)"); + } + if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { + testFieldBasedWithNonIndexedColumn("Term", "term(text, \"cat\")", "function"); + testFieldBasedFunctionNotAllowedAfterCommands("Term", "function", "term(first_name, \"Anna\")"); + } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + testFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3])"); + } } public void testFieldBasedFunctionNotAllowedAfterCommands(String functionName, String functionType, String functionInvocation) @@ -1380,10 +1389,12 @@ private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, Strin public void testFullTextFunctionsDisjunctions() { checkWithFullTextFunctionsDisjunctions("match(last_name, \"Smith\")"); - checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)"); checkWithFullTextFunctionsDisjunctions("last_name : \"Smith\""); checkWithFullTextFunctionsDisjunctions("qstr(\"last_name: Smith\")"); checkWithFullTextFunctionsDisjunctions("kql(\"last_name: Smith\")"); + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)"); + } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkWithFullTextFunctionsDisjunctions("term(last_name, \"Smith\")"); } @@ -1440,12 +1451,12 @@ public void testFullTextFunctionsWithNonBooleanFunctions() { if 
(EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { - checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3])", "function"); - } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(first_name, \"Anna\")", "function"); } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3])", "function"); + } } private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, String functionInvocation, String functionType) { @@ -2103,12 +2114,15 @@ public void testFullTextFunctionCurrentlyUnsupportedBehaviour() throws Exception if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionsCurrentlyUnsupportedBehaviour("term(first_name, \"Anna\")"); } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + testFullTextFunctionsCurrentlyUnsupportedBehaviour("knn(vector, [0, 1, 2])"); + } } private void testFullTextFunctionsCurrentlyUnsupportedBehaviour(String functionInvocation) throws Exception { assertThat( error("from test | stats max_salary = max(salary) by emp_no | where " + functionInvocation), - containsString("Unknown column [first_name]") + containsString("Unknown column") ); } @@ -2172,14 +2186,18 @@ public void testInsistNotOnTopOfFrom() { public void testFullTextFunctionsInStats() { checkFullTextFunctionsInStats("match(last_name, \"Smith\")"); - checkFullTextFunctionsInStats("multi_match(\"Smith\", first_name, last_name)"); checkFullTextFunctionsInStats("last_name : \"Smith\""); checkFullTextFunctionsInStats("qstr(\"last_name: Smith\")"); checkFullTextFunctionsInStats("kql(\"last_name: Smith\")"); + if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { + 
checkFullTextFunctionsInStats("multi_match(\"Smith\", first_name, last_name)"); + } + if( EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkFullTextFunctionsInStats("knn(vector, [0, 1, 2])"); + } } private void checkFullTextFunctionsInStats(String functionInvocation) { - query("from test | stats c = max(salary) where " + functionInvocation); query("from test | stats c = max(salary) where " + functionInvocation + " or length(first_name) > 10"); query("from test metadata _score | where " + functionInvocation + " | stats c = max(_score)"); From f2975a3e710d2284f23b06133d8c9d4c6c5bd645 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 3 Jun 2025 07:18:42 +0000 Subject: [PATCH 38/64] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/esql/analysis/VerifierTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 43d13e6c5529d..ccc60fd324479 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2192,7 +2192,7 @@ public void testFullTextFunctionsInStats() { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsInStats("multi_match(\"Smith\", first_name, last_name)"); } - if( EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkFullTextFunctionsInStats("knn(vector, [0, 1, 2])"); } } From 7a18aec0a1673c55439687fd55404760dbb7d386 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 09:58:33 +0200 Subject: [PATCH 39/64] Remove unnecessary changes --- .../qa/testFixtures/src/main/resources/dense_vector.csv-spec | 1 + .../elasticsearch/xpack/esql/parser/StatementParserTests.java | 1 - 2 files 
changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec index 7736e69d77890..74ef532313055 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -1,3 +1,4 @@ + retrieveDenseVectorData required_capability: dense_vector_field_type diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 8fcb13864bed5..2b96ee5d3c381 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -231,7 +231,6 @@ public void testRowCommandWithEscapedFieldName() { public void testCompositeCommand() { assertEquals( new Filter(EMPTY, new Row(EMPTY, List.of(new Alias(EMPTY, "a", integer(1)))), TRUE), - statement("row a = 1 | where true") ); } From fccf9a5157ebf54e7627426193489f2ced76f97e Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 11:38:02 +0200 Subject: [PATCH 40/64] Add new full text functions data set and modify VerifierTests --- .../xpack/esql/CsvTestsDataLoader.java | 4 +- .../main/resources/data/full_text_search.csv | 21 ++ .../src/main/resources/knn-function.csv-spec | 2 +- .../src/main/resources/mapping-default.json | 4 - .../resources/mapping-full_text_search.json | 26 ++ .../xpack/esql/analysis/VerifierTests.java | 306 +++++++++--------- 6 files changed, 207 insertions(+), 156 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json diff 
--git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index c041fe55c32fc..66fca56efc1a8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -145,6 +145,7 @@ public class CsvTestsDataLoader { private static final TestDataset MV_TEXT = new TestDataset("mv_text"); private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector"); private static final TestDataset COLORS = new TestDataset("colors"); + private static final TestDataset FULL_TEXT_SEARCH = new TestDataset("full_text_search"); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -206,7 +207,8 @@ public class CsvTestsDataLoader { Map.entry(LOGS.indexName, LOGS), Map.entry(MV_TEXT.indexName, MV_TEXT), Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR), - Map.entry(COLORS.indexName, COLORS) + Map.entry(COLORS.indexName, COLORS), + Map.entry(FULL_TEXT_SEARCH.indexName, FULL_TEXT_SEARCH) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv new file mode 100644 index 0000000000000..8d0fecdb4f8aa --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv @@ -0,0 +1,21 @@ +id:integer,title:text,body:text,tags:keyword,category:keyword,published_date:date,vector:dense_vector +1,The Rise of AI,Artificial intelligence is revolutionizing industries, from healthcare to finance.,ai,technology,future,technology,2023-01-15,[0.89, 0.61, 0.13] +2,Hiking 
the Grand Canyon,Exploring the vast landscapes of the Grand Canyon is an unforgettable experience.,travel,nature,hiking,travel,2022-11-20,[0.31, 0.85, 0.44] +3,Understanding Quantum Computing,Quantum computing leverages the principles of quantum mechanics for computation.,quantum,computing,research,science,2023-06-05,[0.92, 0.47, 0.22] +4,Healthy Meal Planning,Meal prepping with nutritious ingredients can save time and improve well-being.,health,food,planning,lifestyle,2024-03-08,[0.41, 0.66, 0.30] +5,Dogs: Loyal Companions,Dogs provide emotional support and are known for their loyalty and affection.,dogs,pets,companionship,animals,2021-12-30,[0.14, 0.91, 0.19] +6,A Guide to the Solar System,The solar system is home to eight planets, each with unique characteristics.,space,planets,astronomy,science,2022-07-14,[0.75, 0.34, 0.56] +7,Meditation for Beginners,Meditation can help reduce stress and improve mental clarity when practiced regularly.,meditation,wellness,mental health,lifestyle,2023-02-28,[0.36, 0.72, 0.28] +8,Exploring Tokyo,Tokyo blends modern skyscrapers with traditional temples and vibrant street culture.,japan,tokyo,travel,travel,2024-09-10,[0.45, 0.82, 0.39] +9,Introduction to Neural Networks,Neural networks are foundational to deep learning, a subfield of machine learning.,neural networks,deep learning,ai,technology,2023-10-01,[0.88, 0.60, 0.15] +10,Gardening Tips for Spring,Spring is the ideal time to start planting flowers, herbs, and vegetables.,gardening,plants,spring,lifestyle,2022-03-15,[0.33, 0.76, 0.21] +11,Basics of Blockchain Technology,Blockchain provides a decentralized way to store and verify transactions.,blockchain,cryptocurrency,tech,technology,2023-05-22,[0.91, 0.55, 0.20] +12,Cats vs Dogs,Cats and dogs are both popular pets, each with unique behaviors and needs.,cats,dogs,pets,animals,2022-08-18,[0.18, 0.89, 0.23] +13,The Benefits of Yoga,Yoga combines physical postures with breathing exercises and 
meditation.,yoga,fitness,health,lifestyle,2024-01-04,[0.40, 0.69, 0.27] +14,Visiting the Louvre Museum,The Louvre houses famous artworks like the Mona Lisa and Venus de Milo.,art,museum,paris,travel,2021-06-25,[0.50, 0.78, 0.41] +15,Climate Change Explained,Climate change is a global challenge affecting ecosystems and weather patterns.,climate,environment,science,science,2022-10-11,[0.66, 0.70, 0.45] +16,Intro to Programming with Python,Python is a versatile language for beginners and experts alike.,python,programming,code,technology,2023-11-30,[0.87, 0.64, 0.18] +17,Exploring the Amazon Rainforest,The Amazon Rainforest is rich in biodiversity and cultural heritage.,amazon,nature,exploration,travel,2023-07-19,[0.38, 0.84, 0.46] +18,The Basics of Nutrition,Balanced nutrition is essential for energy, growth, and health maintenance.,nutrition,food,health,lifestyle,2022-05-12,[0.43, 0.68, 0.29] +19,Mars Missions Update,NASA and private companies continue exploring Mars with rovers and future plans.,mars,space,exploration,science,2024-04-26,[0.79, 0.37, 0.52] +20,Bird Watching in Costa Rica,Costa Rica offers a paradise for bird watchers with hundreds of species.,birds,wildlife,nature,travel,2021-09-09,[0.29, 0.83, 0.38] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 0cabc83b12647..de446123c4fcf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -230,7 +230,7 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [128,128,0], {"k": 5}) or length(color) > 17 -| sort _score desc +| sort _score desc, color asc | eval round_score = round(_score, 4) | keep color, round_score ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json index fcb3269131b0e..61603776f3f3d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default.json @@ -75,10 +75,6 @@ "type" : "keyword" } } - }, - "vector": { - "type": "dense_vector", - "similarity": "l2_norm" } } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json new file mode 100644 index 0000000000000..160f285d792d1 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json @@ -0,0 +1,26 @@ +{ + "properties": { + "id": { + "type": "integer" + }, + "title": { + "type": "text" + }, + "body": { + "type": "text" + }, + "tags": { + "type": "keyword" + }, + "category": { + "type": "integer" + }, + "published_date": { + "type": "date" + }, + "vector": { + "type": "dense_vector", + "similarity": "l2_norm" + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index ccc60fd324479..d5477a92a17cf 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -67,6 +67,7 @@ public class VerifierTests extends ESTestCase { private static final EsqlParser parser = new EsqlParser(); private final Analyzer defaultAnalyzer = AnalyzerTestUtils.expandedDefaultAnalyzer(); + private final Analyzer fullTextAnalyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-full_text_search.json", "test")); private final Analyzer tsdb = AnalyzerTestUtils.analyzer(AnalyzerTestUtils.tsdbIndexResolution()); private final List TIME_DURATIONS = 
List.of("millisecond", "second", "minute", "hour"); @@ -1217,18 +1218,18 @@ public void testMatchInsideEval() throws Exception { public void testFieldBasedFullTextFunctions() throws Exception { testFieldBasedWithNonIndexedColumn("MATCH", "match(text, \"cat\")", "function"); - testFieldBasedFunctionNotAllowedAfterCommands("MATCH", "function", "match(first_name, \"Anna\")"); + testFieldBasedFunctionNotAllowedAfterCommands("MATCH", "function", "match(title, \"Meditation\")"); testFieldBasedWithNonIndexedColumn(":", "text : \"cat\"", "operator"); - testFieldBasedFunctionNotAllowedAfterCommands(":", "operator", "first_name : \"Anna\""); + testFieldBasedFunctionNotAllowedAfterCommands(":", "operator", "title : \"Meditation\""); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { testFieldBasedWithNonIndexedColumn("MultiMatch", "multi_match(\"cat\", text)", "function"); - testFieldBasedFunctionNotAllowedAfterCommands("MultiMatch", "function", "multi_match(\"Anna\", first_name)"); + testFieldBasedFunctionNotAllowedAfterCommands("MultiMatch", "function", "multi_match(\"Meditation\", title)"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFieldBasedWithNonIndexedColumn("Term", "term(text, \"cat\")", "function"); - testFieldBasedFunctionNotAllowedAfterCommands("Term", "function", "term(first_name, \"Anna\")"); + testFieldBasedFunctionNotAllowedAfterCommands("Term", "function", "term(title, \"Meditation\")"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { testFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3])"); @@ -1238,12 +1239,12 @@ public void testFieldBasedFullTextFunctions() throws Exception { public void testFieldBasedFunctionNotAllowedAfterCommands(String functionName, String functionType, String functionInvocation) throws Exception { assertThat( - error("from test | limit 10 | where " + functionInvocation), + error("from test | limit 10 | where " + functionInvocation, fullTextAnalyzer), 
containsString("[" + functionName + "] " + functionType + " cannot be used after LIMIT") ); - String fieldName = "KNN".equals(functionName) ? "vector" : "first_name"; + String fieldName = "KNN".equals(functionName) ? "vector" : "title"; assertThat( - error("from test | STATS c = COUNT(emp_no) BY " + fieldName + " | where " + functionInvocation), + error("from test | STATS c = COUNT(id) BY " + fieldName + " | where " + functionInvocation, fullTextAnalyzer), containsString("[" + functionName + "] " + functionType + " cannot be used after STATS") ); } @@ -1251,114 +1252,117 @@ public void testFieldBasedFunctionNotAllowedAfterCommands(String functionName, S // These should pass eventually once we lift some restrictions on match function public void testFieldBasedWithNonIndexedColumn(String functionName, String functionInvocation, String functionType) { assertThat( - error("from test | eval text = substring(first_name, 1) | where " + functionInvocation), + error("from test | eval text = substring(title, 1) | where " + functionInvocation, fullTextAnalyzer), containsString( "[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping" ) ); assertThat( - error("from test | eval text=concat(first_name, last_name) | where " + functionInvocation), + error("from test | eval text=concat(title, body) | where " + functionInvocation, fullTextAnalyzer), containsString( "[" + functionName + "] " + functionType + " cannot operate on [text], which is not a field from an index mapping" ) ); var keywordInvocation = functionInvocation.replace("text", "text::keyword"); - String keywordError = error("row n = null | eval text = n + 5 | where " + keywordInvocation); + String keywordError = error("row n = null | eval text = n + 5 | where " + keywordInvocation, fullTextAnalyzer); assertThat(keywordError, containsString("[" + functionName + "] " + functionType + " cannot operate on")); assertThat(keywordError, containsString("which is not a 
field from an index mapping")); } public void testQueryStringFunctionsNotAllowedAfterCommands() throws Exception { - testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands("QSTR", "qstr(\"field_name: Anna\")"); + testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands("QSTR", "qstr(\"field_name: Meditation\")"); } public void testKqlFunctionsNotAllowedAfterCommands() throws Exception { - testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands("KQL", "kql(\"field_name: Anna\")"); + testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands("KQL", "kql(\"field_name: Meditation\")"); } public void testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands(String functionName, String functionInvocation) throws Exception { // Source commands - assertEquals("1:13: [" + functionName + "] function cannot be used after SHOW", error("show info | where " + functionInvocation)); - assertEquals( - "1:17: [" + functionName + "] function cannot be used after ROW", - error("row a= \"Anna\" | where " + functionInvocation) + assertThat( + error("show info | where " + functionInvocation), + containsString("[" + functionName + "] function cannot be used after SHOW") + ); + assertThat( + error("row a= \"Meditation\" | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after ROW") ); // Processing commands - assertEquals( - "1:43: [" + functionName + "] function cannot be used after DISSECT", - error("from test | dissect first_name \"%{foo}\" | where " + functionInvocation) + assertThat( + error("from test | dissect title \"%{foo}\" | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after DISSECT") ); - assertEquals( - "1:27: [" + functionName + "] function cannot be used after DROP", - error("from test | drop emp_no | where " + functionInvocation) + assertThat( + error("from test | drop body | where " + functionInvocation, fullTextAnalyzer), + 
containsString("[" + functionName + "] function cannot be used after DROP") ); - assertEquals( - "1:71: [" + functionName + "] function cannot be used after ENRICH", - error("from test | enrich languages on languages with lang = language_name | where " + functionInvocation) + assertThat( + error("from test | enrich languages on category with lang = language_name | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after ENRICH") ); - assertEquals( - "1:26: [" + functionName + "] function cannot be used after EVAL", - error("from test | eval z = 2 | where " + functionInvocation) + assertThat( + error("from test | eval z = 2 | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after EVAL") ); - assertEquals( - "1:44: [" + functionName + "] function cannot be used after GROK", - error("from test | grok last_name \"%{WORD:foo}\" | where " + functionInvocation) + assertThat( + error("from test | grok body \"%{WORD:foo}\" | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after GROK") ); - assertEquals( - "1:27: [" + functionName + "] function cannot be used after KEEP", - error("from test | keep emp_no | where " + functionInvocation) + assertThat( + error("from test | keep category | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after KEEP") ); - assertEquals( - "1:24: [" + functionName + "] function cannot be used after LIMIT", - error("from test | limit 10 | where " + functionInvocation) + assertThat( + error("from test | limit 10 | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after LIMIT") ); - assertEquals( - "1:35: [" + functionName + "] function cannot be used after MV_EXPAND", - error("from test | mv_expand last_name | where " + 
functionInvocation) + assertThat( + error("from test | mv_expand body | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after MV_EXPAND") ); - assertEquals( - "1:45: [" + functionName + "] function cannot be used after RENAME", - error("from test | rename last_name as full_name | where " + functionInvocation) + assertThat( + error("from test | rename body as full_body | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after RENAME") ); - assertEquals( - "1:52: [" + functionName + "] function cannot be used after STATS", - error("from test | STATS c = COUNT(emp_no) BY languages | where " + functionInvocation) + assertThat( + error("from test | STATS c = COUNT(*) BY category | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after STATS") ); // Some combination of processing commands - assertEquals( - "1:38: [" + functionName + "] function cannot be used after LIMIT", - error("from test | keep emp_no | limit 10 | where " + functionInvocation) + assertThat( + error("from test | keep category | limit 10 | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after LIMIT") ); - assertEquals( - "1:46: [" + functionName + "] function cannot be used after MV_EXPAND", - error("from test | limit 10 | mv_expand last_name | where " + functionInvocation) + assertThat( + error("from test | limit 10 | mv_expand body | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after MV_EXPAND") ); - assertEquals( - "1:52: [" + functionName + "] function cannot be used after KEEP", - error("from test | mv_expand last_name | keep last_name | where " + functionInvocation) + assertThat( + error("from test | mv_expand body | keep body | where " + functionInvocation, 
fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after KEEP") ); - assertEquals( - "1:77: [" + functionName + "] function cannot be used after RENAME", - error("from test | STATS c = COUNT(emp_no) BY languages | rename c as total_emps | where " + functionInvocation) + assertThat( + error("from test | STATS c = COUNT(id) BY category | rename c as total_categories | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after RENAME") ); - assertEquals( - "1:54: [" + functionName + "] function cannot be used after DROP", - error("from test | rename last_name as name | drop emp_no | where " + functionInvocation) + assertThat( + error("from test | rename title as name | drop category | where " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] function cannot be used after DROP") ); } public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { - checkFullTextFunctionsOnlyAllowedInWhere("MATCH", "match(first_name, \"Anna\")", "function"); - checkFullTextFunctionsOnlyAllowedInWhere(":", "first_name:\"Anna\"", "operator"); - checkFullTextFunctionsOnlyAllowedInWhere("QSTR", "qstr(\"Anna\")", "function"); - checkFullTextFunctionsOnlyAllowedInWhere("KQL", "kql(\"Anna\")", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("MATCH", "match(title, \"Meditation\")", "function"); + checkFullTextFunctionsOnlyAllowedInWhere(":", "title:\"Meditation\"", "operator"); + checkFullTextFunctionsOnlyAllowedInWhere("QSTR", "qstr(\"Meditation\")", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("KQL", "kql(\"Meditation\")", "function"); if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { - checkFullTextFunctionsOnlyAllowedInWhere("Term", "term(first_name, \"Anna\")", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("Term", "term(title, \"Meditation\")", "function"); } if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - 
checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Meditation\", title, body)", "function"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [1, 2, 3])", "function"); @@ -1367,36 +1371,36 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { private void checkFullTextFunctionsOnlyAllowedInWhere(String functionName, String functionInvocation, String functionType) throws Exception { - assertEquals( - "1:22: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", - error("from test | eval y = " + functionInvocation) + assertThat( + error("from test | eval y = " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands") ); - assertEquals( - "1:18: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", - error("from test | sort " + functionInvocation + " asc") + assertThat( + error("from test | sort " + functionInvocation + " asc", fullTextAnalyzer), + containsString("[" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands") ); - assertEquals( - "1:47: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", - error("from test | stats max_salary = max(salary) by " + functionInvocation) + assertThat( + error("from test | stats max_id = max(id) by " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands") ); if ("KQL".equals(functionName) || "QSTR".equals(functionName)) { - assertEquals( - "1:9: [" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands", - error("row a = " + functionInvocation) + 
assertThat( + error("row a = " + functionInvocation, fullTextAnalyzer), + containsString("[" + functionName + "] " + functionType + " is only supported in WHERE and STATS commands") ); } } public void testFullTextFunctionsDisjunctions() { - checkWithFullTextFunctionsDisjunctions("match(last_name, \"Smith\")"); - checkWithFullTextFunctionsDisjunctions("last_name : \"Smith\""); - checkWithFullTextFunctionsDisjunctions("qstr(\"last_name: Smith\")"); - checkWithFullTextFunctionsDisjunctions("kql(\"last_name: Smith\")"); + checkWithFullTextFunctionsDisjunctions("match(title, \"Meditation\")"); + checkWithFullTextFunctionsDisjunctions("title : \"Meditation\""); + checkWithFullTextFunctionsDisjunctions("qstr(\"title: Meditation\")"); + checkWithFullTextFunctionsDisjunctions("kql(\"title: Meditation\")"); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - checkWithFullTextFunctionsDisjunctions("multi_match(\"Smith\", first_name, last_name)"); + checkWithFullTextFunctionsDisjunctions("multi_match(\"Meditation\", title, body)"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { - checkWithFullTextFunctionsDisjunctions("term(last_name, \"Smith\")"); + checkWithFullTextFunctionsDisjunctions("term(title, \"Meditation\")"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3])"); @@ -1406,53 +1410,52 @@ public void testFullTextFunctionsDisjunctions() { private void checkWithFullTextFunctionsDisjunctions(String functionInvocation) { // Disjunctions with non-pushable functions - scoring - query("from test | where " + functionInvocation + " or length(first_name) > 10"); - query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)"); + query("from test | where " + functionInvocation + " or length(title) > 10", fullTextAnalyzer); + query("from test | where match(title, \"Meditation\") or (" + functionInvocation + " and length(title) > 10)", 
fullTextAnalyzer); query( "from test | where (" + functionInvocation - + " and length(first_name) > 0) or (match(last_name, \"Anneke\") and length(first_name) > 10)" + + " and length(title) > 0) or (match(title, \"Meditation\") and length(title) > 10)", fullTextAnalyzer ); // Disjunctions with non-pushable functions - no scoring - query("from test | where " + functionInvocation + " or length(first_name) > 10"); - query("from test | where match(last_name, \"Anneke\") or (" + functionInvocation + " and length(first_name) > 10)"); + query("from test | where " + functionInvocation + " or length(title) > 10", fullTextAnalyzer); + query("from test | where match(title, \"Meditation\") or (" + functionInvocation + " and length(title) > 10)", fullTextAnalyzer); query( "from test | where (" + functionInvocation - + " and length(first_name) > 0) or (match(last_name, \"Anneke\") and length(first_name) > 10)" + + " and length(title) > 0) or (match(title, \"Meditation\") and length(title) > 10)", fullTextAnalyzer ); // Disjunctions with full text functions - no scoring - query("from test | where " + functionInvocation + " or match(first_name, \"Anna\")"); - query("from test | where " + functionInvocation + " or not match(first_name, \"Anna\")"); - query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10"); - query("from test | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")"); - query("from test | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))"); + query("from test | where " + functionInvocation + " or match(title, \"Meditation\")", fullTextAnalyzer); + query("from test | where " + functionInvocation + " or not match(title, \"Meditation\")", fullTextAnalyzer); + query("from test | where (" + functionInvocation + " or match(title, \"Meditation\")) and length(title) > 10", fullTextAnalyzer); + query("from test | where (" + 
functionInvocation + " or match(title, \"Meditation\")) and match(body, \"Smith\")", fullTextAnalyzer); + query("from test | where " + functionInvocation + " or (match(title, \"Meditation\") and match(body, \"Smith\"))", fullTextAnalyzer); // Disjunctions with full text functions - scoring - query("from test metadata _score | where " + functionInvocation + " or match(first_name, \"Anna\")"); - query("from test metadata _score | where " + functionInvocation + " or not match(first_name, \"Anna\")"); - query("from test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and length(first_name) > 10"); + query("from test metadata _score | where " + functionInvocation + " or match(title, \"Meditation\")", fullTextAnalyzer); + query("from test metadata _score | where " + functionInvocation + " or not match(title, \"Meditation\")", fullTextAnalyzer); + query("from test metadata _score | where (" + functionInvocation + " or match(title, \"Meditation\")) and length(title) > 10", fullTextAnalyzer); query( - "from test metadata _score | where (" + functionInvocation + " or match(first_name, \"Anna\")) and match(last_name, \"Smith\")" + "from test metadata _score | where (" + functionInvocation + " or match(title, \"Meditation\")) and match(body, \"Smith\")", fullTextAnalyzer ); query( - "from test metadata _score | where " + functionInvocation + " or (match(first_name, \"Anna\") and match(last_name, \"Smith\"))" + "from test metadata _score | where " + functionInvocation + " or (match(title, \"Meditation\") and match(body, \"Smith\"))", fullTextAnalyzer ); - } public void testFullTextFunctionsWithNonBooleanFunctions() { - checkFullTextFunctionsWithNonBooleanFunctions("MATCH", "match(first_name, \"Anna\")", "function"); - checkFullTextFunctionsWithNonBooleanFunctions(":", "first_name:\"Anna\"", "operator"); - checkFullTextFunctionsWithNonBooleanFunctions("QSTR", "qstr(\"first_name: Anna\")", "function"); - 
checkFullTextFunctionsWithNonBooleanFunctions("KQL", "kql(\"first_name: Anna\")", "function"); + checkFullTextFunctionsWithNonBooleanFunctions("MATCH", "match(title, \"Meditation\")", "function"); + checkFullTextFunctionsWithNonBooleanFunctions(":", "title:\"Meditation\"", "operator"); + checkFullTextFunctionsWithNonBooleanFunctions("QSTR", "qstr(\"title: Meditation\")", "function"); + checkFullTextFunctionsWithNonBooleanFunctions("KQL", "kql(\"title: Meditation\")", "function"); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - checkFullTextFunctionsWithNonBooleanFunctions("MultiMatch", "multi_match(\"Anna\", first_name, last_name)", "function"); + checkFullTextFunctionsWithNonBooleanFunctions("MultiMatch", "multi_match(\"Meditation\", title, body)", "function"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { - checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(first_name, \"Anna\")", "function"); + checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(title, \"Meditation\")", "function"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3])", "function"); @@ -1470,7 +1473,7 @@ private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, + "] " + functionType + " can't be used with ISNOTNULL", - error("from test | where " + functionInvocation + " is not null") + error("from test | where " + functionInvocation + " is not null", fullTextAnalyzer) ); assertEquals( "1:19: Invalid condition [" @@ -1480,7 +1483,7 @@ private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, + "] " + functionType + " can't be used with ISNULL", - error("from test | where " + functionInvocation + " is null") + error("from test | where " + functionInvocation + " is null", fullTextAnalyzer) ); assertEquals( "1:19: Invalid condition [" @@ -1490,7 +1493,7 @@ private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, + 
"] " + functionType + " can't be used with IN", - error("from test | where " + functionInvocation + " in (\"hello\", \"world\")") + error("from test | where " + functionInvocation + " in (\"hello\", \"world\")", fullTextAnalyzer) ); } assertEquals( @@ -1503,7 +1506,7 @@ private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, + "] " + functionType + " can't be used with COALESCE", - error("from test | where coalesce(" + functionInvocation + ", " + functionInvocation + ")") + error("from test | where coalesce(" + functionInvocation + ", " + functionInvocation + ")", fullTextAnalyzer) ); assertEquals( "1:19: argument of [concat(" @@ -1511,18 +1514,18 @@ private void checkFullTextFunctionsWithNonBooleanFunctions(String functionName, + ", \"a\")] must be [string], found value [" + functionInvocation + "] type [boolean]", - error("from test | where concat(" + functionInvocation + ", \"a\")") + error("from test | where concat(" + functionInvocation + ", \"a\")", fullTextAnalyzer) ); } public void testFullTextFunctionsTargetsExistingField() throws Exception { - testFullTextFunctionTargetsExistingField("match(first_name, \"Anna\")"); - testFullTextFunctionTargetsExistingField("first_name : \"Anna\""); + testFullTextFunctionTargetsExistingField("match(title, \"Meditation\")"); + testFullTextFunctionTargetsExistingField("title : \"Meditation\""); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - testFullTextFunctionTargetsExistingField("multi_match(\"Anna\", first_name)"); + testFullTextFunctionTargetsExistingField("multi_match(\"Meditation\", title)"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { - testFullTextFunctionTargetsExistingField("term(fist_name, \"Anna\")"); + testFullTextFunctionTargetsExistingField("term(fist_name, \"Meditation\")"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { testFullTextFunctionTargetsExistingField("knn(vector, [0, 1, 2])"); @@ -1530,7 +1533,10 @@ public void 
testFullTextFunctionsTargetsExistingField() throws Exception { } private void testFullTextFunctionTargetsExistingField(String functionInvocation) throws Exception { - assertThat(error("from test | keep emp_no | where " + functionInvocation), containsString("Unknown column")); + assertThat( + error("from test | keep emp_no | where " + functionInvocation), + containsString("Unknown column") + ); } public void testConditionalFunctionsWithMixedNumericTypes() { @@ -2046,10 +2052,10 @@ public void testLookupJoinDataTypeMismatch() { } public void testFullTextFunctionOptions() { - checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(first_name, \"Jean\", {\"%s\": %s})"); - checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"first_name: Jean\", {\"%s\": %s})"); + checkOptionDataTypes(Match.ALLOWED_OPTIONS, "FROM test | WHERE match(title, \"Jean\", {\"%s\": %s})"); + checkOptionDataTypes(QueryString.ALLOWED_OPTIONS, "FROM test | WHERE QSTR(\"title: Jean\", {\"%s\": %s})"); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", first_name, last_name, {\"%s\": %s})"); + checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", title, body, {\"%s\": %s})"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], {\"%s\": %s})"); @@ -2078,10 +2084,10 @@ private void checkOptionDataTypes(Map allowedOptionsMap, Strin // Check conversion is possible DataTypeConverter.convert(optionValue, optionType); // If no exception was thrown, conversion is possible and should be done - query(query); + query(query, fullTextAnalyzer); } catch (InvalidArgumentException e) { // Conversion is not possible, query should fail - String error = error(query); + String error = error(query, fullTextAnalyzer); assertThat(error, containsString("Invalid option [" + 
optionName + "]")); assertThat(error, containsString("cannot cast [" + optionValue + "] to [" + optionType.typeName() + "]")); } @@ -2089,7 +2095,7 @@ private void checkOptionDataTypes(Map allowedOptionsMap, Strin } String errorQuery = String.format(Locale.ROOT, queryTemplate, "unknown_option", "\"any_value\""); - assertThat(error(errorQuery), containsString("Invalid option [unknown_option]")); + assertThat(error(errorQuery, fullTextAnalyzer), containsString("Invalid option [unknown_option]")); } private static String exampleValueForType(DataType currentType) { @@ -2106,13 +2112,13 @@ private static String exampleValueForType(DataType currentType) { // Should pass eventually once we lift some restrictions on full text search functions. public void testFullTextFunctionCurrentlyUnsupportedBehaviour() throws Exception { - testFullTextFunctionsCurrentlyUnsupportedBehaviour("match(first_name, \"Anna\")"); - testFullTextFunctionsCurrentlyUnsupportedBehaviour("first_name : \"Anna\""); + testFullTextFunctionsCurrentlyUnsupportedBehaviour("match(title, \"Meditation\")"); + testFullTextFunctionsCurrentlyUnsupportedBehaviour("title : \"Meditation\""); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - testFullTextFunctionsCurrentlyUnsupportedBehaviour("multi_match(\"Anna\", first_name)"); + testFullTextFunctionsCurrentlyUnsupportedBehaviour("multi_match(\"Meditation\", title)"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { - testFullTextFunctionsCurrentlyUnsupportedBehaviour("term(first_name, \"Anna\")"); + testFullTextFunctionsCurrentlyUnsupportedBehaviour("term(title, \"Meditation\")"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { testFullTextFunctionsCurrentlyUnsupportedBehaviour("knn(vector, [0, 1, 2])"); @@ -2121,23 +2127,23 @@ public void testFullTextFunctionCurrentlyUnsupportedBehaviour() throws Exception private void testFullTextFunctionsCurrentlyUnsupportedBehaviour(String functionInvocation) throws Exception { assertThat( - 
error("from test | stats max_salary = max(salary) by emp_no | where " + functionInvocation), + error("from test | stats max_salary = max(salary) by emp_no | where " + functionInvocation, fullTextAnalyzer), containsString("Unknown column") ); } public void testFullTextFunctionsNullArgs() throws Exception { testFullTextFunctionNullArgs("match(null, \"query\")", "first"); - testFullTextFunctionNullArgs("match(first_name, null)", "second"); + testFullTextFunctionNullArgs("match(title, null)", "second"); testFullTextFunctionNullArgs("qstr(null)", ""); testFullTextFunctionNullArgs("kql(null)", ""); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - testFullTextFunctionNullArgs("multi_match(null, first_name)", "first"); + testFullTextFunctionNullArgs("multi_match(null, title)", "first"); testFullTextFunctionNullArgs("multi_match(\"query\", null)", "second"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionNullArgs("term(null, \"query\")", "first"); - testFullTextFunctionNullArgs("term(first_name, null)", "second"); + testFullTextFunctionNullArgs("term(title, null)", "second"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { testFullTextFunctionNullArgs("knn(null, [0, 1, 2])", "first"); @@ -2147,21 +2153,21 @@ public void testFullTextFunctionsNullArgs() throws Exception { private void testFullTextFunctionNullArgs(String functionInvocation, String argOrdinal) throws Exception { assertThat( - error("from test | where " + functionInvocation), + error("from test | where " + functionInvocation, fullTextAnalyzer), containsString(argOrdinal + " argument of [" + functionInvocation + "] cannot be null, received [null]") ); } public void testFullTextFunctionsConstantQuery() throws Exception { - testFullTextFunctionsConstantQuery("match(first_name, last_name)", "second"); - testFullTextFunctionsConstantQuery("qstr(first_name)", ""); - testFullTextFunctionsConstantQuery("kql(first_name)", ""); + 
testFullTextFunctionsConstantQuery("match(title, category)", "second"); + testFullTextFunctionsConstantQuery("qstr(title)", ""); + testFullTextFunctionsConstantQuery("kql(title)", ""); if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - testFullTextFunctionsConstantQuery("multi_match(first_name, first_name)", "first"); - testFullTextFunctionsConstantQuery("multi_match(concat(first_name, \"world\"), first_name)", "first"); + testFullTextFunctionsConstantQuery("multi_match(category, body)", "first"); + testFullTextFunctionsConstantQuery("multi_match(concat(title, \"world\"), title)", "first"); } if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { - testFullTextFunctionsConstantQuery("term(first_name, last_name)", "second"); + testFullTextFunctionsConstantQuery("term(title, tags)", "second"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { testFullTextFunctionsConstantQuery("knn(vector, vector)", "second"); @@ -2170,7 +2176,7 @@ public void testFullTextFunctionsConstantQuery() throws Exception { private void testFullTextFunctionsConstantQuery(String functionInvocation, String argOrdinal) throws Exception { assertThat( - error("from test | where " + functionInvocation), + error("from test | where " + functionInvocation, fullTextAnalyzer), containsString(argOrdinal + " argument of [" + functionInvocation + "] must be a constant") ); } @@ -2185,12 +2191,12 @@ public void testInsistNotOnTopOfFrom() { } public void testFullTextFunctionsInStats() { - checkFullTextFunctionsInStats("match(last_name, \"Smith\")"); - checkFullTextFunctionsInStats("last_name : \"Smith\""); - checkFullTextFunctionsInStats("qstr(\"last_name: Smith\")"); - checkFullTextFunctionsInStats("kql(\"last_name: Smith\")"); + checkFullTextFunctionsInStats("match(title, \"Meditation\")"); + checkFullTextFunctionsInStats("title : \"Meditation\""); + checkFullTextFunctionsInStats("qstr(\"title: Meditation\")"); + checkFullTextFunctionsInStats("kql(\"title: Meditation\")"); if 
(EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - checkFullTextFunctionsInStats("multi_match(\"Smith\", first_name, last_name)"); + checkFullTextFunctionsInStats("multi_match(\"Meditation\", title, body)"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { checkFullTextFunctionsInStats("knn(vector, [0, 1, 2])"); @@ -2198,13 +2204,13 @@ public void testFullTextFunctionsInStats() { } private void checkFullTextFunctionsInStats(String functionInvocation) { - query("from test | stats c = max(salary) where " + functionInvocation); - query("from test | stats c = max(salary) where " + functionInvocation + " or length(first_name) > 10"); - query("from test metadata _score | where " + functionInvocation + " | stats c = max(_score)"); - query("from test metadata _score | where " + functionInvocation + " or length(first_name) > 10 | stats c = max(_score)"); + query("from test | stats c = max(id) where " + functionInvocation, fullTextAnalyzer); + query("from test | stats c = max(id) where " + functionInvocation + " or length(title) > 10", fullTextAnalyzer); + query("from test metadata _score | where " + functionInvocation + " | stats c = max(_score)", fullTextAnalyzer); + query("from test metadata _score | where " + functionInvocation + " or length(title) > 10 | stats c = max(_score)", fullTextAnalyzer); assertThat( - error("from test metadata _score | stats c = max(_score) where " + functionInvocation), + error("from test metadata _score | stats c = max(_score) where " + functionInvocation, fullTextAnalyzer), containsString("cannot use _score aggregations with a WHERE filter in a STATS command") ); } From 3d705589c0e2963b8c968e95c0ba3d67e3691a92 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 12:07:13 +0200 Subject: [PATCH 41/64] Don't use data with the CSV data loader, just the mapping --- .../xpack/esql/CsvTestsDataLoader.java | 4 +--- .../main/resources/data/full_text_search.csv | 21 ------------------- 
.../src/main/resources/knn-function.csv-spec | 18 ++++++++-------- 3 files changed, 10 insertions(+), 33 deletions(-) delete mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 66fca56efc1a8..c041fe55c32fc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -145,7 +145,6 @@ public class CsvTestsDataLoader { private static final TestDataset MV_TEXT = new TestDataset("mv_text"); private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector"); private static final TestDataset COLORS = new TestDataset("colors"); - private static final TestDataset FULL_TEXT_SEARCH = new TestDataset("full_text_search"); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -207,8 +206,7 @@ public class CsvTestsDataLoader { Map.entry(LOGS.indexName, LOGS), Map.entry(MV_TEXT.indexName, MV_TEXT), Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR), - Map.entry(COLORS.indexName, COLORS), - Map.entry(FULL_TEXT_SEARCH.indexName, FULL_TEXT_SEARCH) + Map.entry(COLORS.indexName, COLORS) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv deleted file mode 100644 index 8d0fecdb4f8aa..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/full_text_search.csv +++ /dev/null @@ -1,21 +0,0 @@ 
-id:integer,title:text,body:text,tags:keyword,category:keyword,published_date:date,vector:dense_vector -1,The Rise of AI,Artificial intelligence is revolutionizing industries, from healthcare to finance.,ai,technology,future,technology,2023-01-15,[0.89, 0.61, 0.13] -2,Hiking the Grand Canyon,Exploring the vast landscapes of the Grand Canyon is an unforgettable experience.,travel,nature,hiking,travel,2022-11-20,[0.31, 0.85, 0.44] -3,Understanding Quantum Computing,Quantum computing leverages the principles of quantum mechanics for computation.,quantum,computing,research,science,2023-06-05,[0.92, 0.47, 0.22] -4,Healthy Meal Planning,Meal prepping with nutritious ingredients can save time and improve well-being.,health,food,planning,lifestyle,2024-03-08,[0.41, 0.66, 0.30] -5,Dogs: Loyal Companions,Dogs provide emotional support and are known for their loyalty and affection.,dogs,pets,companionship,animals,2021-12-30,[0.14, 0.91, 0.19] -6,A Guide to the Solar System,The solar system is home to eight planets, each with unique characteristics.,space,planets,astronomy,science,2022-07-14,[0.75, 0.34, 0.56] -7,Meditation for Beginners,Meditation can help reduce stress and improve mental clarity when practiced regularly.,meditation,wellness,mental health,lifestyle,2023-02-28,[0.36, 0.72, 0.28] -8,Exploring Tokyo,Tokyo blends modern skyscrapers with traditional temples and vibrant street culture.,japan,tokyo,travel,travel,2024-09-10,[0.45, 0.82, 0.39] -9,Introduction to Neural Networks,Neural networks are foundational to deep learning, a subfield of machine learning.,neural networks,deep learning,ai,technology,2023-10-01,[0.88, 0.60, 0.15] -10,Gardening Tips for Spring,Spring is the ideal time to start planting flowers, herbs, and vegetables.,gardening,plants,spring,lifestyle,2022-03-15,[0.33, 0.76, 0.21] -11,Basics of Blockchain Technology,Blockchain provides a decentralized way to store and verify transactions.,blockchain,cryptocurrency,tech,technology,2023-05-22,[0.91, 
0.55, 0.20] -12,Cats vs Dogs,Cats and dogs are both popular pets, each with unique behaviors and needs.,cats,dogs,pets,animals,2022-08-18,[0.18, 0.89, 0.23] -13,The Benefits of Yoga,Yoga combines physical postures with breathing exercises and meditation.,yoga,fitness,health,lifestyle,2024-01-04,[0.40, 0.69, 0.27] -14,Visiting the Louvre Museum,The Louvre houses famous artworks like the Mona Lisa and Venus de Milo.,art,museum,paris,travel,2021-06-25,[0.50, 0.78, 0.41] -15,Climate Change Explained,Climate change is a global challenge affecting ecosystems and weather patterns.,climate,environment,science,science,2022-10-11,[0.66, 0.70, 0.45] -16,Intro to Programming with Python,Python is a versatile language for beginners and experts alike.,python,programming,code,technology,2023-11-30,[0.87, 0.64, 0.18] -17,Exploring the Amazon Rainforest,The Amazon Rainforest is rich in biodiversity and cultural heritage.,amazon,nature,exploration,travel,2023-07-19,[0.38, 0.84, 0.46] -18,The Basics of Nutrition,Balanced nutrition is essential for energy, growth, and health maintenance.,nutrition,food,health,lifestyle,2022-05-12,[0.43, 0.68, 0.29] -19,Mars Missions Update,NASA and private companies continue exploring Mars with rovers and future plans.,mars,space,exploration,science,2024-04-26,[0.79, 0.37, 0.52] -20,Bird Watching in Costa Rica,Costa Rica offers a paradise for bird watchers with hundreds of species.,birds,wildlife,nature,travel,2021-09-09,[0.29, 0.83, 0.38] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index de446123c4fcf..be71373d63e23 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -235,15 +235,15 @@ from colors metadata _score | keep color, round_score ; -color:text | round_score: double -olive | 1.0 -olive drab | 0.0014 
-dark olive green | 4.0E-4 -dark golden rod | 3.0E-4 -sienna | 3.0E-4 -medium aqua marine | 0.0 -medium spring green | 0.0 -light golden rod yellow | 0.0 +color:text | round_score:double +olive | 1.0 +olive drab | 0.0014 +dark olive green | 4.0E-4 +dark golden rod | 3.0E-4 +sienna | 3.0E-4 +light golden rod yellow | 0.0 +medium aqua marine | 0.0 +medium spring green | 0.0 ; testKnnWithNonPushableDisjunctionsOnComplexExpressions From 19548fa39f6c8e5d3ff873c7e432b768f37bcd32 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 12:07:36 +0200 Subject: [PATCH 42/64] Spotless --- .../xpack/esql/analysis/VerifierTests.java | 46 ++++++++++++------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d5477a92a17cf..6f2668d16d845 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1344,7 +1344,10 @@ public void testNonFieldBasedFullTextFunctionsNotAllowedAfterCommands(String fun containsString("[" + functionName + "] function cannot be used after KEEP") ); assertThat( - error("from test | STATS c = COUNT(id) BY category | rename c as total_categories | where " + functionInvocation, fullTextAnalyzer), + error( + "from test | STATS c = COUNT(id) BY category | rename c as total_categories | where " + functionInvocation, + fullTextAnalyzer + ), containsString("[" + functionName + "] function cannot be used after RENAME") ); assertThat( @@ -1413,36 +1416,45 @@ private void checkWithFullTextFunctionsDisjunctions(String functionInvocation) { query("from test | where " + functionInvocation + " or length(title) > 10", fullTextAnalyzer); query("from test | where match(title, \"Meditation\") or (" + functionInvocation + " and 
length(title) > 10)", fullTextAnalyzer); query( - "from test | where (" - + functionInvocation - + " and length(title) > 0) or (match(title, \"Meditation\") and length(title) > 10)", fullTextAnalyzer + "from test | where (" + functionInvocation + " and length(title) > 0) or (match(title, \"Meditation\") and length(title) > 10)", + fullTextAnalyzer ); // Disjunctions with non-pushable functions - no scoring query("from test | where " + functionInvocation + " or length(title) > 10", fullTextAnalyzer); query("from test | where match(title, \"Meditation\") or (" + functionInvocation + " and length(title) > 10)", fullTextAnalyzer); query( - "from test | where (" - + functionInvocation - + " and length(title) > 0) or (match(title, \"Meditation\") and length(title) > 10)", fullTextAnalyzer + "from test | where (" + functionInvocation + " and length(title) > 0) or (match(title, \"Meditation\") and length(title) > 10)", + fullTextAnalyzer ); // Disjunctions with full text functions - no scoring query("from test | where " + functionInvocation + " or match(title, \"Meditation\")", fullTextAnalyzer); query("from test | where " + functionInvocation + " or not match(title, \"Meditation\")", fullTextAnalyzer); query("from test | where (" + functionInvocation + " or match(title, \"Meditation\")) and length(title) > 10", fullTextAnalyzer); - query("from test | where (" + functionInvocation + " or match(title, \"Meditation\")) and match(body, \"Smith\")", fullTextAnalyzer); - query("from test | where " + functionInvocation + " or (match(title, \"Meditation\") and match(body, \"Smith\"))", fullTextAnalyzer); + query( + "from test | where (" + functionInvocation + " or match(title, \"Meditation\")) and match(body, \"Smith\")", + fullTextAnalyzer + ); + query( + "from test | where " + functionInvocation + " or (match(title, \"Meditation\") and match(body, \"Smith\"))", + fullTextAnalyzer + ); // Disjunctions with full text functions - scoring query("from test metadata _score | where " 
+ functionInvocation + " or match(title, \"Meditation\")", fullTextAnalyzer); query("from test metadata _score | where " + functionInvocation + " or not match(title, \"Meditation\")", fullTextAnalyzer); - query("from test metadata _score | where (" + functionInvocation + " or match(title, \"Meditation\")) and length(title) > 10", fullTextAnalyzer); query( - "from test metadata _score | where (" + functionInvocation + " or match(title, \"Meditation\")) and match(body, \"Smith\")", fullTextAnalyzer + "from test metadata _score | where (" + functionInvocation + " or match(title, \"Meditation\")) and length(title) > 10", + fullTextAnalyzer ); query( - "from test metadata _score | where " + functionInvocation + " or (match(title, \"Meditation\") and match(body, \"Smith\"))", fullTextAnalyzer + "from test metadata _score | where (" + functionInvocation + " or match(title, \"Meditation\")) and match(body, \"Smith\")", + fullTextAnalyzer + ); + query( + "from test metadata _score | where " + functionInvocation + " or (match(title, \"Meditation\") and match(body, \"Smith\"))", + fullTextAnalyzer ); } @@ -1533,10 +1545,7 @@ public void testFullTextFunctionsTargetsExistingField() throws Exception { } private void testFullTextFunctionTargetsExistingField(String functionInvocation) throws Exception { - assertThat( - error("from test | keep emp_no | where " + functionInvocation), - containsString("Unknown column") - ); + assertThat(error("from test | keep emp_no | where " + functionInvocation), containsString("Unknown column")); } public void testConditionalFunctionsWithMixedNumericTypes() { @@ -2207,7 +2216,10 @@ private void checkFullTextFunctionsInStats(String functionInvocation) { query("from test | stats c = max(id) where " + functionInvocation, fullTextAnalyzer); query("from test | stats c = max(id) where " + functionInvocation + " or length(title) > 10", fullTextAnalyzer); query("from test metadata _score | where " + functionInvocation + " | stats c = max(_score)", 
fullTextAnalyzer); - query("from test metadata _score | where " + functionInvocation + " or length(title) > 10 | stats c = max(_score)", fullTextAnalyzer); + query( + "from test metadata _score | where " + functionInvocation + " or length(title) > 10 | stats c = max(_score)", + fullTextAnalyzer + ); assertThat( error("from test metadata _score | stats c = max(_score) where " + functionInvocation, fullTextAnalyzer), From 22a4c2681316a3fc5fd9e3c68c3f44d09b144ac1 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 12:51:59 +0200 Subject: [PATCH 43/64] Fix tests with the same result scoring --- .../testFixtures/src/main/resources/knn-function.csv-spec | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index be71373d63e23..f7aec9e83fcc8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -102,10 +102,10 @@ knnAfterKeep required_capability: knn_function from colors metadata _score -| keep rgb_vector, _score +| keep rgb_vector, color, _score | where knn(rgb_vector, [128,128,0]) | eval round_score = round(_score, 4) -| sort round_score desc +| sort round_score desc, color asc | keep rgb_vector, round_score | limit 5 ; @@ -114,8 +114,8 @@ rgb_vector:dense_vector | round_score:double [128.0, 128.0, 0.0] | 1.0 [107.0, 142.0, 35.0] | 0.0014 [85.0, 107.0, 47.0] | 4.0E-4 -[139.0, 69.0, 19.0] | 3.0E-4 -[184.0, 134.0, 11.0] | 3.0E-4 +[184.0, 134.0, 11.0] | 3.0E-4 +[139.0, 69.0, 19.0] | 3.0E-4 ; knnAfterDrop From a34fc891dabb845a60a79430d8c3c702498ce9ab Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 20:30:10 +0200 Subject: [PATCH 44/64] Fix merge --- .../xpack/esql/analysis/VerifierTests.java | 30 ++++--------------- 1 file changed, 5 insertions(+), 25 
deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index b0bf5ee2b2d2d..1f27a813be7b8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1364,7 +1364,7 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Meditation\", title, body)", "function"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { - checkFullTextFunctionsOnlyAllowedInWhere("KNN", "multi_match(\"Meditation\", title, body)", "function"); + checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [0, 1, 2])", "function"); } } @@ -2148,7 +2148,7 @@ public void testFullTextFunctionsNullArgs() throws Exception { checkFullTextFunctionNullArgs("term(title, null)", "second"); } if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { - checkFullTextFunctionNullArgs("knn(null, [0, 1, 2]])", "first"); + checkFullTextFunctionNullArgs("knn(null, [0, 1, 2])", "first"); checkFullTextFunctionNullArgs("knn(vector, null)", "second"); } } @@ -2160,29 +2160,6 @@ private void checkFullTextFunctionNullArgs(String functionInvocation, String arg ); } - public void testFullTextFunctionsConstantQuery() throws Exception { - testFullTextFunctionsConstantQuery("match(title, category)", "second"); - testFullTextFunctionsConstantQuery("qstr(title)", ""); - testFullTextFunctionsConstantQuery("kql(title)", ""); - if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { - testFullTextFunctionsConstantQuery("multi_match(category, body)", "first"); - testFullTextFunctionsConstantQuery("multi_match(concat(title, \"world\"), title)", "first"); - } - if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { - 
testFullTextFunctionsConstantQuery("term(title, tags)", "second"); - } - if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { - testFullTextFunctionsConstantQuery("knn(vector, vector)", "second"); - } - } - - private void testFullTextFunctionsConstantQuery(String functionInvocation, String argOrdinal) throws Exception { - assertThat( - error("from test | where " + functionInvocation, fullTextAnalyzer), - containsString(argOrdinal + " argument of [" + functionInvocation + "] must be a constant") - ); - } - public void testFullTextFunctionsConstantQuery() throws Exception { checkFullTextFunctionsConstantQuery("match(title, category)", "second"); checkFullTextFunctionsConstantQuery("qstr(title)", ""); @@ -2194,6 +2171,9 @@ public void testFullTextFunctionsConstantQuery() throws Exception { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsConstantQuery("term(title, tags)", "second"); } + if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) { + checkFullTextFunctionsConstantQuery("knn(vector, vector)", "second"); + } } private void checkFullTextFunctionsConstantQuery(String functionInvocation, String argOrdinal) throws Exception { From 6fe7b2a2d54903f3c7ba523a801f30e111bd9770 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 3 Jun 2025 20:30:14 +0200 Subject: [PATCH 45/64] Spotless --- .../xpack/esql/expression/function/vector/Knn.java | 3 ++- .../xpack/esql/querydsl/query/KnnQuery.java | 3 ++- .../optimizer/LocalPhysicalPlanOptimizerTests.java | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 58a83948c4e47..62d66012e1d9d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -128,7 +128,8 @@ public Knn( type = "double", valueHint = { "3.5" }, description = "Applies the specified oversampling for rescoring quantized vectors. " - + "See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details." + + "See [oversampling and rescoring quantized vectors]" + + "(docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details." ), }, description = "(Optional) kNN additional options as <>." + " See <> for more information.", diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index cbd8a4ca3d789..aa0e896dfc013 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -64,8 +64,9 @@ protected String innerToString() { @Override public boolean equals(Object o) { - if (!(o instanceof KnnQuery knnQuery)) return false; if (super.equals(o) == false) return false; + + KnnQuery knnQuery = (KnnQuery) o; return Objects.equals(field, knnQuery.field) && Objects.deepEquals(query, knnQuery.query) && Objects.equals(options, knnQuery.options); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 6eb221d98ba84..8f6b4b071a870 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1348,11 +1348,11 @@ 
public void testMultiMatchOptionsPushDown() { } public void testKnnOptionsPushDown() { - String query = - """ - from test - | where KNN(dense_vector, [0.1, 0.2, 0.3], { "k": 5, "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7, "boost": 3.5 }) - """; + String query = """ + from test + | where KNN(dense_vector, [0.1, 0.2, 0.3], + { "k": 5, "similarity": 0.001, "num_candidates": 10, "rescore_oversample": 7, "boost": 3.5 }) + """; var analyzer = makeAnalyzer("mapping-all-types.json"); var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); From a2588206da7831a35e95d46191192c1d02581d73 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 08:30:50 +0200 Subject: [PATCH 46/64] Add test capability --- .../qa/testFixtures/src/main/resources/knn-function.csv-spec | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index f7aec9e83fcc8..89ff70756e0c3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -283,7 +283,6 @@ c: long 59 ; - testKnnInStatsPushableAndNonPushable required_capability: knn_function required_capability: full_text_functions_in_stats_where @@ -297,6 +296,9 @@ c:long ; testKnnInStatsWithGrouping +required_capability: knn_function +required_capability: full_text_functions_in_stats_where + from colors | where length(color) < 10 | stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 140}) by primary From d3262a4089ee9e888a98e30faa48e2576c910909 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 09:44:25 +0200 Subject: [PATCH 47/64] Fix loading dense_vectors when no data has been indexed (no dims specified) --- .../index/mapper/vectors/DenseVectorFieldMapper.java | 5 +++++ 
.../elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java | 2 +- .../testFixtures/src/main/resources/mapping-all-types.json | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 0d6970fba1927..f57303ce53ae5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -2577,6 +2577,11 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) { return null; } + if (dims == null) { + // No data has been indexed yet + return BlockLoader.CONSTANT_NULLS; + } + if (indexed) { return new BlockDocValuesReader.DenseVectorBlockLoader(name(), dims); } diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java index c0cba1900dc37..b4f933d19bc6a 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java @@ -1022,7 +1022,7 @@ public void testMultipleBatchesWithLookupJoin() throws IOException { var query = requestObjectBuilder().query(format(null, "from * | lookup join {} on integer {}", testIndexName(), sort)); Map result = runEsql(query); var columns = as(result.get("columns"), List.class); - assertEquals(21, columns.size()); + assertEquals(22, columns.size()); var values = as(result.get("values"), List.class); assertEquals(10, values.size()); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json index 17348adb6af4f..a7ef2f4840709 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-all-types.json @@ -63,6 +63,9 @@ "semantic_text": { "type": "semantic_text", "inference_id": "foo_inference_id" + }, + "dense_vector": { + "type": "dense_vector" } } } From be1e57865e8b4f72d8aaad0b6230713b6a9036c7 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 09:51:41 +0200 Subject: [PATCH 48/64] Fix constructor visibility --- .../xpack/esql/expression/function/vector/Knn.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 62d66012e1d9d..ecce0b069693d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -139,7 +139,7 @@ public Knn( this(source, field, query, options, null); } - public Knn(Source source, Expression field, Expression query, Expression options, QueryBuilder queryBuilder) { + private Knn(Source source, Expression field, Expression query, Expression options, QueryBuilder queryBuilder) { super(source, query, options == null ? 
List.of(field, query) : List.of(field, query, options), queryBuilder); this.field = field; this.options = options; From 524c93ceb7ecdfc09e63c386818e4f95319af822 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 10:18:40 +0200 Subject: [PATCH 49/64] Change base functions for FTFs --- .../esql/expression/function/fulltext/FullTextFunction.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java index 696e545296b90..d9668625014d3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/FullTextFunction.java @@ -143,7 +143,7 @@ public String functionType() { @Override public int hashCode() { - return Objects.hash(super.hashCode(), queryBuilder); + return Objects.hash(super.hashCode(), query, queryBuilder); } @Override @@ -331,7 +331,7 @@ public ScoreOperator.ExpressionScorer.Factory toScorer(ToScorer toScorer) { return new LuceneQueryScoreEvaluator.Factory(shardConfigs); } - public static void populateOptionsMap( + protected static void populateOptionsMap( final MapExpression options, final Map optionsMap, final TypeResolutions.ParamOrdinal paramOrdinal, From 65b3256ad9e510a4563f4f1c28ada84a1f93d916 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 10:19:51 +0200 Subject: [PATCH 50/64] Add capability check for tests --- .../xpack/esql/expression/function/fulltext/KnnTests.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index ac31b3fab23d8..c2bc381e2663c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -11,6 +11,7 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; @@ -22,6 +23,7 @@ import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates; +import org.junit.Before; import java.util.ArrayList; import java.util.List; @@ -46,6 +48,11 @@ public static Iterable parameters() { return parameterSuppliersFromTypedData(addFunctionNamedParams(testCaseSuppliers())); } + @Before + public void checkCapability() { + assumeTrue("KNN is not enabled", EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()); + } + private static List testCaseSuppliers() { List suppliers = new ArrayList<>(); From 853e0967cd36cec7c2ae53bdbe13e43250aec427 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 13:05:30 +0200 Subject: [PATCH 51/64] Fixed tests via large k and limit --- .../src/main/resources/knn-function.csv-spec | 139 +++++++++--------- 1 file changed, 68 insertions(+), 71 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 89ff70756e0c3..ee51b768f46de 100644 --- 
a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -1,3 +1,7 @@ +# TODO Most tests explicitly set k. Until knn function uses LIMIT as k, we need to explicitly set it to all values +# in the dataset to avoid test failures due to docs allocation in different shards, which can impact results for a +# top-n query at the shard level + knnSearch required_capability: knn_function @@ -33,6 +37,7 @@ from colors metadata _score | sort _score desc // end::knn-function-options[] | keep color, rgb_vector +| limit 4 ; color:text | rgb_vector:dense_vector @@ -46,7 +51,7 @@ knnSearchWithSimilarityOption required_capability: knn_function from colors metadata _score -| where knn(rgb_vector, [255,192,203], {"similarity": 40}) +| where knn(rgb_vector, [255,192,203], {"k": 140, "similarity": 40}) | sort _score desc | keep color, rgb_vector ; @@ -64,29 +69,31 @@ knnHybridSearch required_capability: knn_function from colors metadata _score -| where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 5}) +| where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 140}) | sort _score desc | eval round_score = round(_score, 4) | keep color, rgb_vector, round_score +| limit 10 ; color:text | rgb_vector:dense_vector | round_score:double -violet | [238.0, 130.0, 238.0] | 13.9457 -blue violet | [138.0, 43.0, 226.0] | 3.0871 -dark violet | [148.0, 0.0, 211.0] | 3.0871 -medium violet red | [199.0, 21.0, 133.0] | 2.5355 -pale violet red | [219.0, 112.0, 147.0] | 2.5355 -orchid | [218.0, 112.0, 214.0] | 0.0083 -plum | [221.0, 160.0, 221.0] | 0.0071 -hot pink | [255.0, 105.0, 180.0] | 0.0024 -thistle | [216.0, 191.0, 216.0] | 0.0021 +violet | [238.0, 130.0, 238.0] | 13.9457 +blue violet | [138.0, 43.0, 226.0] | 3.0877 +dark violet | [148.0, 0.0, 211.0] | 3.0877 +pale violet red | [219.0, 112.0, 147.0] | 2.5366 +medium violet red | 
[199.0, 21.0, 133.0] | 2.5359 +orchid | [218.0, 112.0, 214.0] | 0.0083 +plum | [221.0, 160.0, 221.0] | 0.0071 +hot pink | [255.0, 105.0, 180.0] | 0.0024 +thistle | [216.0, 191.0, 216.0] | 0.0021 +light pink | [255.0, 182.0, 193.0] | 0.0021 ; knnWithMultipleFunctions required_capability: knn_function from colors metadata _score -| where knn(rgb_vector, [128,128,0]) and match(color, "olive") +| where knn(rgb_vector, [128,128,0], {"k": 140}) and match(color, "olive") | sort _score desc | eval round_score = round(_score, 4) | keep color, rgb_vector, round_score @@ -103,7 +110,7 @@ required_capability: knn_function from colors metadata _score | keep rgb_vector, color, _score -| where knn(rgb_vector, [128,128,0]) +| where knn(rgb_vector, [128,128,0], {"k": 140}) | eval round_score = round(_score, 4) | sort round_score desc, color asc | keep rgb_vector, round_score @@ -123,8 +130,9 @@ required_capability: knn_function from colors metadata _score | drop color -| where knn(rgb_vector, [128,128,0]) +| where knn(rgb_vector, [128,128,0], {"k": 140}) | eval round_score = round(_score, 4) +| sort round_score desc | keep rgb_vector, round_score | limit 5 ; @@ -142,22 +150,23 @@ required_capability: knn_function from colors metadata _score | eval composed_name = locate(color, " ") > 0 -| where knn(rgb_vector, [128,128,0]) -| sort _score, color desc +| where knn(rgb_vector, [128,128,0], {"k": 140}) +| sort _score desc, color asc | keep color, composed_name +| limit 10 ; color:text | composed_name:boolean -peru | false -yellow green | true -chocolate | false -dim gray | true -saddle brown | true -sienna | false -dark golden rod | true -dark olive green | true -olive drab | true -olive | false +olive | false +olive drab | true +dark olive green | true +dark golden rod | true +sienna | false +saddle brown | true +dim gray | true +chocolate | false +yellow green | true +peru | false ; knnWithConjunction @@ -165,19 +174,23 @@ required_capability: knn_function # TODO We need kNN 
prefiltering here so we get more candidates that pass the filter from colors metadata _score -| where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*" +| where knn(rgb_vector, [255,255,238], {"k": 140}) and hex_code like "#FFF*" +| sort _score desc, color asc | keep color, hex_code, rgb_vector ; ignoreOrder:true -color:text | hex_code: keyword | rgb_vector:dense_vector -light yellow | #FFFFE0 | [255.0, 255.0, 224.0] -lavender blush | #FFF0F5 | [255.0, 240.0, 245.0] -sea shell | #FFF5EE | [255.0, 245.0, 238.0] -floral white | #FFFAF0 | [255.0, 250.0, 240.0] -ivory | #FFFFF0 | [255.0, 255.0, 240.0] -snow | #FFFAFA | [255.0, 250.0, 250.0] -white | #FFFFFF | [255.0, 255.0, 255.0] +color:text | hex_code:keyword | rgb_vector:dense_vector +corn silk | #FFF8DC | [255.0, 248.0, 220.0] +floral white | #FFFAF0 | [255.0, 250.0, 240.0] +ivory | #FFFFF0 | [255.0, 255.0, 240.0] +lavender blush | #FFF0F5 | [255.0, 240.0, 245.0] +lemon chiffon | #FFFACD | [255.0, 250.0, 205.0] +light yellow | #FFFFE0 | [255.0, 255.0, 224.0] +sea shell | #FFF5EE | [255.0, 245.0, 238.0] +snow | #FFFAFA | [255.0, 250.0, 250.0] +white | #FFFFFF | [255.0, 255.0, 255.0] +yellow | #FFFF00 | [255.0, 255.0, 0.0] ; knnWithDisjunctionAndFiltersConjunction @@ -185,27 +198,22 @@ required_capability: knn_function # TODO We need kNN prefiltering here so we get more candidates that pass the filter from colors metadata _score -| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true +| where (knn(rgb_vector, [0,255,255], {"k": 140}) or knn(rgb_vector, [128, 0, 255], {"k": 140})) and primary == true | keep color, rgb_vector, _score +| sort _score desc, color asc +| limit 10 ; color:text | rgb_vector:dense_vector | _score:double -cyan | [0.0, 255.0, 255.0] | 1.0 -blue | [0.0, 0.0, 255.0] | 9.922293975250795E-5 -; - -knnWithDisjunctionAndConjunction -required_capability: knn_function -required_capability: full_text_functions_disjunctions - -# TODO We need kNN prefiltering 
here so we get more candidates that pass the filter -from colors metadata _score -| where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0]) -| keep color, rgb_vector, _score -; - -color:text | rgb_vector:dense_vector | _score:double -medium spring green | [0.0, 250.0, 154.0] | 1.6871128173079342E-4 +red | [255.0, 0.0, 0.0] | 2.1994377675582655E-5 +yellow | [255.0, 255.0, 0.0] | 1.867113314801827E-5 +green | [0.0, 128.0, 0.0] | 2.9579907277366146E-5 +cyan | [0.0, 255.0, 255.0] | 1.000016689300537 +blue | [0.0, 0.0, 255.0] | 1.1921183613594621E-4 +magenta | [255.0, 0.0, 255.0] | 7.578763325000182E-5 +black | [0.0, 0.0, 0.0] | 2.6632071239873767E-5 +gray | [128.0, 128.0, 128.0] | 6.426929758163169E-5 +white | [255.0, 255.0, 255.0] | 3.5320219467394054E-5 ; knnWithNonPushableConjunction @@ -213,9 +221,10 @@ required_capability: knn_function from colors metadata _score | eval composed_name = locate(color, " ") > 0 -| where knn(rgb_vector, [128,128,0]) and composed_name == false +| where knn(rgb_vector, [128,128,0], {"k": 140}) and composed_name == false | eval round_score = round(_score, 4) | keep color, composed_name, round_score +| sort round_score desc, color asc ; color:text | composed_name:boolean | round_score:double @@ -229,7 +238,7 @@ testKnnWithNonPushableDisjunctions required_capability: knn_function from colors metadata _score -| where knn(rgb_vector, [128,128,0], {"k": 5}) or length(color) > 17 +| where knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 30}) or length(color) > 17 | sort _score desc, color asc | eval round_score = round(_score, 4) | keep color, round_score @@ -238,9 +247,6 @@ from colors metadata _score color:text | round_score:double olive | 1.0 olive drab | 0.0014 -dark olive green | 4.0E-4 -dark golden rod | 3.0E-4 -sienna | 3.0E-4 light golden rod yellow | 0.0 medium aqua marine | 0.0 medium spring green | 0.0 @@ -250,25 +256,16 @@ testKnnWithNonPushableDisjunctionsOnComplexExpressions 
required_capability: knn_function from colors metadata _score -| where (knn(rgb_vector, [128,128,0]) and length(color) > 12) or (knn(rgb_vector, [128,0,128]) and primary == false) +| where (knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 40}) and length(color) > 5) or (knn(rgb_vector, [128,0,128], {"k": 140, "similarity": 40}) and primary == false) | sort _score desc | eval round_score = round(_score, 4) | keep color, primary, round_score ; -color: text | primary: boolean | round_score: double -purple | false | 1.0 -dark magenta | false | 0.0045 -dark olive green | false | 4.0E-4 -indigo | false | 4.0E-4 -dark golden rod | false | 3.0E-4 -dim gray | false | 3.0E-4 -dark slate blue | false | 2.0E-4 -medium violet red | false | 2.0E-4 -dark orchid | false | 1.0E-4 -dark violet | false | 1.0E-4 -brown | false | 1.0E-4 -blue violet | false | 1.0E-4 +color:text | primary:boolean | round_score:double +purple | false | 1.0 +dark magenta | false | 0.0045 +olive drab | false | 0.0014 ; testKnnInStatsNonPushable From dc7154958e900113c94606b811e0334d190acd1c Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 13:51:38 +0200 Subject: [PATCH 52/64] Fix test for serverless / multi cluster --- .../qa/testFixtures/src/main/resources/knn-function.csv-spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index ee51b768f46de..17e44c47ca9f7 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -34,7 +34,7 @@ required_capability: knn_function // tag::knn-function-options[] from colors metadata _score | where knn(rgb_vector, [0,255,255], {"k": 4}) -| sort _score desc +| sort _score desc, color asc // end::knn-function-options[] | keep color, rgb_vector | limit 4 From 
97b6c63165e8f783c043ab80bc8f21600b472698 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Wed, 4 Jun 2025 17:04:39 +0200 Subject: [PATCH 53/64] Replacing scores with round to avoid rounding errors --- .../src/main/resources/knn-function.csv-spec | 79 ++++++++++--------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 17e44c47ca9f7..fcbec7ea7eebe 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -110,19 +110,18 @@ required_capability: knn_function from colors metadata _score | keep rgb_vector, color, _score -| where knn(rgb_vector, [128,128,0], {"k": 140}) -| eval round_score = round(_score, 4) -| sort round_score desc, color asc -| keep rgb_vector, round_score +| where knn(rgb_vector, [128,255,0], {"k": 140}) +| sort _score desc, color asc +| keep rgb_vector | limit 5 ; -rgb_vector:dense_vector | round_score:double -[128.0, 128.0, 0.0] | 1.0 -[107.0, 142.0, 35.0] | 0.0014 -[85.0, 107.0, 47.0] | 4.0E-4 -[184.0, 134.0, 11.0] | 3.0E-4 -[139.0, 69.0, 19.0] | 3.0E-4 +rgb_vector:dense_vector +[127.0, 255.0, 0.0] +[124.0, 252.0, 0.0] +[173.0, 255.0, 47.0] +[154.0, 205.0, 50.0] +[50.0, 205.0, 50.0] ; knnAfterDrop @@ -130,19 +129,18 @@ required_capability: knn_function from colors metadata _score | drop color -| where knn(rgb_vector, [128,128,0], {"k": 140}) -| eval round_score = round(_score, 4) -| sort round_score desc -| keep rgb_vector, round_score +| where knn(rgb_vector, [128,255,0], {"k": 140}) +| sort _score desc +| keep rgb_vector | limit 5 ; -rgb_vector:dense_vector | round_score:double -[184.0, 134.0, 11.0] | 3.0E-4 -[128.0, 128.0, 0.0] | 1.0 -[154.0, 205.0, 50.0] | 1.0E-4 -[85.0, 107.0, 47.0] | 4.0E-4 -[107.0, 142.0, 35.0] | 0.0014 +rgb_vector:dense_vector +[127.0, 
255.0, 0.0] +[124.0, 252.0, 0.0] +[173.0, 255.0, 47.0] +[154.0, 205.0, 50.0] +[50.0, 205.0, 50.0] ; knnAfterEval @@ -201,19 +199,21 @@ from colors metadata _score | where (knn(rgb_vector, [0,255,255], {"k": 140}) or knn(rgb_vector, [128, 0, 255], {"k": 140})) and primary == true | keep color, rgb_vector, _score | sort _score desc, color asc +| eval round_score = round(_score) +| drop _score | limit 10 ; -color:text | rgb_vector:dense_vector | _score:double -red | [255.0, 0.0, 0.0] | 2.1994377675582655E-5 -yellow | [255.0, 255.0, 0.0] | 1.867113314801827E-5 -green | [0.0, 128.0, 0.0] | 2.9579907277366146E-5 -cyan | [0.0, 255.0, 255.0] | 1.000016689300537 -blue | [0.0, 0.0, 255.0] | 1.1921183613594621E-4 -magenta | [255.0, 0.0, 255.0] | 7.578763325000182E-5 -black | [0.0, 0.0, 0.0] | 2.6632071239873767E-5 -gray | [128.0, 128.0, 128.0] | 6.426929758163169E-5 -white | [255.0, 255.0, 255.0] | 3.5320219467394054E-5 +color:text | rgb_vector:dense_vector | round_score:double +cyan | [0.0, 255.0, 255.0] | 1.0 +blue | [0.0, 0.0, 255.0] | 0.0 +magenta | [255.0, 0.0, 255.0] | 0.0 +gray | [128.0, 128.0, 128.0] | 0.0 +white | [255.0, 255.0, 255.0] | 0.0 +green | [0.0, 128.0, 0.0] | 0.0 +black | [0.0, 0.0, 0.0] | 0.0 +red | [255.0, 0.0, 0.0] | 0.0 +yellow | [255.0, 255.0, 0.0] | 0.0 ; knnWithNonPushableConjunction @@ -222,16 +222,23 @@ required_capability: knn_function from colors metadata _score | eval composed_name = locate(color, " ") > 0 | where knn(rgb_vector, [128,128,0], {"k": 140}) and composed_name == false -| eval round_score = round(_score, 4) +| eval round_score = round(_score) +| sort _score desc, color asc | keep color, composed_name, round_score -| sort round_score desc, color asc +| limit 10 ; color:text | composed_name:boolean | round_score:double -olive | false | 1.0 -sienna | false | 3.0E-4 -chocolate | false | 1.0E-4 -peru | false | 1.0E-4 +olive | false | 1.0 +sienna | false | 0.0 +chocolate | false | 0.0 +peru | false | 0.0 +brown | false | 0.0 +gray | false 
| 0.0 +green | false | 0.0 +maroon | false | 0.0 +firebrick | false | 0.0 +chartreuse | false | 0.0 ; testKnnWithNonPushableDisjunctions From d824faae0cbe7928f3b3c1082623f92f0cb6cfe4 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 5 Jun 2025 09:41:59 +0200 Subject: [PATCH 54/64] Replacing scores with round to avoid rounding errors --- .../src/main/resources/knn-function.csv-spec | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index fcbec7ea7eebe..602dff51bdab2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -71,22 +71,22 @@ required_capability: knn_function from colors metadata _score | where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 140}) | sort _score desc -| eval round_score = round(_score, 4) +| eval round_score = round(_score) | keep color, rgb_vector, round_score | limit 10 ; color:text | rgb_vector:dense_vector | round_score:double -violet | [238.0, 130.0, 238.0] | 13.9457 -blue violet | [138.0, 43.0, 226.0] | 3.0877 -dark violet | [148.0, 0.0, 211.0] | 3.0877 -pale violet red | [219.0, 112.0, 147.0] | 2.5366 -medium violet red | [199.0, 21.0, 133.0] | 2.5359 -orchid | [218.0, 112.0, 214.0] | 0.0083 -plum | [221.0, 160.0, 221.0] | 0.0071 -hot pink | [255.0, 105.0, 180.0] | 0.0024 -thistle | [216.0, 191.0, 216.0] | 0.0021 -light pink | [255.0, 182.0, 193.0] | 0.0021 +violet | [238.0, 130.0, 238.0] | 14.0 +blue violet | [138.0, 43.0, 226.0] | 3.0 +dark violet | [148.0, 0.0, 211.0] | 3.0 +pale violet red | [219.0, 112.0, 147.0] | 3.0 +medium violet red | [199.0, 21.0, 133.0] | 3.0 +orchid | [218.0, 112.0, 214.0] | 0.0 +plum | [221.0, 160.0, 221.0] | 0.0 +hot pink | [255.0, 105.0, 180.0] | 0.0 +thistle | 
[216.0, 191.0, 216.0] | 0.0 +light pink | [255.0, 182.0, 193.0] | 0.0 ; knnWithMultipleFunctions @@ -95,14 +95,14 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [128,128,0], {"k": 140}) and match(color, "olive") | sort _score desc -| eval round_score = round(_score, 4) +| eval round_score = round(_score) | keep color, rgb_vector, round_score ; color:text | rgb_vector:dense_vector | round_score:double -olive | [128.0, 128.0, 0.0] | 5.4979 -olive drab | [107.0, 142.0, 35.0] | 3.5206 -dark olive green | [85.0, 107.0, 47.0] | 2.8906 +olive | [128.0, 128.0, 0.0] | 5.0 +olive drab | [107.0, 142.0, 35.0] | 4.0 +dark olive green | [85.0, 107.0, 47.0] | 3.0 ; knnAfterKeep @@ -247,13 +247,13 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 30}) or length(color) > 17 | sort _score desc, color asc -| eval round_score = round(_score, 4) +| eval round_score = round(_score) | keep color, round_score ; color:text | round_score:double olive | 1.0 -olive drab | 0.0014 +olive drab | 0.0 light golden rod yellow | 0.0 medium aqua marine | 0.0 medium spring green | 0.0 @@ -265,14 +265,14 @@ required_capability: knn_function from colors metadata _score | where (knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 40}) and length(color) > 5) or (knn(rgb_vector, [128,0,128], {"k": 140, "similarity": 40}) and primary == false) | sort _score desc -| eval round_score = round(_score, 4) +| eval round_score = round(_score) | keep color, primary, round_score ; color:text | primary:boolean | round_score:double purple | false | 1.0 -dark magenta | false | 0.0045 -olive drab | false | 0.0014 +dark magenta | false | 0.0 +olive drab | false | 0.0 ; testKnnInStatsNonPushable From 78aa6d0530997e18f8bdb8892f76f014f6d0601e Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 5 Jun 2025 11:12:00 +0200 Subject: [PATCH 55/64] Remove quantization for less brittle tests --- 
.../src/main/resources/knn-function.csv-spec | 47 +++++++++---------- .../src/main/resources/mapping-colors.json | 11 +++-- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 602dff51bdab2..aa808e769fdd3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -8,23 +8,23 @@ required_capability: knn_function // tag::knn-function[] from colors metadata _score | where knn(rgb_vector, [0, 120, 0]) -| sort _score desc +| sort _score desc, color asc // end::knn-function[] | keep color, rgb_vector ; // tag::knn-function-result[] color:text | rgb_vector:dense_vector -green | [0.0, 128.0, 0.0] -dark green | [0.0, 100.0, 0.0] -forest green | [34.0, 139.0, 34.0] -dark olive green | [85.0, 107.0, 47.0] -sea green | [46.0, 139.0, 87.0] -dark slate gray | [47.0, 79.0, 79.0] -olive drab | [107.0, 142.0, 35.0] -lime green | [50.0, 205.0, 50.0] -black | [0.0, 0.0, 0.0] -olive | [128.0, 128.0, 0.0] +green | [0.0, 128.0, 0.0] +dark green | [0.0, 100.0, 0.0] +forest green | [34.0, 139.0, 34.0] +dark olive green | [85.0, 107.0, 47.0] +sea green | [46.0, 139.0, 87.0] +dark slate gray | [47.0, 79.0, 79.0] +lime green | [50.0, 205.0, 50.0] +olive drab | [107.0, 142.0, 35.0] +black | [0.0, 0.0, 0.0] +olive | [128.0, 128.0, 0.0] // end::knn-function-result[] ; @@ -52,17 +52,16 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [255,192,203], {"k": 140, "similarity": 40}) -| sort _score desc +| sort _score desc, color asc | keep color, rgb_vector ; color:text | rgb_vector:dense_vector -pink | [255.0, 192.0, 203.0] -light pink | [255.0, 182.0, 193.0] -peach puff | [255.0, 218.0, 185.0] -bisque | [255.0, 228.0, 196.0] -thistle | [216.0, 191.0, 216.0] -wheat | [245.0, 
222.0, 179.0] +pink | [255.0, 192.0, 203.0] +light pink | [255.0, 182.0, 193.0] +peach puff | [255.0, 218.0, 185.0] +bisque | [255.0, 228.0, 196.0] +wheat | [245.0, 222.0, 179.0] ; knnHybridSearch @@ -157,14 +156,14 @@ from colors metadata _score color:text | composed_name:boolean olive | false olive drab | true -dark olive green | true dark golden rod | true -sienna | false saddle brown | true -dim gray | true +dark olive green | true +sienna | false chocolate | false yellow green | true peru | false +forest green | true ; knnWithConjunction @@ -234,11 +233,11 @@ sienna | false | 0.0 chocolate | false | 0.0 peru | false | 0.0 brown | false | 0.0 +firebrick | false | 0.0 +chartreuse | false | 0.0 gray | false | 0.0 green | false | 0.0 maroon | false | 0.0 -firebrick | false | 0.0 -chartreuse | false | 0.0 ; testKnnWithNonPushableDisjunctions @@ -253,7 +252,6 @@ from colors metadata _score color:text | round_score:double olive | 1.0 -olive drab | 0.0 light golden rod yellow | 0.0 medium aqua marine | 0.0 medium spring green | 0.0 @@ -272,7 +270,6 @@ from colors metadata _score color:text | primary:boolean | round_score:double purple | false | 1.0 dark magenta | false | 0.0 -olive drab | false | 0.0 ; testKnnInStatsNonPushable diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json index e5c5e7d65fd1e..24c4102e428f8 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json @@ -6,12 +6,15 @@ "hex_code": { "type": "keyword" }, - "rgb_vector": { - "type": "dense_vector", - "similarity": "l2_norm" - }, "primary": { "type": "boolean" + }, + "rgb_vector": { + "type": "dense_vector", + "similarity": "l2_norm", + "index_options": { + "type": "hnsw" + } } } } From 47b91f5f9c8aefee74d679d2e7239a93a1ee774d Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 5 Jun 
2025 12:48:00 +0200 Subject: [PATCH 56/64] Add LIMIT to avoid multi cluster test failures --- .../qa/testFixtures/src/main/resources/knn-function.csv-spec | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index aa808e769fdd3..af9222d18d3af 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -11,6 +11,7 @@ from colors metadata _score | sort _score desc, color asc // end::knn-function[] | keep color, rgb_vector +| limit 10 ; // tag::knn-function-result[] @@ -174,6 +175,7 @@ from colors metadata _score | where knn(rgb_vector, [255,255,238], {"k": 140}) and hex_code like "#FFF*" | sort _score desc, color asc | keep color, hex_code, rgb_vector +| limit 10 ; ignoreOrder:true From 52f057b7463616554abb1aee8e664e6fb95f4d0f Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 5 Jun 2025 14:21:43 +0200 Subject: [PATCH 57/64] I give up on testing scores. You win, multiple shards on serverless. 
--- .../src/main/resources/knn-function.csv-spec | 104 +++++++++--------- 1 file changed, 49 insertions(+), 55 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index af9222d18d3af..a3e478d916ea3 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -71,22 +71,21 @@ required_capability: knn_function from colors metadata _score | where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 140}) | sort _score desc -| eval round_score = round(_score) -| keep color, rgb_vector, round_score +| keep color, rgb_vector | limit 10 ; -color:text | rgb_vector:dense_vector | round_score:double -violet | [238.0, 130.0, 238.0] | 14.0 -blue violet | [138.0, 43.0, 226.0] | 3.0 -dark violet | [148.0, 0.0, 211.0] | 3.0 -pale violet red | [219.0, 112.0, 147.0] | 3.0 -medium violet red | [199.0, 21.0, 133.0] | 3.0 -orchid | [218.0, 112.0, 214.0] | 0.0 -plum | [221.0, 160.0, 221.0] | 0.0 -hot pink | [255.0, 105.0, 180.0] | 0.0 -thistle | [216.0, 191.0, 216.0] | 0.0 -light pink | [255.0, 182.0, 193.0] | 0.0 +color:text | rgb_vector:dense_vector +violet | [238.0, 130.0, 238.0] +blue violet | [138.0, 43.0, 226.0] +dark violet | [148.0, 0.0, 211.0] +pale violet red | [219.0, 112.0, 147.0] +medium violet red | [199.0, 21.0, 133.0] +orchid | [218.0, 112.0, 214.0] +plum | [221.0, 160.0, 221.0] +hot pink | [255.0, 105.0, 180.0] +thistle | [216.0, 191.0, 216.0] +light pink | [255.0, 182.0, 193.0] ; knnWithMultipleFunctions @@ -95,14 +94,13 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [128,128,0], {"k": 140}) and match(color, "olive") | sort _score desc -| eval round_score = round(_score) -| keep color, rgb_vector, round_score +| keep color, rgb_vector ; -color:text | 
rgb_vector:dense_vector | round_score:double -olive | [128.0, 128.0, 0.0] | 5.0 -olive drab | [107.0, 142.0, 35.0] | 4.0 -dark olive green | [85.0, 107.0, 47.0] | 3.0 +color:text | rgb_vector:dense_vector +olive | [128.0, 128.0, 0.0] +olive drab | [107.0, 142.0, 35.0] +dark olive green | [85.0, 107.0, 47.0] ; knnAfterKeep @@ -200,21 +198,20 @@ from colors metadata _score | where (knn(rgb_vector, [0,255,255], {"k": 140}) or knn(rgb_vector, [128, 0, 255], {"k": 140})) and primary == true | keep color, rgb_vector, _score | sort _score desc, color asc -| eval round_score = round(_score) | drop _score | limit 10 ; -color:text | rgb_vector:dense_vector | round_score:double -cyan | [0.0, 255.0, 255.0] | 1.0 -blue | [0.0, 0.0, 255.0] | 0.0 -magenta | [255.0, 0.0, 255.0] | 0.0 -gray | [128.0, 128.0, 128.0] | 0.0 -white | [255.0, 255.0, 255.0] | 0.0 -green | [0.0, 128.0, 0.0] | 0.0 -black | [0.0, 0.0, 0.0] | 0.0 -red | [255.0, 0.0, 0.0] | 0.0 -yellow | [255.0, 255.0, 0.0] | 0.0 +color:text | rgb_vector:dense_vector +cyan | [0.0, 255.0, 255.0] +blue | [0.0, 0.0, 255.0] +magenta | [255.0, 0.0, 255.0] +gray | [128.0, 128.0, 128.0] +white | [255.0, 255.0, 255.0] +green | [0.0, 128.0, 0.0] +black | [0.0, 0.0, 0.0] +red | [255.0, 0.0, 0.0] +yellow | [255.0, 255.0, 0.0] ; knnWithNonPushableConjunction @@ -223,23 +220,22 @@ required_capability: knn_function from colors metadata _score | eval composed_name = locate(color, " ") > 0 | where knn(rgb_vector, [128,128,0], {"k": 140}) and composed_name == false -| eval round_score = round(_score) | sort _score desc, color asc -| keep color, composed_name, round_score +| keep color, composed_name | limit 10 ; -color:text | composed_name:boolean | round_score:double -olive | false | 1.0 -sienna | false | 0.0 -chocolate | false | 0.0 -peru | false | 0.0 -brown | false | 0.0 -firebrick | false | 0.0 -chartreuse | false | 0.0 -gray | false | 0.0 -green | false | 0.0 -maroon | false | 0.0 +color:text | composed_name:boolean +olive | false 
+sienna | false +chocolate | false +peru | false +brown | false +firebrick | false +chartreuse | false +gray | false +green | false +maroon | false ; testKnnWithNonPushableDisjunctions @@ -248,15 +244,14 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 30}) or length(color) > 17 | sort _score desc, color asc -| eval round_score = round(_score) -| keep color, round_score +| keep color ; -color:text | round_score:double -olive | 1.0 -light golden rod yellow | 0.0 -medium aqua marine | 0.0 -medium spring green | 0.0 +color:text +olive +light golden rod yellow +medium aqua marine +medium spring green ; testKnnWithNonPushableDisjunctionsOnComplexExpressions @@ -265,13 +260,12 @@ required_capability: knn_function from colors metadata _score | where (knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 40}) and length(color) > 5) or (knn(rgb_vector, [128,0,128], {"k": 140, "similarity": 40}) and primary == false) | sort _score desc -| eval round_score = round(_score) -| keep color, primary, round_score +| keep color, primary ; -color:text | primary:boolean | round_score:double -purple | false | 1.0 -dark magenta | false | 0.0 +color:text | primary:boolean +purple | false +dark magenta | false ; testKnnInStatsNonPushable From 68ec878bc669765e108990b48fcadfaab3344969 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Thu, 5 Jun 2025 18:10:05 +0200 Subject: [PATCH 58/64] Some tests make no sense as we're not deduplicating --- .../src/main/resources/knn-function.csv-spec | 31 ++----------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index a3e478d916ea3..b7f1c29fd4ad5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -70,7 +70,7 @@ required_capability: knn_function from colors metadata _score | where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 140}) -| sort _score desc +| sort _score desc, color asc | keep color, rgb_vector | limit 10 ; @@ -93,7 +93,7 @@ required_capability: knn_function from colors metadata _score | where knn(rgb_vector, [128,128,0], {"k": 140}) and match(color, "olive") -| sort _score desc +| sort _score desc, color asc | keep color, rgb_vector ; @@ -259,7 +259,7 @@ required_capability: knn_function from colors metadata _score | where (knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 40}) and length(color) > 5) or (knn(rgb_vector, [128,0,128], {"k": 140, "similarity": 40}) and primary == false) -| sort _score desc +| sort _score desc, color asc | keep color, primary ; @@ -280,18 +280,6 @@ c: long 59 ; -testKnnInStatsPushableAndNonPushable -required_capability: knn_function -required_capability: full_text_functions_in_stats_where - -from colors metadata _score -| stats c = count(*) where (knn(rgb_vector, [0,255,255], {"k": 140}) or knn(rgb_vector, [0, 0, 255])) and knn(rgb_vector, [0, 255, 0], {"k": 40}) -; - -c:long -40 -; - testKnnInStatsWithGrouping required_capability: knn_function required_capability: full_text_functions_in_stats_where @@ -305,16 +293,3 @@ c: long | primary: boolean 50 | false 9 | true ; - -testKnnInStatsPushable -required_capability: knn_function -required_capability: full_text_functions_in_stats_where - -from colors -| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 40}) -; - -# No surprises, gets the number of top k -c:long -40 -; From 12fb39c67057755d75bfa45be616721c597dc909 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 6 Jun 2025 10:16:13 +0200 Subject: [PATCH 59/64] Fixing test for serverless. Again. 
--- .../src/main/resources/knn-function.csv-spec | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index b7f1c29fd4ad5..dcd3913a4a917 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -69,23 +69,23 @@ knnHybridSearch required_capability: knn_function from colors metadata _score -| where match(color, "violet") or knn(rgb_vector, [238,130,238], {"boost": 10.0, "k": 140}) +| where match(color, "blue") or knn(rgb_vector, [65,105,225], {"k": 140}) +| where primary == true | sort _score desc, color asc | keep color, rgb_vector | limit 10 ; -color:text | rgb_vector:dense_vector -violet | [238.0, 130.0, 238.0] -blue violet | [138.0, 43.0, 226.0] -dark violet | [148.0, 0.0, 211.0] -pale violet red | [219.0, 112.0, 147.0] -medium violet red | [199.0, 21.0, 133.0] -orchid | [218.0, 112.0, 214.0] -plum | [221.0, 160.0, 221.0] -hot pink | [255.0, 105.0, 180.0] -thistle | [216.0, 191.0, 216.0] -light pink | [255.0, 182.0, 193.0] +color:text | rgb_vector:dense_vector +blue | [0.0, 0.0, 255.0] +gray | [128.0, 128.0, 128.0] +cyan | [0.0, 255.0, 255.0] +magenta | [255.0, 0.0, 255.0] +green | [0.0, 128.0, 0.0] +white | [255.0, 255.0, 255.0] +black | [0.0, 0.0, 0.0] +red | [255.0, 0.0, 0.0] +yellow | [255.0, 255.0, 0.0] ; knnWithMultipleFunctions From 36c26a5f34900c2a289051db207b81371de6f1dc Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 6 Jun 2025 12:55:52 +0200 Subject: [PATCH 60/64] Add test for null dimensions --- .../xpack/esql/DenseVectorFieldTypeIT.java | 62 ++++++++++++++----- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java index 12631fdeaed5b..e30f71fc7a1e2 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java @@ -127,9 +127,55 @@ public void testRetrieveDenseVectorFieldData() { } } + public void testNonIndexedDenseVectorField() throws IOException { + createIndexWithDenseVector("no_dense_vectors"); + + int numDocs = randomIntBetween(10, 100); + IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; + for (int i = 0; i < numDocs; i++) { + docs[i] = prepareIndex("no_dense_vectors").setId("" + i).setSource("id", String.valueOf(i)); + } + + indexRandom(true, docs); + + var query = """ + FROM no_dense_vectors + | KEEP id, vector + """; + + try (var resp = run(query)) { + List<List<Object>> valuesList = EsqlTestUtils.getValuesList(resp); + assertEquals(numDocs, valuesList.size()); + valuesList.forEach(value -> { + assertEquals(2, value.size()); + Integer id = (Integer) value.get(0); + assertNotNull(id); + Object vector = value.get(1); + assertNull(vector); + }); + } + } + @Before public void setup() throws IOException { - var indexName = "test"; + createIndexWithDenseVector("test"); + + int numDims = randomIntBetween(32, 64) * 2; // min 64, even number + int numDocs = randomIntBetween(10, 100); + IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; + for (int i = 0; i < numDocs; i++) { + List<Float> vector = new ArrayList<>(numDims); + for (int j = 0; j < numDims; j++) { + vector.add(randomFloat()); + } + docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "vector", vector); + indexedVectors.put(i, vector); + } + + indexRandom(true, docs); + } + + private void createIndexWithDenseVector(String indexName) throws IOException { var client = client().admin().indices(); XContentBuilder 
mapping = XContentFactory.jsonBuilder() .startObject() @@ -159,19 +205,5 @@ public void setup() throws IOException { .setMapping(mapping) .setSettings(settingsBuilder.build()); assertAcked(CreateRequest); - - int numDims = randomIntBetween(32, 64) * 2; // min 64, even number - int numDocs = randomIntBetween(10, 100); - IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; - for (int i = 0; i < numDocs; i++) { - List<Float> vector = new ArrayList<>(numDims); - for (int j = 0; j < numDims; j++) { - vector.add(randomFloat()); - } - docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "vector", vector); - indexedVectors.put(i, vector); - } - - indexRandom(true, docs); } } From c5b129280ced8a0e88218d179f94f4d7766e71d6 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 6 Jun 2025 17:16:57 +0200 Subject: [PATCH 61/64] Remove colors that have duplicate names to help with matching and scoring --- .../src/main/resources/data/colors.csv | 79 ------------ .../src/main/resources/knn-function.csv-spec | 117 +++++++++--------- 2 files changed, 56 insertions(+), 140 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv index 7922ff5d2ccb1..b82ef7087a54c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/colors.csv @@ -1,139 +1,60 @@ color:text,hex_code:keyword,rgb_vector:dense_vector,primary:boolean maroon, #800000, [128,0,0], false -dark red, #8B0000, [139,0,0], false brown, #A52A2A, [165,42,42], false firebrick, #B22222, [178,34,34], false crimson, #DC143C, [220,20,60], false red, #FF0000, [255,0,0], true tomato, #FF6347, [255,99,71], false coral, #FF7F50, [255,127,80], false -indian red, #CD5C5C, [205,92,92], false -light coral, #F08080, [240,128,128], false -dark salmon, #E9967A, [233,150,122], false salmon, #FA8072, [250,128,114], false 
-light salmon, #FFA07A, [255,160,122], false -orange red, #FF4500, [255,69,0], false -dark orange, #FF8C00, [255,140,0], false orange, #FFA500, [255,165,0], false gold, #FFD700, [255,215,0], false -dark golden rod, #B8860B, [184,134,11], false golden rod, #DAA520, [218,165,32], false -pale golden rod, #EEE8AA, [238,232,170], false -dark khaki, #BDB76B, [189,183,107], false khaki, #F0E68C, [240,230,140], false olive, #808000, [128,128,0], false yellow, #FFFF00, [255,255,0], true -yellow green, #9ACD32, [154,205,50], false -dark olive green, #556B2F, [85,107,47], false -olive drab, #6B8E23, [107,142,35], false -lawn green, #7CFC00, [124,252,0], false chartreuse, #7FFF00, [127,255,0], false -green yellow, #ADFF2F, [173,255,47], false -dark green, #006400, [0,100,0], false green, #008000, [0,128,0], true -forest green, #228B22, [34,139,34], false lime, #00FF00, [0,255,0], false -lime green, #32CD32, [50,205,50], false -light green, #90EE90, [144,238,144], false -pale green, #98FB98, [152,251,152], false -dark sea green, #8FBC8F, [143,188,143], false -medium spring green, #00FA9A, [0,250,154], false -spring green, #00FF7F, [0,255,127], false -sea green, #2E8B57, [46,139,87], false -medium aqua marine, #66CDAA, [102,205,170], false -medium sea green, #3CB371, [60,179,113], false -light sea green, #20B2AA, [32,178,170], false -dark slate gray, #2F4F4F, [47,79,79], false teal, #008080, [0,128,128], false -dark cyan, #008B8B, [0,139,139], false cyan, #00FFFF, [0,255,255], true -light cyan, #E0FFFF, [224,255,255], false -dark turquoise, #00CED1, [0,206,209], false turquoise, #40E0D0, [64,224,208], false -medium turquoise, #48D1CC, [72,209,204], false -pale turquoise, #AFEEEE, [175,238,238], false aqua marine, #7FFFD4, [127,255,212], false -powder blue, #B0E0E6, [176,224,230], false -cadet blue, #5F9EA0, [95,158,160], false -steel blue, #4682B4, [70,130,180], false -corn flower blue, #6495ED, [100,149,237], false -deep sky blue, #00BFFF, [0,191,255], false -dodger blue, 
#1E90FF, [30,144,255], false -light blue, #ADD8E6, [173,216,230], false -sky blue, #87CEEB, [135,206,235], false -light sky blue, #87CEFA, [135,206,250], false -midnight blue, #191970, [25,25,112], false navy, #000080, [0,0,128], false -dark blue, #00008B, [0,0,139], false -medium blue, #0000CD, [0,0,205], false blue, #0000FF, [0,0,255], true -royal blue, #4169E1, [65,105,225], false -blue violet, #8A2BE2, [138,43,226], false indigo, #4B0082, [75,0,130], false -dark slate blue, #483D8B, [72,61,139], false -slate blue, #6A5ACD, [106,90,205], false -medium slate blue, #7B68EE, [123,104,238], false -medium purple, #9370DB, [147,112,219], false -dark magenta, #8B008B, [139,0,139], false -dark violet, #9400D3, [148,0,211], false -dark orchid, #9932CC, [153,50,204], false -medium orchid, #BA55D3, [186,85,211], false purple, #800080, [128,0,128], false thistle, #D8BFD8, [216,191,216], false plum, #DDA0DD, [221,160,221], false violet, #EE82EE, [238,130,238], false magenta, #FF00FF, [255,0,255], true orchid, #DA70D6, [218,112,214], false -medium violet red, #C71585, [199,21,133], false -pale violet red, #DB7093, [219,112,147], false -deep pink, #FF1493, [255,20,147], false -hot pink, #FF69B4, [255,105,180], false -light pink, #FFB6C1, [255,182,193], false pink, #FFC0CB, [255,192,203], false -antique white, #FAEBD7, [250,235,215], false beige, #F5F5DC, [245,245,220], false bisque, #FFE4C4, [255,228,196], false -blanched almond, #FFEBCD, [255,235,205], false wheat, #F5DEB3, [245,222,179], false corn silk, #FFF8DC, [255,248,220], false lemon chiffon, #FFFACD, [255,250,205], false -light golden rod yellow, #FAFAD2, [250,250,210], false -light yellow, #FFFFE0, [255,255,224], false -saddle brown, #8B4513, [139,69,19], false sienna, #A0522D, [160,82,45], false chocolate, #D2691E, [210,105,30], false peru, #CD853F, [205,133,63], false -sandy brown, #F4A460, [244,164,96], false burly wood, #DEB887, [222,184,135], false tan, #D2B48C, [210,180,140], false -rosy brown, #BC8F8F, 
[188,143,143], false moccasin, #FFE4B5, [255,228,181], false -navajo white, #FFDEAD, [255,222,173], false peach puff, #FFDAB9, [255,218,185], false misty rose, #FFE4E1, [255,228,225], false -lavender blush, #FFF0F5, [255,240,245], false linen, #FAF0E6, [250,240,230], false old lace, #FDF5E6, [253,245,230], false papaya whip, #FFEFD5, [255,239,213], false sea shell, #FFF5EE, [255,245,238], false mint cream, #F5FFFA, [245,255,250], false -slate gray, #708090, [112,128,144], false -light slate gray, #778899, [119,136,153], false -light steel blue, #B0C4DE, [176,196,222], false lavender, #E6E6FA, [230,230,250], false -floral white, #FFFAF0, [255,250,240], false -alice blue, #F0F8FF, [240,248,255], false -ghost white, #F8F8FF, [248,248,255], false honeydew, #F0FFF0, [240,255,240], false ivory, #FFFFF0, [255,255,240], false azure, #F0FFFF, [240,255,255], false snow, #FFFAFA, [255,250,250], false black, #000000, [0,0,0], true -dim gray, #696969, [105,105,105], false gray, #808080, [128,128,128], true -dark gray, #A9A9A9, [169,169,169], false silver, #C0C0C0, [192,192,192], false -light gray, #D3D3D3, [211,211,211], false gainsboro, #DCDCDC, [220,220,220], false -white smoke, #F5F5F5, [245,245,245], false white, #FFFFFF, [255,255,255], true diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index dcd3913a4a917..0916e2fec8e18 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -15,17 +15,17 @@ from colors metadata _score ; // tag::knn-function-result[] -color:text | rgb_vector:dense_vector -green | [0.0, 128.0, 0.0] -dark green | [0.0, 100.0, 0.0] -forest green | [34.0, 139.0, 34.0] -dark olive green | [85.0, 107.0, 47.0] -sea green | [46.0, 139.0, 87.0] -dark slate gray | [47.0, 79.0, 79.0] -lime green | [50.0, 205.0, 50.0] -olive drab | 
[107.0, 142.0, 35.0] -black | [0.0, 0.0, 0.0] -olive | [128.0, 128.0, 0.0] +color:text | rgb_vector:dense_vector +green | [0.0, 128.0, 0.0] +black | [0.0, 0.0, 0.0] +olive | [128.0, 128.0, 0.0] +teal | [0.0, 128.0, 128.0] +lime | [0.0, 255.0, 0.0] +sienna | [160.0, 82.0, 45.0] +maroon | [128.0, 0.0, 0.0] +navy | [0.0, 0.0, 128.0] +gray | [128.0, 128.0, 128.0] +chartreuse | [127.0, 255.0, 0.0] // end::knn-function-result[] ; @@ -41,11 +41,11 @@ from colors metadata _score | limit 4 ; -color:text | rgb_vector:dense_vector -cyan | [0.0, 255.0, 255.0] -deep sky blue | [0.0, 191.0, 255.0] -dark turquoise | [0.0, 206.0, 209.0] -turquoise | [64.0, 224.0, 208.0] +color:text | rgb_vector:dense_vector +cyan | [0.0, 255.0, 255.0] +turquoise | [64.0, 224.0, 208.0] +aqua marine | [127.0, 255.0, 212.0] +teal | [0.0, 128.0, 128.0] ; knnSearchWithSimilarityOption @@ -57,12 +57,12 @@ from colors metadata _score | keep color, rgb_vector ; -color:text | rgb_vector:dense_vector +color:text | rgb_vector:dense_vector pink | [255.0, 192.0, 203.0] -light pink | [255.0, 182.0, 193.0] peach puff | [255.0, 218.0, 185.0] bisque | [255.0, 228.0, 196.0] wheat | [245.0, 222.0, 179.0] + ; knnHybridSearch @@ -99,8 +99,6 @@ from colors metadata _score color:text | rgb_vector:dense_vector olive | [128.0, 128.0, 0.0] -olive drab | [107.0, 142.0, 35.0] -dark olive green | [85.0, 107.0, 47.0] ; knnAfterKeep @@ -116,10 +114,10 @@ from colors metadata _score rgb_vector:dense_vector [127.0, 255.0, 0.0] -[124.0, 252.0, 0.0] -[173.0, 255.0, 47.0] -[154.0, 205.0, 50.0] -[50.0, 205.0, 50.0] +[128.0, 128.0, 0.0] +[255.0, 255.0, 0.0] +[0.0, 255.0, 0.0] +[218.0, 165.0, 32.0] ; knnAfterDrop @@ -134,11 +132,11 @@ from colors metadata _score ; rgb_vector:dense_vector -[127.0, 255.0, 0.0] -[124.0, 252.0, 0.0] -[173.0, 255.0, 47.0] -[154.0, 205.0, 50.0] -[50.0, 205.0, 50.0] +[127.0, 255.0, 0.0] +[255.0, 255.0, 0.0] +[128.0, 128.0, 0.0] +[0.0, 255.0, 0.0] +[218.0, 165.0, 32.0] ; knnAfterEval @@ -152,17 +150,17 @@ from 
colors metadata _score | limit 10 ; -color:text | composed_name:boolean -olive | false -olive drab | true -dark golden rod | true -saddle brown | true -dark olive green | true -sienna | false -chocolate | false -yellow green | true -peru | false -forest green | true +color:text | composed_name:boolean +olive | false +sienna | false +chocolate | false +peru | false +golden rod | true +brown | false +firebrick | false +chartreuse | false +green | false +maroon | false ; knnWithConjunction @@ -175,19 +173,15 @@ from colors metadata _score | keep color, hex_code, rgb_vector | limit 10 ; -ignoreOrder:true - -color:text | hex_code:keyword | rgb_vector:dense_vector -corn silk | #FFF8DC | [255.0, 248.0, 220.0] -floral white | #FFFAF0 | [255.0, 250.0, 240.0] -ivory | #FFFFF0 | [255.0, 255.0, 240.0] -lavender blush | #FFF0F5 | [255.0, 240.0, 245.0] -lemon chiffon | #FFFACD | [255.0, 250.0, 205.0] -light yellow | #FFFFE0 | [255.0, 255.0, 224.0] -sea shell | #FFF5EE | [255.0, 245.0, 238.0] -snow | #FFFAFA | [255.0, 250.0, 250.0] -white | #FFFFFF | [255.0, 255.0, 255.0] -yellow | #FFFF00 | [255.0, 255.0, 0.0] + +color:text | hex_code:keyword | rgb_vector:dense_vector +ivory | #FFFFF0 | [255.0, 255.0, 240.0] +sea shell | #FFF5EE | [255.0, 245.0, 238.0] +snow | #FFFAFA | [255.0, 250.0, 250.0] +white | #FFFFFF | [255.0, 255.0, 255.0] +corn silk | #FFF8DC | [255.0, 248.0, 220.0] +lemon chiffon | #FFFACD | [255.0, 250.0, 205.0] +yellow | #FFFF00 | [255.0, 255.0, 0.0] ; knnWithDisjunctionAndFiltersConjunction @@ -242,30 +236,31 @@ testKnnWithNonPushableDisjunctions required_capability: knn_function from colors metadata _score -| where knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 30}) or length(color) > 17 +| where knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 30}) or length(color) > 10 | sort _score desc, color asc | keep color ; color:text olive -light golden rod yellow -medium aqua marine -medium spring green +aqua marine +lemon chiffon +papaya whip ; 
testKnnWithNonPushableDisjunctionsOnComplexExpressions required_capability: knn_function from colors metadata _score -| where (knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 40}) and length(color) > 5) or (knn(rgb_vector, [128,0,128], {"k": 140, "similarity": 40}) and primary == false) +| where (knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], {"k": 140, "similarity": 60}) and primary == false) | sort _score desc, color asc | keep color, primary ; color:text | primary:boolean +olive | false purple | false -dark magenta | false +indigo | false ; testKnnInStatsNonPushable @@ -277,7 +272,7 @@ from colors ; c: long -59 +50 ; testKnnInStatsWithGrouping @@ -290,6 +285,6 @@ from colors ; c: long | primary: boolean -50 | false +41 | false 9 | true ; From 49addf3c13c73fda04bceea40f30a6a3ccc89bf0 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Fri, 6 Jun 2025 17:47:25 +0200 Subject: [PATCH 62/64] More test fixing --- .../src/main/resources/knn-function.csv-spec | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 0916e2fec8e18..5e65e6269e652 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -124,19 +124,19 @@ knnAfterDrop required_capability: knn_function from colors metadata _score -| drop color -| where knn(rgb_vector, [128,255,0], {"k": 140}) -| sort _score desc -| keep rgb_vector +| drop primary +| where knn(rgb_vector, [128,250,0], {"k": 140}) +| sort _score desc, color asc +| keep color, rgb_vector | limit 5 ; -rgb_vector:dense_vector -[127.0, 255.0, 0.0] -[255.0, 255.0, 0.0] -[128.0, 128.0, 0.0] -[0.0, 255.0, 0.0] -[218.0, 165.0, 32.0] +color:text | rgb_vector: dense_vector 
+chartreuse | [127.0, 255.0, 0.0] +olive | [128.0, 128.0, 0.0] +yellow | [255.0, 255.0, 0.0] +golden rod | [218.0, 165.0, 32.0] +lime | [0.0, 255.0, 0.0] ; knnAfterEval @@ -147,7 +147,7 @@ from colors metadata _score | where knn(rgb_vector, [128,128,0], {"k": 140}) | sort _score desc, color asc | keep color, composed_name -| limit 10 +| limit 5 ; color:text | composed_name:boolean @@ -156,11 +156,6 @@ sienna | false chocolate | false peru | false golden rod | true -brown | false -firebrick | false -chartreuse | false -green | false -maroon | false ; knnWithConjunction From d1cd92c5595e6f8b448c649e573a4b78e825aae4 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 9 Jun 2025 09:47:17 +0200 Subject: [PATCH 63/64] Add check for knn availability --- .../org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index b3076175867aa..a262943909938 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -14,6 +14,7 @@ import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.junit.Before; import java.io.IOException; @@ -110,6 +111,8 @@ public void testKnnNonPushedDown() { @Before public void setup() throws IOException { + assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()); + var indexName = "test"; var client = client().admin().indices(); XContentBuilder mapping = XContentFactory.jsonBuilder() From 
aaf86844916c3bf19df3bd278f175885be059b51 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 10 Jun 2025 08:01:51 +0000 Subject: [PATCH 64/64] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 1c10424765e8f..d25f5b9e81d5b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1192,7 +1192,6 @@ public enum Cap { */ KNN_FUNCTION(Build.current().isSnapshot()); - private final boolean enabled; Cap() {