diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/text_embedding.md b/docs/reference/query-languages/esql/_snippets/functions/description/text_embedding.md new file mode 100644 index 0000000000000..70ebddbdb7c0a --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/text_embedding.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Generates dense vector embeddings for text using a specified inference endpoint. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/text_embedding.md b/docs/reference/query-languages/esql/_snippets/functions/examples/text_embedding.md new file mode 100644 index 0000000000000..71d05c9524350 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/text_embedding.md @@ -0,0 +1,13 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +Generate text embeddings using the 'test_dense_inference' inference endpoint. + +```esql +ROW input="Who is Victor Hugo?" +| EVAL embedding = TEXT_EMBEDDING("Who is Victor Hugo?", "test_dense_inference") +; +``` + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/text_embedding.md b/docs/reference/query-languages/esql/_snippets/functions/layout/text_embedding.md new file mode 100644 index 0000000000000..a120fff2d7a22 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/text_embedding.md @@ -0,0 +1,27 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `TEXT_EMBEDDING` [esql-text_embedding] +```{applies_to} +stack: development +serverless: preview +``` + +**Syntax** + +:::{image} ../../../images/functions/text_embedding.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/text_embedding.md +::: + +:::{include} ../description/text_embedding.md +::: + +:::{include} ../types/text_embedding.md +::: + +:::{include} ../examples/text_embedding.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md index e2b852912c5f5..80175caaf09dd 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md @@ -1,4 +1,4 @@ -% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. **Parameters** diff --git a/docs/reference/query-languages/esql/_snippets/functions/types/text_embedding.md b/docs/reference/query-languages/esql/_snippets/functions/types/text_embedding.md new file mode 100644 index 0000000000000..6e45a6eb84c5c --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/types/text_embedding.md @@ -0,0 +1,8 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +**Supported types** + +| text | inference_id | result | +| --- | --- | --- | +| keyword | keyword | dense_vector | + diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json b/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json index 5f1f68a2b14bd..343cf597a0a53 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json @@ -3,7 +3,26 @@ "type" : "scalar", "name" : "text_embedding", "description" : "Generates dense vector embeddings for text using a specified inference endpoint.", - "signatures" : [ ], + "signatures" : [ + { + "params" : [ + { + "name" : "text", + "type" : "keyword", + "optional" : false, + "description" : "Text to generate embeddings from" + }, + { + "name" : "inference_id", + "type" : "keyword", + "optional" : false, + "description" : "Identifier of the inference endpoint" + } + ], + "variadic" : false, + "returnType" : "dense_vector" + } + ], "examples" : [ "ROW input=\"Who is Victor Hugo?\"\n| EVAL embedding = TEXT_EMBEDDING(\"Who is Victor Hugo?\", \"test_dense_inference\")\n;" ], diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java index fed250051d84d..2ecd089dedd2f 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java @@ -77,6 +77,7 @@ import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.RERANK; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.SEMANTIC_TEXT_FIELD_CAPS; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.SOURCE_FIELD_MAPPING; +import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION; import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.assertNotPartial; import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.hasCapabilities; @@ -224,7 +225,8 @@ protected boolean requiresInferenceEndpoint() { SEMANTIC_TEXT_FIELD_CAPS.capabilityName(), RERANK.capabilityName(), COMPLETION.capabilityName(), - KNN_FUNCTION_V5.capabilityName() + KNN_FUNCTION_V5.capabilityName(), + TEXT_EMBEDDING_FUNCTION.capabilityName() ).anyMatch(testCase.requiredCapabilities::contains); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec index f026800598e10..86e0fcd0eb6a4 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec @@ -1,6 +1,6 @@ -placeholder +text_embedding using a row source operator required_capability: text_embedding_function -required_capability: not_existing_capability +required_capability: dense_vector_field_type_released // tag::embedding-eval[] ROW input="Who is Victor Hugo?" @@ -8,8 +8,84 @@ ROW input="Who is Victor Hugo?" ; // end::embedding-eval[] +input:keyword | embedding:dense_vector +Who is Victor Hugo? 
| [56.0, 50.0, 48.0] +; + + +text_embedding using a row source operator with query build using CONCAT +required_capability: text_embedding_function +required_capability: dense_vector_field_type_released + +ROW input="Who is Victor Hugo?" +| EVAL embedding = TEXT_EMBEDDING(CONCAT("Who is ", "Victor Hugo?"), "test_dense_inference") +; input:keyword | embedding:dense_vector Who is Victor Hugo? | [56.0, 50.0, 48.0] ; + +text_embedding with knn on semantic_text_dense_field +required_capability: text_embedding_function +required_capability: dense_vector_field_type_released +required_capability: knn_function_v5 +required_capability: semantic_text_field_caps + +FROM semantic_text METADATA _score +| EVAL query_embedding = TEXT_EMBEDDING("be excellent to each other", "test_dense_inference") +| WHERE KNN(semantic_text_dense_field, query_embedding) +| SORT _score DESC +| LIMIT 10 +| KEEP semantic_text_field, query_embedding +; + +semantic_text_field:text | query_embedding:dense_vector +be excellent to each other | [45.0, 55.0, 54.0] +live long and prosper | [45.0, 55.0, 54.0] +all we have to decide is what to do with the time that is given to us | [45.0, 55.0, 54.0] +; + +text_embedding with knn (inline) on semantic_text_dense_field +required_capability: text_embedding_function +required_capability: dense_vector_field_type_released +required_capability: knn_function_v5 +required_capability: semantic_text_field_caps + +FROM semantic_text METADATA _score +| WHERE KNN(semantic_text_dense_field, TEXT_EMBEDDING("be excellent to each other", "test_dense_inference")) +| SORT _score DESC +| LIMIT 10 +| KEEP semantic_text_field +; + +semantic_text_field:text +be excellent to each other +live long and prosper +all we have to decide is what to do with the time that is given to us +; + + +text_embedding with multiple knn queries in fork +required_capability: text_embedding_function +required_capability: dense_vector_field_type_released +required_capability: knn_function_v5 +required_capability: fork_v9 +required_capability: semantic_text_field_caps + +FROM semantic_text METADATA _score +| FORK (EVAL query_embedding = TEXT_EMBEDDING("be excellent to each other", "test_dense_inference") | WHERE KNN(semantic_text_dense_field, query_embedding)) + (EVAL query_embedding = TEXT_EMBEDDING("live long and prosper", "test_dense_inference") | WHERE KNN(semantic_text_dense_field, query_embedding)) +| SORT _score DESC, _fork ASC +| LIMIT 10 +| KEEP semantic_text_field, query_embedding, _fork +; + +semantic_text_field:text | query_embedding:dense_vector | _fork:keyword +be excellent to each other | [45.0, 55.0, 54.0] | fork1 +live long and prosper | [50.0, 57.0, 56.0] | fork2 +live long and prosper | [45.0, 55.0, 54.0] | fork1 +be excellent to each other | [50.0, 57.0, 56.0] | fork2 +all we have to decide is what to do with the time that is given to us | [45.0, 55.0, 54.0] | fork1 +all we have to decide is what to do with the time that is given to us | [50.0, 57.0, 56.0] | fork2 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java index 2043176f24a29..974b73718ff0b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java @@ -88,7 +88,7 @@ public void esql( indexResolver, enrichPolicyResolver, preAnalyzer, - new LogicalPlanPreOptimizer(new 
LogicalPreOptimizerContext(foldContext)), + new LogicalPlanPreOptimizer(new LogicalPreOptimizerContext(foldContext, services.inferenceService())), functionRegistry, new LogicalPlanOptimizer(new LogicalOptimizerContext(cfg, foldContext)), mapper, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceFunctionEvaluator.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceFunctionEvaluator.java new file mode 100644 index 0000000000000..e5cc4301c5683 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceFunctionEvaluator.java @@ -0,0 +1,228 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.inference; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Operator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.indices.breaker.AllCircuitBreakerStats; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.indices.breaker.CircuitBreakerStats; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.evaluator.EvalMapper; +import org.elasticsearch.xpack.esql.expression.function.inference.InferenceFunction; +import org.elasticsearch.xpack.esql.expression.function.inference.TextEmbedding; +import org.elasticsearch.xpack.esql.inference.textembedding.TextEmbeddingOperator; + +/** + * Evaluator for inference functions that performs constant folding by executing inference operations + * at optimization time and replacing them with their computed results. + */ +public class InferenceFunctionEvaluator { + + private static final Factory FACTORY = new Factory(); + + public static InferenceFunctionEvaluator.Factory factory() { + return FACTORY; + } + + private final FoldContext foldContext; + private final InferenceOperatorProvider inferenceOperatorProvider; + + /** + * Creates a new inference function evaluator with a custom operator provider. + * This constructor is primarily used for testing to inject mock operator providers. + * + * @param foldContext the fold context containing circuit breakers and evaluation settings + * @param inferenceOperatorProvider custom provider for creating inference operators + */ + InferenceFunctionEvaluator(FoldContext foldContext, InferenceOperatorProvider inferenceOperatorProvider) { + this.foldContext = foldContext; + this.inferenceOperatorProvider = inferenceOperatorProvider; + } + + /** + * Folds an inference function by executing it and replacing it with its computed result. + *
+ * This method performs the following steps: + *
+ * This interface abstracts the creation of inference operators for different function types, + * allowing for easier testing and potential future extensibility. The provider is responsible + * for creating an appropriate operator instance given an inference function and driver context. + */ + interface InferenceOperatorProvider { + /** + * Creates an inference operator for the given function and driver context. + * + * @param f the inference function to create an operator for + * @param driverContext the driver context to use for operator creation + * @return an operator instance configured for the given function + */ + Operator getOperator(InferenceFunction> f, DriverContext driverContext); + } + + /** + * Factory for creating {@link InferenceFunctionEvaluator} instances. + */ + public static class Factory { + private Factory() {} + + /** + * Creates a new inference function evaluator. + * + * @param foldContext the fold context + * @param inferenceService the inference service + * @return a new instance of {@link InferenceFunctionEvaluator} + */ + public InferenceFunctionEvaluator create(FoldContext foldContext, InferenceService inferenceService) { + return new InferenceFunctionEvaluator(foldContext, createInferenceOperatorProvider(foldContext, inferenceService)); + } + + /** + * Creates an {@link InferenceOperatorProvider} that can produce operators for all supported inference functions. + */ + private InferenceOperatorProvider createInferenceOperatorProvider(FoldContext foldContext, InferenceService inferenceService) { + return (inferenceFunction, driverContext) -> { + Operator.OperatorFactory operatorFactory = switch (inferenceFunction) { + case TextEmbedding textEmbedding -> new TextEmbeddingOperator.Factory( + inferenceService, + inferenceId(inferenceFunction, foldContext), + expressionEvaluatorFactory(textEmbedding.inputText(), foldContext) + ); + default -> throw new IllegalArgumentException("Unknown inference function: " + inferenceFunction.getClass().getName()); + }; + + return operatorFactory.get(driverContext); + }; + } + + /** + * Extracts the inference endpoint ID from an inference function. + * + * @param f the inference function containing the inference ID + * @return the inference endpoint ID as a string + */ + private String inferenceId(InferenceFunction> f, FoldContext foldContext) { + return BytesRefs.toString(f.inferenceId().fold(foldContext)); + } + + /** + * Creates an expression evaluator factory for a foldable expression. + *
+ * This method converts a foldable expression into an evaluator factory that can be used by inference + * operators. The expression is first folded to its constant value and then wrapped in a literal. + * + * @param e the foldable expression to create an evaluator factory for + * @return an expression evaluator factory for the given expression + */ + private EvalOperator.ExpressionEvaluator.Factory expressionEvaluatorFactory(Expression e, FoldContext foldContext) { + assert e.foldable() : "Input expression must be foldable"; + return EvalMapper.toEvaluator(foldContext, Literal.of(foldContext, e), null); + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanPreOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanPreOptimizer.java index fdd8e1318f636..11c57b38c2331 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanPreOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanPreOptimizer.java @@ -8,8 +8,13 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.SubscribableListener; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.preoptimizer.FoldInferenceFunctions; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.preoptimizer.LogicalPlanPreOptimizerRule; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import java.util.List; + /** * The class is responsible for invoking any steps that need to be applied to the logical plan, * before this is being optimized. @@ -18,11 +23,14 @@ *
*/ public class LogicalPlanPreOptimizer { - - private final LogicalPreOptimizerContext preOptimizerContext; + private final List+ * This rule identifies inference functions with constant parameters and evaluates them at optimization time, + * replacing them with their computed results. + *
+ * The folding process is recursive and handles nested inference functions by processing them in multiple + * passes until no more foldable functions remain. + *
+ * Example transformation:
+ * {@code TEXT_EMBEDDING("hello world", "model1")} → {@code [0.1, 0.2, 0.3, ...]}
+ */
+public class FoldInferenceFunctions implements LogicalPlanPreOptimizerRule {
+
+ private final InferenceFunctionEvaluator inferenceFunctionEvaluator;
+
+ public FoldInferenceFunctions(LogicalPreOptimizerContext preOptimizerContext) {
+ this(InferenceFunctionEvaluator.factory().create(preOptimizerContext.foldCtx(), preOptimizerContext.inferenceService()));
+ }
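+ // Illustrative sketch only (not part of this change): the pre-optimizer is expected to build this
+ // rule from its context when assembling its rule list, along the lines of
+ //
+ //     List<LogicalPlanPreOptimizerRule> rules = List.of(new FoldInferenceFunctions(preOptimizerContext));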
+
+ protected FoldInferenceFunctions(InferenceFunctionEvaluator inferenceFunctionEvaluator) {
+ this.inferenceFunctionEvaluator = inferenceFunctionEvaluator;
+ }
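+ // Usage sketch (illustrative; the exact listener wiring is an assumption based on the asynchronous
+ // LogicalPlanPreOptimizerRule contract): callers invoke the rule with a plan and an ActionListener, e.g.
+ //
+ //     rule.apply(analyzedPlan, ActionListener.wrap(
+ //         foldedPlan -> { /* continue pre-optimization with the folded plan */ },
+ //         failure -> { /* surface the inference failure */ }));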
+
+ @Override
+ public void apply(LogicalPlan plan, ActionListener<LogicalPlan> listener) {
+ * This method collects all foldable inference functions, evaluates them in parallel,
+ * and then replaces them with their computed results. If new foldable inference functions appear
+ * after the first round of folding (because nested functions have now been resolved), they are
+ * processed recursively until no foldable functions remain.
+ *
+ * A function is considered foldable if it meets all of the following criteria:
+ * <ul>
+ *   <li>all of its arguments are constant (foldable) expressions</li>
+ *   <li>it does not contain other inference functions that still need to be folded</li>
+ * </ul>
+ *
+ * Functions with nested inference functions are excluded to ensure proper evaluation order.
+ * They will be considered for folding in subsequent recursive passes after their nested
+ * functions have been resolved.
+ *
+ * @param plan the logical plan to collect inference functions from
+ * @return a list of foldable inference functions; may be empty if none are found
+ */
+ private List<InferenceFunction<?>> collectFoldableInferenceFunctions(LogicalPlan plan) {
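+ // Sketch of the remainder (illustrative only — this excerpt is truncated, and the method name above,
+ // the traversal calls and the containsNestedInferenceFunction helper below are assumptions rather
+ // than the actual implementation):
+ //
+ //     List<InferenceFunction<?>> foldable = new ArrayList<>();
+ //     plan.forEachExpressionUp(InferenceFunction.class, f -> {
+ //         if (Expressions.foldable(f.children()) && containsNestedInferenceFunction(f) == false) {
+ //             foldable.add(f);
+ //         }
+ //     });
+ //     return foldable;
+ //
+ // Each collected function is then evaluated through inferenceFunctionEvaluator and the plan is
+ // rewritten by replacing every folded function with its computed literal (results below mapping each
+ // function to that literal), for example:
+ //
+ //     LogicalPlan folded = plan.transformExpressionsUp(InferenceFunction.class, f -> results.getOrDefault(f, f));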