add new COMPLETION command

miguelgrinberg · miguelgrinberg · commit 8fa1d3e4838e · 2025-07-24T16:20:26.000+01:00
diff --git a/docs/sphinx/esql.rst b/docs/sphinx/esql.rst
@@ -25,6 +25,10 @@ Commands
    :members:
    :exclude-members: __init__
 
+.. autoclass:: elasticsearch.esql.esql.ChangePoint
+   :members:
+   :exclude-members: __init__
+
 .. autoclass:: elasticsearch.esql.esql.Dissect
    :members:
    :exclude-members: __init__
diff --git a/elasticsearch/esql/esql.py b/elasticsearch/esql/esql.py
@@ -134,6 +134,47 @@ def change_point(self, value: FieldType) -> "ChangePoint":
         """
         return ChangePoint(self, value)
 
+    def completion(self, *prompt: ExpressionType, **named_prompt: ExpressionType) -> "Completion":
+        """The `COMPLETION` command allows you to send prompts and context to a Large
+        Language Model (LLM) directly within your ES|QL queries, to perform text
+        generation tasks.
+
+        :param prompt: The input text or expression used to prompt the LLM. This can
+                       be a string literal or a reference to a column containing text.
+        :param named_prompt: The input text or expression, given as a keyword argument.
+                             The argument name is used for the column name. If not
+                             specified, the results will be stored in a column named
+                             `completion`. If the specified column already exists, it
+                             will be overwritten with the new results.
+
+        Examples::
+
+            query1 = (
+                ESQL.row(question="What is Elasticsearch?")
+                .completion(E("question")).with_("test_completion_model")
+                .keep("question", "completion")
+            )
+            query2 = (
+                ESQL.row(question="What is Elasticsearch?")
+                .completion(answer=E("question")).with_("test_completion_model")
+                .keep("question", "answer")
+            )
+            query3 = (
+                ESQL.from_("movies")
+                .sort("rating DESC")
+                .limit(10)
+                .eval(prompt=\"\"\"CONCAT(
+                    "Summarize this movie using the following information: \\n",
+                    "Title: ", title, "\\n",
+                    "Synopsis: ", synopsis, "\\n",
+                    "Actors: ", MV_CONCAT(actors, ", "), "\\n",
+                )\"\"\")
+                .completion(summary="prompt").with_("test_completion_model")
+                .keep("title", "summary", "rating")
+            )
+        """
+        return Completion(self, *prompt, **named_prompt)
+
     def dissect(self, input: FieldType, pattern: str) -> "Dissect":
         """``DISSECT`` enables you to extract structured data out of a string.
 
@@ -306,43 +347,39 @@ def limit(self, max_number_of_rows: int) -> "Limit":
         """
         return Limit(self, max_number_of_rows)
 
-    def lookup_join(self, lookup_index: IndexType, field: FieldType) -> "LookupJoin":
+    def lookup_join(self, lookup_index: IndexType) -> "LookupJoin":
         """`LOOKUP JOIN` enables you to add data from another index, AKA a 'lookup' index,
         to your ES|QL query results, simplifying data enrichment and analysis workflows.
 
         :param lookup_index: The name of the lookup index. This must be a specific index
                              name - wildcards, aliases, and remote cluster references are
                              not supported. Indices used for lookups must be configured
                              with the lookup index mode.
-        :param field: The field to join on. This field must exist in both your current query
-                      results and in the lookup index. If the field contains multi-valued
-                      entries, those entries will not match anything (the added fields will
-                      contain null for those rows).
 
         Examples::
 
             query1 = (
                 ESQL.from_("firewall_logs")
-                .lookup_join("threat_list", "source.IP")
+                .lookup_join("threat_list").on("source.IP")
                 .where("threat_level IS NOT NULL")
             )
             query2 = (
                 ESQL.from_("system_metrics")
-                .lookup_join("host_inventory", "host.name")
-                .lookup_join("ownerships", "host.name")
+                .lookup_join("host_inventory").on("host.name")
+                .lookup_join("ownerships").on("host.name")
             )
             query3 = (
                 ESQL.from_("app_logs")
-                .lookup_join("service_owners", "service_id")
+                .lookup_join("service_owners").on("service_id")
             )
             query4 = (
                 ESQL.from_("employees")
                 .eval(language_code="languages")
                 .where("emp_no >= 10091 AND emp_no < 10094")
-                .lookup_join("languages_lookup", "language_code")
+                .lookup_join("languages_lookup").on("language_code")
             )
         """
-        return LookupJoin(self, lookup_index, field)
+        return LookupJoin(self, lookup_index)
 
     def mv_expand(self, column: FieldType) -> "MvExpand":
         """The `MV_EXPAND` processing command expands multivalued columns into one row per
@@ -635,6 +672,45 @@ def _render_internal(self) -> str:
         return f"CHANGE_POINT {self._value}{key}{names}"
 
 
+class Completion(ESQLBase):
+    """Implementation of the ``COMPLETION`` processing command.
+
+    This class inherits from :class:`ESQLBase <elasticsearch.esql.esql.ESQLBase>`,
+    to make it possible to chain all the commands that belong to an ES|QL query
+    in a single expression.
+    """
+
+    def __init__(self, parent: ESQLBase, *prompt: ExpressionType, **named_prompt: ExpressionType):
+        if len(prompt) + len(named_prompt) != 1:  # exactly one prompt is required
+            raise ValueError(
+                "this method requires either one positional or one keyword argument only"
+            )
+        super().__init__(parent)
+        self._prompt = prompt
+        self._named_prompt = named_prompt
+        self._inference_id = None  # set by with_(); must be set before rendering
+
+    def with_(self, inference_id: str) -> "Completion":
+        """Continuation of the `COMPLETION` command.
+
+        :param inference_id: The ID of the inference endpoint to use for the task. The
+                             inference endpoint must be configured with the completion
+                             task type.
+        """
+        self._inference_id = inference_id
+        return self
+
+    def _render_internal(self) -> str:
+        if self._inference_id is None:
+            raise ValueError("The completion command requires an inference ID")
+        if self._named_prompt:
+            # __init__ guarantees a single entry: its key is the output column name
+            column, prompt = next(iter(self._named_prompt.items()))
+            return f"COMPLETION {column} = {prompt} WITH {self._inference_id}"
+        else:
+            return f"COMPLETION {self._prompt[0]} WITH {self._inference_id}"
+
+
 class Dissect(ESQLBase):
     """Implementation of the ``DISSECT`` processing command.
 
@@ -861,12 +937,25 @@ class LookupJoin(ESQLBase):
     in a single expression.
     """
 
-    def __init__(self, parent: ESQLBase, lookup_index: IndexType, field: FieldType):
+    def __init__(self, parent: ESQLBase, lookup_index: IndexType):
         super().__init__(parent)
         self._lookup_index = lookup_index
+        self._field = None
+
+    def on(self, field: FieldType) -> "LookupJoin":
+        """Continuation of the `LOOKUP JOIN` command.
+
+        :param field: The field to join on. This field must exist in both your current query
+                      results and in the lookup index. If the field contains multi-valued
+                      entries, those entries will not match anything (the added fields will
+                      contain null for those rows).
+        """
         self._field = field
+        return self
 
     def _render_internal(self) -> str:
+        if self._field is None:
+            raise ValueError("Joins require a field to join on.")
         index = (
             self._lookup_index
             if isinstance(self._lookup_index, str)
diff --git a/test_elasticsearch/test_esql.py b/test_elasticsearch/test_esql.py
@@ -74,6 +74,70 @@ def test_change_point():
     )
 
 
+def test_completion():
+    query = (  # positional prompt: rendered without a target column
+        ESQL.row(question="What is Elasticsearch?")
+        .completion("question").with_("test_completion_model")
+        .keep("question", "completion")
+    )
+    assert query.render() == """ROW question = "What is Elasticsearch?"
+| COMPLETION question WITH test_completion_model
+| KEEP question, completion"""
+
+    query = (  # keyword prompt: the argument name becomes the target column
+        ESQL.row(question="What is Elasticsearch?")
+        .completion(answer=E("question")).with_("test_completion_model")
+        .keep("question", "answer")
+    )
+    assert query.render() == """ROW question = "What is Elasticsearch?"
+| COMPLETION answer = question WITH test_completion_model
+| KEEP question, answer"""
+
+    query = (  # prompt column built from an ES|QL CONCAT expression given as a string
+        ESQL.from_("movies")
+        .sort("rating DESC")
+        .limit(10)
+        .eval(prompt="""CONCAT(
+      "Summarize this movie using the following information: \\n",
+      "Title: ", title, "\\n",
+      "Synopsis: ", synopsis, "\\n",
+      "Actors: ", MV_CONCAT(actors, ", "), "\\n",
+  )""")
+        .completion(summary="prompt").with_("test_completion_model")
+        .keep("title", "summary", "rating")
+    )
+    assert query.render() == """FROM movies
+| SORT rating DESC
+| LIMIT 10
+| EVAL prompt = CONCAT(
+      "Summarize this movie using the following information: \\n",
+      "Title: ", title, "\\n",
+      "Synopsis: ", synopsis, "\\n",
+      "Actors: ", MV_CONCAT(actors, ", "), "\\n",
+  )
+| COMPLETION summary = prompt WITH test_completion_model
+| KEEP title, summary, rating"""
+
+    query = (  # same prompt column built with the functions.concat / mv_concat helpers
+        ESQL.from_("movies")
+        .sort("rating DESC")
+        .limit(10)
+        .eval(prompt=functions.concat(
+            "Summarize this movie using the following information: \n",
+            "Title: ", E("title"), "\n",
+            "Synopsis: ", E("synopsis"), "\n",
+            "Actors: ", functions.mv_concat(E("actors"), ", "), "\n",
+        ))
+        .completion(summary="prompt").with_("test_completion_model")
+        .keep("title", "summary", "rating")
+    )
+    assert query.render() == """FROM movies
+| SORT rating DESC
+| LIMIT 10
+| EVAL prompt = CONCAT("Summarize this movie using the following information: \\n", "Title: ", title, "\\n", "Synopsis: ", synopsis, "\\n", "Actors: ", MV_CONCAT(actors, ", "), "\\n")
+| COMPLETION summary = prompt WITH test_completion_model
+| KEEP title, summary, rating"""
+
 def test_dissect():
     query = (
         ESQL.row(a="2023-01-23T12:15:00.000Z - some text - 127.0.0.1")
@@ -260,7 +324,7 @@ def test_limit():
 def test_lookup_join():
     query = (
         ESQL.from_("firewall_logs")
-        .lookup_join("threat_list", "source.IP")
+        .lookup_join("threat_list").on("source.IP")
         .where("threat_level IS NOT NULL")
     )
     assert (
@@ -272,8 +336,8 @@ def test_lookup_join():
 
     query = (
         ESQL.from_("system_metrics")
-        .lookup_join("host_inventory", "host.name")
-        .lookup_join("ownerships", "host.name")
+        .lookup_join("host_inventory").on("host.name")
+        .lookup_join("ownerships").on("host.name")
     )
     assert (
         query.render()
@@ -282,7 +346,7 @@ def test_lookup_join():
 | LOOKUP JOIN ownerships ON host.name"""
     )
 
-    query = ESQL.from_("app_logs").lookup_join("service_owners", "service_id")
+    query = ESQL.from_("app_logs").lookup_join("service_owners").on("service_id")
     assert (
         query.render()
         == """FROM app_logs
@@ -293,7 +357,7 @@ def test_lookup_join():
         ESQL.from_("employees")
         .eval(language_code="languages")
         .where(E("emp_no") >= 10091, E("emp_no") < 10094)
-        .lookup_join("languages_lookup", "language_code")
+        .lookup_join("languages_lookup").on("language_code")
     )
     assert (
         query.render()