Adds Groq inference service API docs.

szabosteve · szabosteve · commit eb6ebb26ce64 · 2025-12-08T12:49:40.000+01:00
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
@@ -283,6 +283,8 @@ graph,https://www.elastic.co/docs/explore-analyze/visualize/graph,,
 graph-explore-api,https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-graph,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/graph-explore-api.html,
 grok,https://www.elastic.co/docs/explore-analyze/scripting/grok,,
 grok-processor,https://www.elastic.co/docs/reference/enrich-processor/grok-processor,,
+groq-api-models,https://console.groq.com/docs/models,,Groq models,
+groq-rate-limit,https://console.groq.com/docs/rate-limits,,Groq rate limit,
 gsub-processor,https://www.elastic.co/docs/reference/enrich-processor/gsub-processor,,
 health-api,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-health-report,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/health-api.html,
 huggingface-chat-completion-interface,https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms,,
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
@@ -1555,6 +1555,41 @@ export enum GoogleVertexAIServiceType {
   googlevertexai
 }
 
+export class GroqServiceSettings {
+  /**
+   * The name of the model to use for the inference task.
+   * Refer to the Groq model documentation for the list of supported models and versions.
+   * The service has been tested and confirmed to work for the `completion` and `chat_completion` tasks with the following models:
+   * * `llama-3.3-70b-versatile`
+   * @ext_doc_id groq-api-models
+   */
+  model_id: string
+  /**
+   * A valid API key for accessing the Groq API.
+   *
+   * IMPORTANT: You need to provide the API key only once, during the inference model creation.
+   * The get inference endpoint API does not retrieve your API key.
+   * After creating the inference model, you cannot change the associated API key.
+   * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
+   */
+  api_key?: string
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from the Groq API.
+   * By default, the `groq` service sets the number of requests allowed per minute to 200. Refer to the Groq documentation for more details.
+   * @ext_doc_id groq-rate-limit
+   */
+  rate_limit?: RateLimitSetting
+}
+
+export enum GroqTaskType { // allowed `task_type` path values for the Groq PUT request
+  completion,
+  chat_completion
+}
+
+export enum GroqServiceType { // the only `service` value the Groq PUT request body accepts
+  groq
+}
+
 export class HuggingFaceServiceSettings {
   /**
    * A valid access token for your HuggingFace account.
diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts
@@ -117,6 +117,11 @@ export enum TaskTypeGoogleVertexAI {
   rerank
 }
 
+export enum TaskTypeGroq { // Groq task types; same members as GroqTaskType in CommonTypes.ts
+  completion,
+  chat_completion
+}
+
 export enum TaskTypeHuggingFace {
   chat_completion,
   completion,
diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts
@@ -36,14 +36,15 @@ import { TaskType } from '@inference/_types/TaskType'
  * * Amazon Bedrock (`completion`, `text_embedding`)
  * * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
  * * Anthropic (`completion`)
- * * Azure AI Studio (`completion`, 'rerank', `text_embedding`)
+ * * Azure AI Studio (`completion`, `rerank`, `text_embedding`)
  * * Azure OpenAI (`completion`, `text_embedding`)
  * * Cohere (`completion`, `rerank`, `text_embedding`)
  * * DeepSeek (`chat_completion`, `completion`)
  * * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)
  * * ELSER (`sparse_embedding`)
  * * Google AI Studio (`completion`, `text_embedding`)
  * * Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)
+ * * Groq (`chat_completion`, `completion`)
  * * Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)
  * * JinaAI (`rerank`, `text_embedding`)
  * * Llama (`chat_completion`, `completion`, `text_embedding`)
diff --git a/specification/inference/put_groq/PutGroqRequest.ts b/specification/inference/put_groq/PutGroqRequest.ts
@@ -0,0 +1,73 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+import {
+  GroqServiceSettings,
+  GroqServiceType,
+  GroqTaskType
+} from '@inference/_types/CommonTypes'
+
+/**
+ * Create a Groq inference endpoint.
+ *
+ * Create an inference endpoint to perform an inference task with the `groq` service.
+ * @rest_spec_name inference.put_groq
+ * @availability stack since=9.3.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-groq
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{groq_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform: `completion` or `chat_completion`.
+     */
+    task_type: GroqTaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    groq_inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference endpoint to be created.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * The service that handles the specified task type. For this request, it must be `groq`.
+     */
+    service: GroqServiceType
+    /**
+     * Settings used to install the inference model, such as the model ID and the API key. These settings are specific to the `groq` service.
+     */
+    service_settings: GroqServiceSettings
+  }
+}
diff --git a/specification/inference/put_groq/PutGroqResponse.ts b/specification/inference/put_groq/PutGroqResponse.ts
@@ -0,0 +1,25 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfoGroq } from '@inference/_types/Services'
+
+export class Response { // body type InferenceEndpointInfoGroq is declared in @inference/_types/Services
+  /** @codegen_name endpoint_info */
+  body: InferenceEndpointInfoGroq
+}
diff --git a/specification/inference/put_groq/examples/request/PutGroqRequestExample1.yaml b/specification/inference/put_groq/examples/request/PutGroqRequestExample1.yaml
@@ -0,0 +1,13 @@
+# summary:
+description: Run `PUT _inference/completion/groq-completion` to create a Groq inference endpoint that performs a `completion` task.
+method_request: 'PUT _inference/completion/groq-completion'
+# type: "request"
+value: |-
+  {
+    "service": "groq",
+    "service_settings": {
+      "model_id": "llama-3.3-70b-versatile",
+      "api_key": "groq-api-key"
+    }
+  }
+
diff --git a/specification/inference/put_groq/examples/request/PutGroqRequestExample2.yaml b/specification/inference/put_groq/examples/request/PutGroqRequestExample2.yaml
@@ -0,0 +1,14 @@
+# summary:
+description:
+  Run `PUT _inference/chat_completion/groq-chat-completion` to create a Groq inference endpoint that performs a
+  `chat_completion` task.
+method_request: 'PUT _inference/chat_completion/groq-chat-completion'
+# type: "request"
+value: |-
+  {
+    "service": "groq",
+    "service_settings": {
+      "api_key": "groq-api-key",
+      "model_id": "llama-3.3-70b-versatile"
+    }
+  }