Skip to content

Commit eb6ebb2

Browse files
committed
Adds Groq inference service API docs.
1 parent 6566f69 commit eb6ebb2

File tree

8 files changed

+169
-1
lines changed

8 files changed

+169
-1
lines changed

specification/_doc_ids/table.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ graph,https://www.elastic.co/docs/explore-analyze/visualize/graph,,
283283
graph-explore-api,https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-graph,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/graph-explore-api.html,
284284
grok,https://www.elastic.co/docs/explore-analyze/scripting/grok,,
285285
grok-processor,https://www.elastic.co/docs/reference/enrich-processor/grok-processor,,
286+
groq-api-models,https://console.groq.com/docs/models,,Groq models,
287+
groq-rate-limit,https://console.groq.com/docs/rate-limits,,Groq rate limit,
286288
gsub-processor,https://www.elastic.co/docs/reference/enrich-processor/gsub-processor,,
287289
health-api,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-health-report,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/health-api.html,
288290
huggingface-chat-completion-interface,https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms,,

specification/inference/_types/CommonTypes.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,41 @@ export enum GoogleVertexAIServiceType {
15551555
googlevertexai
15561556
}
15571557

1558+
/**
 * Service settings for an inference endpoint that uses the `groq` service.
 */
export class GroqServiceSettings {
1559+
/**
1560+
* The name of the model to use for the inference task.
1561+
* Refer to the Groq model documentation for the list of supported models and versions.
1562+
* The service has been tested and confirmed to be working for `completion` and `chat_completion` tasks with the following models:
1563+
* * `llama-3.3-70b-versatile`
1564+
* @ext_doc_id groq-api-models
1565+
*/
1566+
model_id: string
1567+
/**
1568+
* A valid API key for accessing the Groq API.
1569+
*
1570+
* IMPORTANT: You need to provide the API key only once, during the inference model creation.
1571+
* The get inference endpoint API does not retrieve your API key.
1572+
* After creating the inference model, you cannot change the associated API key.
1573+
* If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.
1574+
*/
1575+
// NOTE(review): declared optional (`?`) although the description above reads as if the key must be supplied at creation — confirm.
api_key?: string
1576+
/**
1577+
* This setting helps to minimize the number of rate limit errors returned from the Groq API.
1578+
* By default, the `groq` service sets the number of requests allowed per minute to 200. Refer to the Groq documentation for more details.
1579+
* @ext_doc_id groq-rate-limit
1580+
*/
1581+
rate_limit?: RateLimitSetting
1582+
}
1583+
1584+
/**
 * Task types that can be given as the `task_type` path part when creating a Groq inference endpoint.
 */
export enum GroqTaskType {
1585+
completion,
1586+
chat_completion
1587+
}
1588+
1589+
/**
 * Value of the `service` body field when creating a Groq inference endpoint; the only member is `groq`.
 */
export enum GroqServiceType {
1590+
groq
1591+
}
1592+
15581593
export class HuggingFaceServiceSettings {
15591594
/**
15601595
* A valid access token for your HuggingFace account.

specification/inference/_types/TaskType.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ export enum TaskTypeGoogleVertexAI {
117117
rerank
118118
}
119119

120+
/**
 * Task types available for the Groq service: `completion` and `chat_completion`.
 */
// NOTE(review): presumably referenced by the Groq endpoint-info type; its usage is not visible here — confirm.
export enum TaskTypeGroq {
121+
completion,
122+
chat_completion
123+
}
124+
120125
export enum TaskTypeHuggingFace {
121126
chat_completion,
122127
completion,

specification/inference/put/PutRequest.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,15 @@ import { TaskType } from '@inference/_types/TaskType'
3636
* * Amazon Bedrock (`completion`, `text_embedding`)
3737
* * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
3838
* * Anthropic (`completion`)
39-
* * Azure AI Studio (`completion`, 'rerank', `text_embedding`)
39+
* * Azure AI Studio (`completion`, `rerank`, `text_embedding`)
4040
* * Azure OpenAI (`completion`, `text_embedding`)
4141
* * Cohere (`completion`, `rerank`, `text_embedding`)
4242
* * DeepSeek (`chat_completion`, `completion`)
4343
* * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)
4444
* * ELSER (`sparse_embedding`)
4545
* * Google AI Studio (`completion`, `text_embedding`)
4646
* * Google Vertex AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)
47+
* * Groq (`chat_completion`, `completion`)
4748
* * Hugging Face (`chat_completion`, `completion`, `rerank`, `text_embedding`)
4849
* * JinaAI (`rerank`, `text_embedding`)
4950
* * Llama (`chat_completion`, `completion`, `text_embedding`)
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { RequestBase } from '@_types/Base'
21+
import { Id } from '@_types/common'
22+
import { Duration } from '@_types/Time'
23+
import {
24+
GroqServiceSettings,
25+
GroqServiceType,
26+
GroqTaskType
27+
} from '@inference/_types/CommonTypes'
28+
29+
/**
30+
* Create a Groq inference endpoint.
31+
*
32+
* Create an inference endpoint to perform an inference task with the `groq` service.
33+
* @rest_spec_name inference.put_groq
34+
* @availability stack since=9.3.0 stability=stable visibility=public
35+
* @availability serverless stability=stable visibility=public
36+
* @cluster_privileges manage_inference
37+
* @doc_id inference-api-put-groq
38+
*/
39+
export interface Request extends RequestBase {
40+
urls: [
41+
{
42+
path: '/_inference/{task_type}/{groq_inference_id}'
43+
methods: ['PUT']
44+
}
45+
]
46+
path_parts: {
47+
/**
48+
* The type of the inference task that the model will perform.
* Valid values are the members of `GroqTaskType`: `completion` and `chat_completion`.
49+
*/
50+
task_type: GroqTaskType
51+
/**
52+
* The unique identifier of the inference endpoint.
53+
*/
// The path parameter is named `groq_inference_id`, matching the `{groq_inference_id}` placeholder in the URL above.
54+
groq_inference_id: Id
55+
}
56+
query_parameters: {
57+
/**
58+
* Specifies the amount of time to wait for the inference endpoint to be created.
59+
* @server_default 30s
60+
*/
61+
timeout?: Duration
62+
}
63+
body: {
64+
/**
65+
* The type of service supported for the specified task type. In this case, `groq`.
66+
*/
67+
service: GroqServiceType
68+
/**
69+
* Settings used to install the inference model. These settings are specific to the `groq` service.
70+
*/
71+
service_settings: GroqServiceSettings
72+
}
73+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { InferenceEndpointInfoGroq } from '@inference/_types/Services'
21+
22+
/**
 * Response whose body is an `InferenceEndpointInfoGroq`.
 */
// NOTE(review): presumably the response for the Groq create-endpoint request; the file name is not visible here — confirm.
export class Response {
23+
/** @codegen_name endpoint_info */
24+
body: InferenceEndpointInfoGroq
25+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# summary:
2+
description: Run `PUT _inference/completion/groq-completion` to create a Groq inference endpoint that performs a `completion` task.
3+
method_request: 'PUT _inference/completion/groq-completion'
4+
# type: "request"
5+
value: |-
6+
{
7+
"service": "groq",
8+
"service_settings": {
9+
"model_id": "llama-3.3-70b-versatile",
10+
"api_key": "groq-api-key"
11+
}
12+
}
13+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# summary:
2+
description:
3+
Run `PUT _inference/chat_completion/groq-chat-completion` to create a Groq inference endpoint that performs a
4+
`chat_completion` task.
5+
method_request: 'PUT _inference/chat_completion/groq-chat-completion'
6+
# type: "request"
7+
value: |-
8+
{
9+
"service": "groq",
10+
"service_settings": {
11+
"api_key": "groq-api-key",
12+
"model_id": "llama-3.3-70b-versatile"
13+
}
14+
}

0 commit comments

Comments
 (0)