Skip to content

Commit f82e4b4

Browse files
committed
Add Groq as an inference provider
1 parent 68c6201 commit f82e4b4

File tree

7 files changed

+148
-0
lines changed

7 files changed

+148
-0
lines changed

packages/inference/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ Currently, we support the following providers:
5858
- [Blackforestlabs](https://blackforestlabs.ai)
5959
- [Cohere](https://cohere.com)
6060
- [Cerebras](https://cerebras.ai/)
61+
- [Groq](https://groq.com)
6162

6263
To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
6364
```ts
@@ -84,6 +85,7 @@ Only a subset of models are supported when requesting third-party providers. You
8485
- [Together supported models](https://huggingface.co/api/partners/together/models)
8586
- [Cohere supported models](https://huggingface.co/api/partners/cohere/models)
8687
- [Cerebras supported models](https://huggingface.co/api/partners/cerebras/models)
88+
- [Groq supported models](https://console.groq.com/docs/models)
8789
- [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
8890

8991
**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import * as Cerebras from "../providers/cerebras";
33
import * as Cohere from "../providers/cohere";
44
import * as FalAI from "../providers/fal-ai";
55
import * as Fireworks from "../providers/fireworks-ai";
6+
import * as Groq from "../providers/groq";
67
import * as HFInference from "../providers/hf-inference";
78

89
import * as Hyperbolic from "../providers/hyperbolic";
@@ -95,6 +96,10 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
9596
"fireworks-ai": {
9697
conversational: new Fireworks.FireworksConversationalTask(),
9798
},
99+
groq: {
100+
conversational: new Groq.GroqConversationalTask(),
101+
"text-generation": new Groq.GroqTextGenerationTask(),
102+
},
98103
hyperbolic: {
99104
"text-to-image": new Hyperbolic.HyperbolicTextToImageTask(),
100105
conversational: new Hyperbolic.HyperbolicConversationalTask(),

packages/inference/src/providers/consts.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
2121
cohere: {},
2222
"fal-ai": {},
2323
"fireworks-ai": {},
24+
groq: {},
2425
"hf-inference": {},
2526
hyperbolic: {},
2627
nebius: {},
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper";
2+
3+
/**
4+
* See the registered mapping of HF model ID => Groq model ID here:
5+
*
6+
* https://huggingface.co/api/partners/groq/models
7+
*
8+
* This is a publicly available mapping.
9+
*
10+
* If you want to try to run inference for a new model locally before it's registered on huggingface.co,
11+
* you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
12+
*
13+
* - If you work at Groq and want to update this mapping, please use the model mapping API we provide on huggingface.co
14+
* - If you're a community member and want to add a new supported HF model to Groq, please open an issue on the present repo
15+
* and we will tag Groq team members.
16+
*
17+
* Thanks!
18+
*/
19+
20+
// Scheme and host of the Groq API. Both task classes in this file pass it to their
// base-class constructor and supply the request path separately through makeRoute().
const GROQ_API_BASE_URL = "https://api.groq.com";
21+
22+
/**
 * Groq helper for the "text-generation" task.
 *
 * Hands the provider id "groq" and the Groq base URL to `BaseTextGenerationTask`
 * and overrides nothing but the route.
 *
 * NOTE(review): the route is the chat-completions path, the same one returned by
 * `GroqConversationalTask`. Confirm that the payload produced by the base
 * text-generation helper is accepted by that endpoint.
 */
export class GroqTextGenerationTask extends BaseTextGenerationTask {
23+
constructor() {
24+
super("groq", GROQ_API_BASE_URL);
25+
}
26+
27+
/**
 * @returns the request path for this task; presumably joined onto the base URL
 * by the base class (verify in providerHelper).
 */
override makeRoute(): string {
28+
return "/openai/v1/chat/completions";
29+
}
30+
}
31+
32+
/**
 * Groq helper for the "conversational" (chat completion) task.
 *
 * Hands the provider id "groq" and the Groq base URL to `BaseConversationalTask`
 * and overrides nothing but the route.
 */
export class GroqConversationalTask extends BaseConversationalTask {
33+
constructor() {
34+
super("groq", GROQ_API_BASE_URL);
35+
}
36+
37+
/**
 * @returns the chat-completions request path; presumably joined onto the base URL
 * by the base class (verify in providerHelper).
 */
override makeRoute(): string {
38+
return "/openai/v1/chat/completions";
39+
}
40+
}

packages/inference/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export const INFERENCE_PROVIDERS = [
4242
"cohere",
4343
"fal-ai",
4444
"fireworks-ai",
45+
"groq",
4546
"hf-inference",
4647
"hyperbolic",
4748
"nebius",

packages/inference/test/InferenceClient.spec.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,4 +1392,50 @@ describe.concurrent("InferenceClient", () => {
13921392
},
13931393
TIMEOUT
13941394
);
1395+
// Groq provider suite: one non-streaming and one streaming chat completion, both
// requesting the HF model id "meta-llama/Llama-3.3-70B-Instruct" with provider "groq".
describe.concurrent(
1396+
"Groq",
1397+
() => {
1398+
// Uses HF_GROQ_KEY from the environment when set, otherwise the placeholder "dummy".
const client = new InferenceClient(env.HF_GROQ_KEY ?? "dummy");
1399+
1400+
// Replaces the whole "groq" entry of the shared mapping with the single
// HF model id => Groq model id pair used by the tests below.
// NOTE(review): this mutates an imported object and is never restored; confirm no
// other suite depends on the previous "groq" entry.
HARDCODED_MODEL_ID_MAPPING["groq"] = {
1401+
"meta-llama/Llama-3.3-70B-Instruct": "llama-3.3-70b-versatile",
1402+
};
1403+
1404+
// Non-streaming call: expects the first choice's message content to contain "two".
it("chatCompletion", async () => {
1405+
const res = await client.chatCompletion({
1406+
model: "meta-llama/Llama-3.3-70B-Instruct",
1407+
provider: "groq",
1408+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1409+
});
1410+
// NOTE(review): the assertion only runs when `choices` is non-empty, so a response
// with no choices passes silently. Consider asserting on `res.choices` first.
if (res.choices && res.choices.length > 0) {
1411+
const completion = res.choices[0].message?.content;
1412+
expect(completion).toContain("two");
1413+
}
1414+
});
1415+
1416+
// Streaming call: concatenates the delta content of every chunk and checks that
// something was received.
it("chatCompletion stream", async () => {
1417+
const stream = client.chatCompletionStream({
1418+
model: "meta-llama/Llama-3.3-70B-Instruct",
1419+
provider: "groq",
1420+
messages: [{ role: "user", content: "Say 'this is a test'" }],
1421+
stream: true,
1422+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1423+
1424+
let fullResponse = "";
1425+
for await (const chunk of stream) {
1426+
// Chunks without choices, or whose first delta has no (or empty) content, are skipped.
if (chunk.choices && chunk.choices.length > 0) {
1427+
const content = chunk.choices[0].delta?.content;
1428+
if (content) {
1429+
fullResponse += content;
1430+
}
1431+
}
1432+
}
1433+
1434+
// Verify we got a meaningful response
// NOTE(review): `fullResponse` is always a string, so the second expectation is
// already implied by the first; neither checks the actual wording.
1435+
expect(fullResponse).toBeTruthy();
1436+
expect(fullResponse.length).toBeGreaterThan(0);
1437+
});
1438+
},
1439+
TIMEOUT
1440+
);
13951441
});

packages/inference/test/tapes.json

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7155,5 +7155,58 @@
71557155
"vary": "Origin, Access-Control-Request-Method, Access-Control-Request-Headers"
71567156
}
71577157
}
7158+
},
7159+
"01cb4504b502c793085788df0984db81d4f72532cebe5862d9558b0cbf07519c": {
7160+
"url": "https://api.groq.com/openai/v1/chat/completions",
7161+
"init": {
7162+
"headers": {
7163+
"Content-Type": "application/json"
7164+
},
7165+
"method": "POST",
7166+
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}],\"model\":\"llama-3.3-70b-versatile\"}"
7167+
},
7168+
"response": {
7169+
"body": "{\"id\":\"chatcmpl-9cf96a10-c66d-42d2-9853-15794c4dfa79\",\"object\":\"chat.completion\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"two.\"},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":{\"queue_time\":0.24120238,\"prompt_tokens\":47,\"prompt_time\":0.002801667,\"completion_tokens\":3,\"completion_time\":0.010909091,\"total_tokens\":50,\"total_time\":0.013710758},\"usage_breakdown\":{\"models\":null},\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"x_groq\":{\"id\":\"req_01jrkj59ndfpe9413ygn441re3\"}}",
7170+
"status": 200,
7171+
"statusText": "OK",
7172+
"headers": {
7173+
"alt-svc": "h3=\":443\"; ma=86400",
7174+
"cache-control": "private, max-age=0, no-store, no-cache, must-revalidate",
7175+
"cf-cache-status": "DYNAMIC",
7176+
"cf-ray": "92ee2f10d8113233-LAX",
7177+
"connection": "keep-alive",
7178+
"content-encoding": "br",
7179+
"content-type": "application/json",
7180+
"server": "cloudflare",
7181+
"transfer-encoding": "chunked",
7182+
"vary": "Origin, Accept-Encoding"
7183+
}
7184+
}
7185+
},
7186+
"5688b06e0eb91dd68eef47fad94783b8b38a56cceae637c57521a48d4711ff2d": {
7187+
"url": "https://api.groq.com/openai/v1/chat/completions",
7188+
"init": {
7189+
"headers": {
7190+
"Content-Type": "application/json"
7191+
},
7192+
"method": "POST",
7193+
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Say 'this is a test'\"}],\"stream\":true,\"model\":\"llama-3.3-70b-versatile\"}"
7194+
},
7195+
"response": {
7196+
"body": "data: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}],\"x_groq\":{\"id\":\"req_01jrkj59p0ef3b9j9h4eft5yd8\"}}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"This\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" is\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" a\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" test\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: 
{\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\".\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"x_groq\":{\"id\":\"req_01jrkj59p0ef3b9j9h4eft5yd8\",\"usage\":{\"queue_time\":0.24191241300000002,\"prompt_tokens\":42,\"prompt_time\":0.010381096,\"completion_tokens\":6,\"completion_time\":0.021818182,\"total_tokens\":48,\"total_time\":0.032199278}}}\n\ndata: [DONE]\n\n",
7197+
"status": 200,
7198+
"statusText": "OK",
7199+
"headers": {
7200+
"alt-svc": "h3=\":443\"; ma=86400",
7201+
"cache-control": "no-cache",
7202+
"cf-cache-status": "DYNAMIC",
7203+
"cf-ray": "92ee2f10d8a1341c-LAX",
7204+
"connection": "keep-alive",
7205+
"content-type": "text/event-stream",
7206+
"server": "cloudflare",
7207+
"transfer-encoding": "chunked",
7208+
"vary": "Origin, Accept-Encoding"
7209+
}
7210+
}
71587211
}
71597212
}

0 commit comments

Comments
 (0)