Add Groq as an inference provider

benank · benank · commit f82e4b4e6da3 · 2025-04-11T16:19:32.000-07:00
diff --git a/packages/inference/README.md b/packages/inference/README.md
@@ -58,6 +58,7 @@ Currently, we support the following providers:
 - [Blackforestlabs](https://blackforestlabs.ai)
 - [Cohere](https://cohere.com)
 - [Cerebras](https://cerebras.ai/)
+- [Groq](https://groq.com)
 
 To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. Make sure your request is authenticated with an access token.
 ```ts
@@ -84,6 +85,7 @@ Only a subset of models are supported when requesting third-party providers. You
 - [Together supported models](https://huggingface.co/api/partners/together/models)
 - [Cohere supported models](https://huggingface.co/api/partners/cohere/models)
 - [Cerebras supported models](https://huggingface.co/api/partners/cerebras/models)
+- [Groq supported models](https://console.groq.com/docs/models)
 - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
 
 ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.
diff --git a/packages/inference/src/lib/getProviderHelper.ts b/packages/inference/src/lib/getProviderHelper.ts
@@ -3,6 +3,7 @@ import * as Cerebras from "../providers/cerebras";
 import * as Cohere from "../providers/cohere";
 import * as FalAI from "../providers/fal-ai";
 import * as Fireworks from "../providers/fireworks-ai";
+import * as Groq from "../providers/groq";
 import * as HFInference from "../providers/hf-inference";
 
 import * as Hyperbolic from "../providers/hyperbolic";
@@ -95,6 +96,10 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
 	"fireworks-ai": {
 		conversational: new Fireworks.FireworksConversationalTask(),
 	},
+	groq: {
+		conversational: new Groq.GroqConversationalTask(),
+		"text-generation": new Groq.GroqTextGenerationTask(),
+	},
 	hyperbolic: {
 		"text-to-image": new Hyperbolic.HyperbolicTextToImageTask(),
 		conversational: new Hyperbolic.HyperbolicConversationalTask(),
diff --git a/packages/inference/src/providers/consts.ts b/packages/inference/src/providers/consts.ts
@@ -21,6 +21,7 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
 	cohere: {},
 	"fal-ai": {},
 	"fireworks-ai": {},
+	groq: {},
 	"hf-inference": {},
 	hyperbolic: {},
 	nebius: {},
diff --git a/packages/inference/src/providers/groq.ts b/packages/inference/src/providers/groq.ts
@@ -0,0 +1,40 @@
+import { BaseConversationalTask, BaseTextGenerationTask } from "./providerHelper";
+
+/**
+ * See the registered mapping of HF model ID => Groq model ID here:
+ *
+ * https://huggingface.co/api/partners/groq/models
+ *
+ * This is a publicly available mapping.
+ *
+ * If you want to try to run inference for a new model locally before it's registered on huggingface.co,
+ * you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
+ *
+ * - If you work at Groq and want to update this mapping, please use the model mapping API we provide on huggingface.co
+ * - If you're a community member and want to add a new supported HF model to Groq, please open an issue on the present repo
+ * and we will tag Groq team members.
+ *
+ * Thanks!
+ */
+
+const GROQ_API_BASE_URL = "https://api.groq.com";
+
+export class GroqTextGenerationTask extends BaseTextGenerationTask {
+	constructor() {
+		super("groq", GROQ_API_BASE_URL);
+	}
+
+	override makeRoute(): string {
+		return "/openai/v1/chat/completions";
+	}
+}
+
+export class GroqConversationalTask extends BaseConversationalTask {
+	constructor() {
+		super("groq", GROQ_API_BASE_URL);
+	}
+
+	override makeRoute(): string {
+		return "/openai/v1/chat/completions";
+	}
+}
diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts
@@ -42,6 +42,7 @@ export const INFERENCE_PROVIDERS = [
 	"cohere",
 	"fal-ai",
 	"fireworks-ai",
+	"groq",
 	"hf-inference",
 	"hyperbolic",
 	"nebius",
diff --git a/packages/inference/test/InferenceClient.spec.ts b/packages/inference/test/InferenceClient.spec.ts
@@ -1392,4 +1392,50 @@ describe.concurrent("InferenceClient", () => {
 		},
 		TIMEOUT
 	);
+	describe.concurrent(
+		"Groq",
+		() => {
+			const client = new InferenceClient(env.HF_GROQ_KEY ?? "dummy");
+
+			HARDCODED_MODEL_ID_MAPPING["groq"] = {
+				"meta-llama/Llama-3.3-70B-Instruct": "llama-3.3-70b-versatile",
+			};
+
+			it("chatCompletion", async () => {
+				const res = await client.chatCompletion({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "groq",
+					messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+				});
+				if (res.choices && res.choices.length > 0) {
+					const completion = res.choices[0].message?.content;
+					expect(completion).toContain("two");
+				}
+			});
+
+			it("chatCompletion stream", async () => {
+				const stream = client.chatCompletionStream({
+					model: "meta-llama/Llama-3.3-70B-Instruct",
+					provider: "groq",
+					messages: [{ role: "user", content: "Say 'this is a test'" }],
+					stream: true,
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+
+				let fullResponse = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						const content = chunk.choices[0].delta?.content;
+						if (content) {
+							fullResponse += content;
+						}
+					}
+				}
+
+				// Verify we got a meaningful response
+				expect(fullResponse).toBeTruthy();
+				expect(fullResponse.length).toBeGreaterThan(0);
+			});
+		},
+		TIMEOUT
+	);
 });
diff --git a/packages/inference/test/tapes.json b/packages/inference/test/tapes.json
@@ -7155,5 +7155,58 @@
         "vary": "Origin, Access-Control-Request-Method, Access-Control-Request-Headers"
       }
     }
+  },
+  "01cb4504b502c793085788df0984db81d4f72532cebe5862d9558b0cbf07519c": {
+    "url": "https://api.groq.com/openai/v1/chat/completions",
+    "init": {
+      "headers": {
+        "Content-Type": "application/json"
+      },
+      "method": "POST",
+      "body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}],\"model\":\"llama-3.3-70b-versatile\"}"
+    },
+    "response": {
+      "body": "{\"id\":\"chatcmpl-9cf96a10-c66d-42d2-9853-15794c4dfa79\",\"object\":\"chat.completion\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"two.\"},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":{\"queue_time\":0.24120238,\"prompt_tokens\":47,\"prompt_time\":0.002801667,\"completion_tokens\":3,\"completion_time\":0.010909091,\"total_tokens\":50,\"total_time\":0.013710758},\"usage_breakdown\":{\"models\":null},\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"x_groq\":{\"id\":\"req_01jrkj59ndfpe9413ygn441re3\"}}",
+      "status": 200,
+      "statusText": "OK",
+      "headers": {
+        "alt-svc": "h3=\":443\"; ma=86400",
+        "cache-control": "private, max-age=0, no-store, no-cache, must-revalidate",
+        "cf-cache-status": "DYNAMIC",
+        "cf-ray": "92ee2f10d8113233-LAX",
+        "connection": "keep-alive",
+        "content-encoding": "br",
+        "content-type": "application/json",
+        "server": "cloudflare",
+        "transfer-encoding": "chunked",
+        "vary": "Origin, Accept-Encoding"
+      }
+    }
+  },
+  "5688b06e0eb91dd68eef47fad94783b8b38a56cceae637c57521a48d4711ff2d": {
+    "url": "https://api.groq.com/openai/v1/chat/completions",
+    "init": {
+      "headers": {
+        "Content-Type": "application/json"
+      },
+      "method": "POST",
+      "body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Say 'this is a test'\"}],\"stream\":true,\"model\":\"llama-3.3-70b-versatile\"}"
+    },
+    "response": {
+      "body": "data: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"role\":\"assistant\",\"content\":\"\"},\"logprobs\":null,\"finish_reason\":null}],\"x_groq\":{\"id\":\"req_01jrkj59p0ef3b9j9h4eft5yd8\"}}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\"This\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" is\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" a\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\" test\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{\"content\":\".\"},\"logprobs\":null,\"finish_reason\":null}]}\n\ndata: {\"id\":\"chatcmpl-c3759b48-d1e1-4ab6-80c9-8b83b47644bb\",\"object\":\"chat.completion.chunk\",\"created\":1744413304,\"model\":\"llama-3.3-70b-versatile\",\"system_fingerprint\":\"fp_3f3b593e33\",\"instance_id\":\"LLAMA-33-70B-DMM1-PROD2-1\",\"choices\":[{\"index\":0,\"delta\":{},\"logprobs\":null,\"finish_reason\":\"stop\"}],\"x_groq\":{\"id\":\"req_01jrkj59p0ef3b9j9h4eft5yd8\",\"usage\":{\"queue_time\":0.24191241300000002,\"prompt_tokens\":42,\"prompt_time\":0.010381096,\"completion_tokens\":6,\"completion_time\":0.021818182,\"total_tokens\":48,\"total_time\":0.032199278}}}\n\ndata: [DONE]\n\n",
+      "status": 200,
+      "statusText": "OK",
+      "headers": {
+        "alt-svc": "h3=\":443\"; ma=86400",
+        "cache-control": "no-cache",
+        "cf-cache-status": "DYNAMIC",
+        "cf-ray": "92ee2f10d8a1341c-LAX",
+        "connection": "keep-alive",
+        "content-type": "text/event-stream",
+        "server": "cloudflare",
+        "transfer-encoding": "chunked",
+        "vary": "Origin, Accept-Encoding"
+      }
+    }
   }
 }