Skip to content

Commit 9452dc2

Browse files
committed
Merge branch 'main' into feat/novita
2 parents d82fc7b + c1a8dfc commit 9452dc2

File tree

11 files changed

+180
-15
lines changed

11 files changed

+180
-15
lines changed

.github/workflows/test.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ jobs:
4646
HF_SAMBANOVA_KEY: dummy
4747
HF_TOGETHER_KEY: dummy
4848
HF_NOVITA_KEY: dummy
49+
HF_FIREWORKS_KEY: dummy
4950

5051
browser:
5152
runs-on: ubuntu-latest
@@ -87,6 +88,7 @@ jobs:
8788
HF_SAMBANOVA_KEY: dummy
8889
HF_TOGETHER_KEY: dummy
8990
HF_NOVITA_KEY: dummy
91+
HF_FIREWORKS_KEY: dummy
9092

9193
e2e:
9294
runs-on: ubuntu-latest
@@ -154,4 +156,5 @@ jobs:
154156
HF_REPLICATE_KEY: dummy
155157
HF_SAMBANOVA_KEY: dummy
156158
HF_TOGETHER_KEY: dummy
157-
HF_NOVITA_KEY: dummy
159+
HF_NOVITA_KEY: dummy
160+
HF_FIREWORKS_KEY: dummy

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
9696

9797
```html
9898
<script type="module">
99-
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.2.0/+esm';
99+
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.3.0/+esm';
100100
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
101101
</script>
102102
```

packages/inference/README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ You can send inference requests to third-party providers with the inference clie
4848

4949
Currently, we support the following providers:
5050
- [Fal.ai](https://fal.ai)
51+
- [Fireworks AI](https://fireworks.ai)
5152
- [Replicate](https://replicate.com)
5253
- [Sambanova](https://sambanova.ai)
5354
- [Together](https://together.xyz)
@@ -69,10 +70,11 @@ When authenticated with a Hugging Face access token, the request is routed throu
6970
When authenticated with a third-party provider key, the request is made directly against that provider's inference API.
7071

7172
Only a subset of models are supported when requesting third-party providers. You can check the list of supported models per pipeline tasks here:
72-
- [Fal.ai supported models](./src/providers/fal-ai.ts)
73-
- [Replicate supported models](./src/providers/replicate.ts)
74-
- [Sambanova supported models](./src/providers/sambanova.ts)
75-
- [Together supported models](./src/providers/together.ts)
73+
- [Fal.ai supported models](https://huggingface.co/api/partners/fal-ai/models)
74+
- [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
75+
- [Replicate supported models](https://huggingface.co/api/partners/replicate/models)
76+
- [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models)
77+
- [Together supported models](https://huggingface.co/api/partners/together/models)
7678
- [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending)
7779

7880
**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type.

packages/inference/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@huggingface/inference",
3-
"version": "3.2.0",
3+
"version": "3.3.0",
44
"packageManager": "[email protected]",
55
"license": "MIT",
66
"author": "Tim Mikeladze <[email protected]>",

packages/inference/src/lib/getProviderModelId.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ export async function getProviderModelId(
3030
options.taskHint === "text-generation" && options.chatCompletion ? "conversational" : options.taskHint;
3131

3232
// A dict called HARDCODED_MODEL_ID_MAPPING takes precedence in all cases (useful for dev purposes)
33-
if (HARDCODED_MODEL_ID_MAPPING[params.model]) {
34-
return HARDCODED_MODEL_ID_MAPPING[params.model];
33+
if (HARDCODED_MODEL_ID_MAPPING[params.provider]?.[params.model]) {
34+
return HARDCODED_MODEL_ID_MAPPING[params.provider][params.model];
3535
}
3636

3737
let inferenceProviderMapping: InferenceProviderMapping | null;

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { REPLICATE_API_BASE_URL } from "../providers/replicate";
44
import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
55
import { TOGETHER_API_BASE_URL } from "../providers/together";
66
import { NOVITA_API_BASE_URL } from "../providers/novita";
7+
import { FIREWORKS_AI_API_BASE_URL } from "../providers/fireworks-ai";
78
import type { InferenceProvider } from "../types";
89
import type { InferenceTask, Options, RequestArgs } from "../types";
910
import { isUrl } from "./isUrl";
@@ -209,11 +210,20 @@ function makeUrl(params: {
209210
}
210211
return baseUrl;
211212
}
213+
214+
case "fireworks-ai": {
215+
const baseUrl = shouldProxy
216+
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
217+
: FIREWORKS_AI_API_BASE_URL;
218+
if (params.taskHint === "text-generation" && params.chatCompletion) {
219+
return `${baseUrl}/v1/chat/completions`;
220+
}
221+
return baseUrl;
222+
}
212223
case "novita": {
213224
const baseUrl = shouldProxy
214225
? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
215226
: NOVITA_API_BASE_URL;
216-
/// Novita API matches OpenAI-like APIs: model is defined in the request body
217227
if (params.taskHint === "text-generation") {
218228
if (params.chatCompletion) {
219229
return `${baseUrl}/chat/completions`;
Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,25 @@
1-
import type { ModelId } from "../types";
1+
import type { InferenceProvider } from "../types";
2+
import { type ModelId } from "../types";
23

34
type ProviderId = string;
4-
55
/**
66
* If you want to try to run inference for a new model locally before it's registered on huggingface.co
77
* for a given Inference Provider,
88
* you can add it to the following dictionary, for dev purposes.
9+
*
10+
* We also inject into this dictionary from tests.
911
*/
10-
export const HARDCODED_MODEL_ID_MAPPING: Record<ModelId, ProviderId> = {
12+
export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelId, ProviderId>> = {
1113
/**
1214
* "HF model ID" => "Model ID on Inference Provider's side"
15+
*
16+
* Example:
17+
* "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
1318
*/
14-
// "Qwen/Qwen2.5-Coder-32B-Instruct": "Qwen2.5-Coder-32B-Instruct",
19+
"fal-ai": {},
20+
"fireworks-ai": {},
21+
"hf-inference": {},
22+
replicate: {},
23+
sambanova: {},
24+
together: {},
1525
};
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
export const FIREWORKS_AI_API_BASE_URL = "https://api.fireworks.ai/inference";
2+
3+
/**
4+
* See the registered mapping of HF model ID => Fireworks model ID here:
5+
*
6+
 * https://huggingface.co/api/partners/fireworks-ai/models
7+
*
8+
* This is a publicly available mapping.
9+
*
10+
* If you want to try to run inference for a new model locally before it's registered on huggingface.co,
11+
* you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
12+
*
13+
* - If you work at Fireworks and want to update this mapping, please use the model mapping API we provide on huggingface.co
14+
* - If you're a community member and want to add a new supported HF model to Fireworks, please open an issue on the present repo
15+
* and we will tag Fireworks team members.
16+
*
17+
* Thanks!
18+
*/

packages/inference/src/types.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,16 @@ export interface Options {
4444

4545
export type InferenceTask = Exclude<PipelineType, "other">;
4646

47-
export const INFERENCE_PROVIDERS = ["fal-ai", "replicate", "sambanova", "together", "hf-inference", "novita"] as const;
47+
export const INFERENCE_PROVIDERS = [
48+
"fal-ai",
49+
"fireworks-ai",
50+
"hf-inference",
51+
"replicate",
52+
"sambanova",
53+
"together",
54+
"novita",
55+
] as const;
56+
4857
export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];
4958

5059
export interface BaseArgs {

packages/inference/test/HfInference.spec.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { chatCompletion, HfInference } from "../src";
66
import { textToVideo } from "../src/tasks/cv/textToVideo";
77
import { readTestFile } from "./test-files";
88
import "./vcr";
9+
import { HARDCODED_MODEL_ID_MAPPING } from "../src/providers/consts";
910

1011
const TIMEOUT = 60000 * 3;
1112
const env = import.meta.env;
@@ -1078,6 +1079,53 @@ describe.concurrent("HfInference", () => {
10781079
});
10791080
});
10801081

1082+
describe.concurrent(
1083+
"Fireworks",
1084+
() => {
1085+
const client = new HfInference(env.HF_FIREWORKS_KEY);
1086+
1087+
HARDCODED_MODEL_ID_MAPPING["fireworks-ai"] = {
1088+
"deepseek-ai/DeepSeek-R1": "accounts/fireworks/models/deepseek-r1",
1089+
};
1090+
1091+
it("chatCompletion", async () => {
1092+
const res = await client.chatCompletion({
1093+
model: "deepseek-ai/DeepSeek-R1",
1094+
provider: "fireworks-ai",
1095+
messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
1096+
});
1097+
if (res.choices && res.choices.length > 0) {
1098+
const completion = res.choices[0].message?.content;
1099+
expect(completion).toContain("two");
1100+
}
1101+
});
1102+
1103+
it("chatCompletion stream", async () => {
1104+
const stream = client.chatCompletionStream({
1105+
model: "deepseek-ai/DeepSeek-R1",
1106+
provider: "fireworks-ai",
1107+
messages: [{ role: "user", content: "Say this is a test" }],
1108+
stream: true,
1109+
}) as AsyncGenerator<ChatCompletionStreamOutput>;
1110+
1111+
let fullResponse = "";
1112+
for await (const chunk of stream) {
1113+
if (chunk.choices && chunk.choices.length > 0) {
1114+
const content = chunk.choices[0].delta?.content;
1115+
if (content) {
1116+
fullResponse += content;
1117+
}
1118+
}
1119+
}
1120+
1121+
// Verify we got a meaningful response
1122+
expect(fullResponse).toBeTruthy();
1123+
expect(fullResponse.length).toBeGreaterThan(0);
1124+
});
1125+
},
1126+
TIMEOUT
1127+
);
1128+
10811129
describe.concurrent(
10821130
"Novita",
10831131
() => {

0 commit comments

Comments
 (0)