
Commit 2a05527

Merge branch 'main' into saksham/blackforestlabs-ai
2 parents 07da906 + 57154a5

File tree

10 files changed: +7088 −7134 lines


.github/workflows/test.yml

Lines changed: 3 additions & 0 deletions
@@ -46,6 +46,7 @@ jobs:
   HF_REPLICATE_KEY: dummy
   HF_SAMBANOVA_KEY: dummy
   HF_TOGETHER_KEY: dummy
+  HF_NOVITA_KEY: dummy
   HF_FIREWORKS_KEY: dummy
   HF_BLACK_FOREST_LABS_KEY: dummy

@@ -89,6 +90,7 @@ jobs:
   HF_REPLICATE_KEY: dummy
   HF_SAMBANOVA_KEY: dummy
   HF_TOGETHER_KEY: dummy
+  HF_NOVITA_KEY: dummy
   HF_FIREWORKS_KEY: dummy
   HF_BLACK_FOREST_LABS_KEY: dummy

@@ -159,5 +161,6 @@ jobs:
   HF_REPLICATE_KEY: dummy
   HF_SAMBANOVA_KEY: dummy
   HF_TOGETHER_KEY: dummy
+  HF_NOVITA_KEY: dummy
   HF_FIREWORKS_KEY: dummy
   HF_BLACK_FOREST_LABS_KEY: dummy

packages/inference/README.md

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ Currently, we support the following providers:
 - [Fal.ai](https://fal.ai)
 - [Fireworks AI](https://fireworks.ai)
 - [Nebius](https://studio.nebius.ai)
+- [Novita](https://novita.ai/?utm_source=github_huggingface&utm_medium=github_readme&utm_campaign=link)
 - [Replicate](https://replicate.com)
 - [Sambanova](https://sambanova.ai)
 - [Together](https://together.xyz)
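
For context (not part of the diff): once the provider is live, a chat completion call routed through Novita could look like the minimal sketch below. It mirrors the call shape added in HfInference.spec.ts further down; the "@huggingface/inference" import path and the placeholder token are assumptions.

import { HfInference } from "@huggingface/inference"; // package name assumed from packages/inference

const client = new HfInference("hf_xxx"); // placeholder access token
const res = await client.chatCompletion({
  model: "meta-llama/llama-3.1-8b-instruct", // model ID used in the new Novita tests
  provider: "novita",
  messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
});
console.log(res.choices[0]?.message?.content);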

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 17 additions & 13 deletions
@@ -4,6 +4,7 @@ import { NEBIUS_API_BASE_URL } from "../providers/nebius";
 import { REPLICATE_API_BASE_URL } from "../providers/replicate";
 import { SAMBANOVA_API_BASE_URL } from "../providers/sambanova";
 import { TOGETHER_API_BASE_URL } from "../providers/together";
+import { NOVITA_API_BASE_URL } from "../providers/novita";
 import { FIREWORKS_AI_API_BASE_URL } from "../providers/fireworks-ai";
 import { BLACKFORESTLABS_AI_API_BASE_URL } from "../providers/black-forest-labs";
 import type { InferenceProvider } from "../types";

@@ -29,8 +30,6 @@ export async function makeRequestOptions(
     stream?: boolean;
   },
   options?: Options & {
-    /** When a model can be used for multiple tasks, and we want to run a non-default task */
-    forceTask?: string | InferenceTask;
     /** To load default model if needed */
     taskHint?: InferenceTask;
     chatCompletion?: boolean;

@@ -40,14 +39,11 @@
   let otherArgs = remainingArgs;
   const provider = maybeProvider ?? "hf-inference";

-  const { forceTask, includeCredentials, taskHint, chatCompletion } = options ?? {};
+  const { includeCredentials, taskHint, chatCompletion } = options ?? {};

   if (endpointUrl && provider !== "hf-inference") {
     throw new Error(`Cannot use endpointUrl with a third-party provider.`);
   }
-  if (forceTask && provider !== "hf-inference") {
-    throw new Error(`Cannot use forceTask with a third-party provider.`);
-  }
   if (maybeModel && isUrl(maybeModel)) {
     throw new Error(`Model URLs are no longer supported. Use endpointUrl instead.`);
   }

@@ -78,7 +74,6 @@
     : makeUrl({
         authMethod,
         chatCompletion: chatCompletion ?? false,
-        forceTask,
         model,
         provider: provider ?? "hf-inference",
         taskHint,

@@ -152,7 +147,6 @@ function makeUrl(params: {
   model: string;
   provider: InferenceProvider;
   taskHint: InferenceTask | undefined;
-  forceTask?: string | InferenceTask;
 }): string {
   if (params.authMethod === "none" && params.provider !== "hf-inference") {
     throw new Error("Authentication is required when requesting a third-party provider. Please provide accessToken");

@@ -225,6 +219,7 @@ function makeUrl(params: {
     }
     return baseUrl;
   }
+
   case "fireworks-ai": {
     const baseUrl = shouldProxy
       ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)

@@ -234,15 +229,24 @@
     }
     return baseUrl;
   }
+  case "novita": {
+    const baseUrl = shouldProxy
+      ? HF_HUB_INFERENCE_PROXY_TEMPLATE.replace("{{PROVIDER}}", params.provider)
+      : NOVITA_API_BASE_URL;
+    if (params.taskHint === "text-generation") {
+      if (params.chatCompletion) {
+        return `${baseUrl}/chat/completions`;
+      }
+      return `${baseUrl}/completions`;
+    }
+    return baseUrl;
+  }
   default: {
     const baseUrl = HF_HUB_INFERENCE_PROXY_TEMPLATE.replaceAll("{{PROVIDER}}", "hf-inference");
-    const url = params.forceTask
-      ? `${baseUrl}/pipeline/${params.forceTask}/${params.model}`
-      : `${baseUrl}/models/${params.model}`;
     if (params.taskHint === "text-generation" && params.chatCompletion) {
-      return url + `/v1/chat/completions`;
+      return `${baseUrl}/models/${params.model}/v1/chat/completions`;
     }
-    return url;
+    return `${baseUrl}/models/${params.model}`;
   }
 }
}
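
To make the new routing concrete, here is a minimal standalone sketch of the URL resolution performed by the "novita" branch above (illustration only, not the library's exported API; the non-proxied base URL comes from providers/novita.ts):

const NOVITA_API_BASE_URL = "https://api.novita.ai/v3/openai";

// Simplified re-implementation of the `case "novita"` branch, for illustration.
function novitaUrl(taskHint?: string, chatCompletion?: boolean): string {
  if (taskHint === "text-generation") {
    return chatCompletion ? `${NOVITA_API_BASE_URL}/chat/completions` : `${NOVITA_API_BASE_URL}/completions`;
  }
  return NOVITA_API_BASE_URL;
}

// novitaUrl("text-generation", true)  -> "https://api.novita.ai/v3/openai/chat/completions"
// novitaUrl("text-generation", false) -> "https://api.novita.ai/v3/openai/completions"
// any other task hint                 -> the bare base URL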

packages/inference/src/providers/consts.ts

Lines changed: 1 addition & 0 deletions
@@ -24,4 +24,5 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
   replicate: {},
   sambanova: {},
   together: {},
+  novita: {},
 };
packages/inference/src/providers/novita.ts

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+export const NOVITA_API_BASE_URL = "https://api.novita.ai/v3/openai";
+
+/**
+ * See the registered mapping of HF model ID => Novita model ID here:
+ *
+ * https://huggingface.co/api/partners/novita/models
+ *
+ * This is a publicly available mapping.
+ *
+ * If you want to try to run inference for a new model locally before it's registered on huggingface.co,
+ * you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
+ *
+ * - If you work at Novita and want to update this mapping, please use the model mapping API we provide on huggingface.co
+ * - If you're a community member and want to add a new supported HF model to Novita, please open an issue on the present repo
+ *   and we will tag Novita team members.
+ *
+ * Thanks!
+ */
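
As the comment above suggests, a dev-only override (before a model is registered on huggingface.co) could look like the sketch below; the import path is assumed, and the mapping entry mirrors the one added in HfInference.spec.ts:

import { HARDCODED_MODEL_ID_MAPPING } from "./consts"; // assumed path, relative to src/providers

// Dev only: map the HF model ID to the Novita model ID until the public mapping is registered.
HARDCODED_MODEL_ID_MAPPING["novita"] = {
  "meta-llama/llama-3.1-8b-instruct": "meta-llama/llama-3.1-8b-instruct",
};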

packages/inference/src/tasks/nlp/featureExtraction.ts

Lines changed: 0 additions & 4 deletions
@@ -1,5 +1,4 @@
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
-import { getDefaultTask } from "../../lib/getDefaultTask";
 import type { BaseArgs, Options } from "../../types";
 import { request } from "../custom/request";

@@ -25,12 +24,9 @@
   args: FeatureExtractionArgs,
   options?: Options
 ): Promise<FeatureExtractionOutput> {
-  const defaultTask = args.model ? await getDefaultTask(args.model, args.accessToken, options) : undefined;
-
   const res = await request<FeatureExtractionOutput>(args, {
     ...options,
     taskHint: "feature-extraction",
-    ...(defaultTask === "sentence-similarity" && { forceTask: "feature-extraction" }),
   });
   let isValidOutput = true;

packages/inference/src/tasks/nlp/sentenceSimilarity.ts

Lines changed: 0 additions & 3 deletions
@@ -1,6 +1,5 @@
 import type { SentenceSimilarityInput, SentenceSimilarityOutput } from "@huggingface/tasks";
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
-import { getDefaultTask } from "../../lib/getDefaultTask";
 import type { BaseArgs, Options } from "../../types";
 import { request } from "../custom/request";
 import { omit } from "../../utils/omit";

@@ -14,11 +13,9 @@
   args: SentenceSimilarityArgs,
   options?: Options
 ): Promise<SentenceSimilarityOutput> {
-  const defaultTask = args.model ? await getDefaultTask(args.model, args.accessToken, options) : undefined;
   const res = await request<SentenceSimilarityOutput>(prepareInput(args), {
     ...options,
     taskHint: "sentence-similarity",
-    ...(defaultTask === "feature-extraction" && { forceTask: "sentence-similarity" }),
   });

   const isValidOutput = Array.isArray(res) && res.every((x) => typeof x === "number");
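
With forceTask gone, both helpers simply send the request with their own taskHint instead of probing getDefaultTask first; nothing changes from the caller's side. A minimal call, as a sketch (`hf` is an HfInference instance, and the model/inputs are taken from the test suite):

const embedding = await hf.featureExtraction({
  model: "facebook/bart-base",
  inputs: "That is a happy person",
});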

packages/inference/src/types.ts

Lines changed: 4 additions & 2 deletions
@@ -29,15 +29,17 @@ export interface Options {
 export type InferenceTask = Exclude<PipelineType, "other">;

 export const INFERENCE_PROVIDERS = [
+  "black-forest-labs",
   "fal-ai",
   "fireworks-ai",
-  "nebius",
   "hf-inference",
+  "nebius",
+  "novita",
   "replicate",
   "sambanova",
   "together",
-  "black-forest-labs",
 ] as const;
+
 export type InferenceProvider = (typeof INFERENCE_PROVIDERS)[number];

 export interface BaseArgs {
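
The reshuffling above is purely alphabetical; the only new member of the exported union is "novita". A trivial sketch of the effect (import path assumed):

import type { InferenceProvider } from "@huggingface/inference"; // path assumed

const provider: InferenceProvider = "novita"; // type-checks after this change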

packages/inference/test/HfInference.spec.ts

Lines changed: 47 additions & 9 deletions
@@ -351,15 +351,6 @@ describe.concurrent("HfInference", () => {
   });
   expect(response).toEqual(expect.arrayContaining([expect.any(Number)]));
 });
-it("FeatureExtraction - same model as sentence similarity", async () => {
-  const response = await hf.featureExtraction({
-    model: "sentence-transformers/paraphrase-xlm-r-multilingual-v1",
-    inputs: "That is a happy person",
-  });
-
-  expect(response.length).toBeGreaterThan(10);
-  expect(response).toEqual(expect.arrayContaining([expect.any(Number)]));
-});
 it("FeatureExtraction - facebook/bart-base", async () => {
   const response = await hf.featureExtraction({
     model: "facebook/bart-base",

@@ -1176,6 +1167,53 @@ describe.concurrent("HfInference", () => {
   TIMEOUT
 );

+describe.concurrent(
+  "Novita",
+  () => {
+    const client = new HfInference(env.HF_NOVITA_KEY);
+
+    HARDCODED_MODEL_ID_MAPPING["novita"] = {
+      "meta-llama/llama-3.1-8b-instruct": "meta-llama/llama-3.1-8b-instruct",
+      "deepseek/deepseek-r1-distill-qwen-14b": "deepseek/deepseek-r1-distill-qwen-14b",
+    };
+
+    it("chatCompletion", async () => {
+      const res = await client.chatCompletion({
+        model: "meta-llama/llama-3.1-8b-instruct",
+        provider: "novita",
+        messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+      });
+      if (res.choices && res.choices.length > 0) {
+        const completion = res.choices[0].message?.content;
+        expect(completion).toContain("two");
+      }
+    });
+
+    it("chatCompletion stream", async () => {
+      const stream = client.chatCompletionStream({
+        model: "deepseek/deepseek-r1-distill-qwen-14b",
+        provider: "novita",
+        messages: [{ role: "user", content: "Say this is a test" }],
+        stream: true,
+      }) as AsyncGenerator<ChatCompletionStreamOutput>;
+
+      let fullResponse = "";
+      for await (const chunk of stream) {
+        if (chunk.choices && chunk.choices.length > 0) {
+          const content = chunk.choices[0].delta?.content;
+          if (content) {
+            fullResponse += content;
+          }
+        }
+      }
+
+      // Verify we got a meaningful response
+      expect(fullResponse).toBeTruthy();
+      expect(fullResponse.length).toBeGreaterThan(0);
+    });
+  },
+  TIMEOUT
+);
 describe.concurrent(
   "Black Forest Labs",
   () => {
