
Commit 827093f

Merge remote-tracking branch 'upstream/main'

2 parents: 0e5a6c8 + 930e30b

92 files changed: +8787 / −7825 lines

README.md

Lines changed: 13 additions & 13 deletions
@@ -27,7 +27,7 @@ await uploadFile({
   }
 });

-// Use HF Inference API, or external Inference Providers!
+// Use all supported Inference Providers!

 await inference.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
@@ -55,7 +55,7 @@ await inference.textToImage({

 This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.

-- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models
+- [@huggingface/inference](packages/inference/README.md): Use all supported (serverless) Inference Providers or switch to Inference Endpoints (dedicated) to make calls to 100,000+ Machine Learning models
 - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
 - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
 - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
@@ -97,7 +97,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or

 ```html
 <script type="module">
-  import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.7.1/+esm';
+  import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.8.1/+esm';
   import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
 </script>
 ```
@@ -128,18 +128,18 @@ import { InferenceClient } from "@huggingface/inference";

 const HF_TOKEN = "hf_...";

-const inference = new InferenceClient(HF_TOKEN);
+const client = new InferenceClient(HF_TOKEN);

 // Chat completion API
-const out = await inference.chatCompletion({
+const out = await client.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512
 });
 console.log(out.choices[0].message);

 // Streaming chat completion API
-for await (const chunk of inference.chatCompletionStream({
+for await (const chunk of client.chatCompletionStream({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512
@@ -148,14 +148,14 @@ for await (const chunk of inference.chatCompletionStream({
 }

 /// Using a third-party provider:
-await inference.chatCompletion({
+await client.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
   messages: [{ role: "user", content: "Hello, nice to meet you!" }],
   max_tokens: 512,
   provider: "sambanova", // or together, fal-ai, replicate, cohere …
 })

-await inference.textToImage({
+await client.textToImage({
   model: "black-forest-labs/FLUX.1-dev",
   inputs: "a picture of a green bird",
   provider: "fal-ai",
@@ -164,7 +164,7 @@ await inference.textToImage({


 // You can also omit "model" to use the recommended model for the task
-await inference.translation({
+await client.translation({
   inputs: "My name is Wolfgang and I live in Amsterdam",
   parameters: {
     src_lang: "en",
@@ -173,17 +173,17 @@ await inference.translation({
 });

 // pass multimodal files or URLs as inputs
-await inference.imageToText({
+await client.imageToText({
   model: 'nlpconnect/vit-gpt2-image-captioning',
   data: await (await fetch('https://picsum.photos/300/300')).blob(),
 })

 // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
-const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
-const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' });
+const gpt2Client = client.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2Client.textGeneration({ inputs: 'The answer to the universe is' });

 // Chat Completion
-const llamaEndpoint = inference.endpoint(
+const llamaEndpoint = client.endpoint(
   "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct"
 );
 const out = await llamaEndpoint.chatCompletion({

packages/inference/package.json

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "3.7.1",
+  "version": "3.8.1",
   "packageManager": "[email protected]",
   "license": "MIT",
   "author": "Hugging Face and Tim Mikeladze <[email protected]>",

packages/inference/src/lib/getInferenceProviderMapping.ts

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import type { WidgetType } from "@huggingface/tasks";
2+
import type { InferenceProvider, ModelId } from "../types";
3+
import { HF_HUB_URL } from "../config";
4+
import { HARDCODED_MODEL_INFERENCE_MAPPING } from "../providers/consts";
5+
import { EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS } from "../providers/hf-inference";
6+
import { typedInclude } from "../utils/typedInclude";
7+
8+
export const inferenceProviderMappingCache = new Map<ModelId, InferenceProviderMapping>();
9+
10+
export type InferenceProviderMapping = Partial<
11+
Record<InferenceProvider, Omit<InferenceProviderModelMapping, "hfModelId" | "adapterWeightsPath">>
12+
>;
13+
14+
export interface InferenceProviderModelMapping {
15+
adapter?: string;
16+
adapterWeightsPath?: string;
17+
hfModelId: ModelId;
18+
providerId: string;
19+
status: "live" | "staging";
20+
task: WidgetType;
21+
}
22+
23+
export async function getInferenceProviderMapping(
24+
params: {
25+
accessToken?: string;
26+
modelId: ModelId;
27+
provider: InferenceProvider;
28+
task: WidgetType;
29+
},
30+
options: {
31+
fetch?: (input: RequestInfo, init?: RequestInit) => Promise<Response>;
32+
}
33+
): Promise<InferenceProviderModelMapping | null> {
34+
if (HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId]) {
35+
return HARDCODED_MODEL_INFERENCE_MAPPING[params.provider][params.modelId];
36+
}
37+
let inferenceProviderMapping: InferenceProviderMapping | null;
38+
if (inferenceProviderMappingCache.has(params.modelId)) {
39+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
40+
inferenceProviderMapping = inferenceProviderMappingCache.get(params.modelId)!;
41+
} else {
42+
const resp = await (options?.fetch ?? fetch)(
43+
`${HF_HUB_URL}/api/models/${params.modelId}?expand[]=inferenceProviderMapping`,
44+
{
45+
headers: params.accessToken?.startsWith("hf_") ? { Authorization: `Bearer ${params.accessToken}` } : {},
46+
}
47+
);
48+
if (resp.status === 404) {
49+
throw new Error(`Model ${params.modelId} does not exist`);
50+
}
51+
inferenceProviderMapping = await resp
52+
.json()
53+
.then((json) => json.inferenceProviderMapping)
54+
.catch(() => null);
55+
}
56+
57+
if (!inferenceProviderMapping) {
58+
throw new Error(`We have not been able to find inference provider information for model ${params.modelId}.`);
59+
}
60+
61+
const providerMapping = inferenceProviderMapping[params.provider];
62+
if (providerMapping) {
63+
const equivalentTasks =
64+
params.provider === "hf-inference" && typedInclude(EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS, params.task)
65+
? EQUIVALENT_SENTENCE_TRANSFORMERS_TASKS
66+
: [params.task];
67+
if (!typedInclude(equivalentTasks, providerMapping.task)) {
68+
throw new Error(
69+
`Model ${params.modelId} is not supported for task ${params.task} and provider ${params.provider}. Supported task: ${providerMapping.task}.`
70+
);
71+
}
72+
if (providerMapping.status === "staging") {
73+
console.warn(
74+
`Model ${params.modelId} is in staging mode for provider ${params.provider}. Meant for test purposes only.`
75+
);
76+
}
77+
if (providerMapping.adapter === "lora") {
78+
const treeResp = await (options?.fetch ?? fetch)(`${HF_HUB_URL}/api/models/${params.modelId}/tree/main`);
79+
if (!treeResp.ok) {
80+
throw new Error(`Unable to fetch the model tree for ${params.modelId}.`);
81+
}
82+
const tree: Array<{ type: "file" | "directory"; path: string }> = await treeResp.json();
83+
const adapterWeightsPath = tree.find(({ type, path }) => type === "file" && path.endsWith(".safetensors"))?.path;
84+
if (!adapterWeightsPath) {
85+
throw new Error(`No .safetensors file found in the model tree for ${params.modelId}.`);
86+
}
87+
return {
88+
...providerMapping,
89+
hfModelId: params.modelId,
90+
adapterWeightsPath,
91+
};
92+
}
93+
return { ...providerMapping, hfModelId: params.modelId };
94+
}
95+
return null;
96+
}
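
For orientation, here is a minimal sketch of how the new helper could be called directly; the relative import path, model ID, provider, and token are illustrative assumptions, not part of this commit:

```ts
import { getInferenceProviderMapping } from "./lib/getInferenceProviderMapping";

// Illustrative call: resolve the provider-side model ID for a Hub model (values are made up).
const mapping = await getInferenceProviderMapping(
  {
    modelId: "meta-llama/Llama-3.1-8B-Instruct",
    provider: "sambanova",
    task: "conversational",
    accessToken: "hf_...",
  },
  { fetch } // a custom fetch can be injected here, e.g. in tests
);

if (mapping) {
  // providerId is the model name on the provider's side; status is "live" or "staging".
  console.log(mapping.providerId, mapping.status, mapping.task);
}
```

Note that the module-level `inferenceProviderMappingCache` map is consulted first, so pre-populated entries skip the Hub request entirely.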

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 1 addition & 0 deletions
@@ -124,6 +124,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
   },
   sambanova: {
     conversational: new Sambanova.SambanovaConversationalTask(),
+    "feature-extraction": new Sambanova.SambanovaFeatureExtractionTask(),
   },
   together: {
     "text-to-image": new Together.TogetherTextToImageTask(),

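A hedged sketch of what this one-line registration enables from the public client API; the embedding model below is an illustrative assumption, not taken from this commit:

```ts
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient("hf_...");

// Feature extraction (embeddings) routed through the Sambanova provider.
// The model name is illustrative; any model mapped to Sambanova for this task would work.
const embeddings = await client.featureExtraction({
  model: "intfloat/e5-mistral-7b-instruct",
  inputs: "Today is a sunny day and I will get some ice cream.",
  provider: "sambanova",
});

console.log(embeddings);
```
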
packages/inference/src/lib/getProviderModelId.ts

Lines changed: 0 additions & 74 deletions
This file was deleted.

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 41 additions & 10 deletions
@@ -1,8 +1,9 @@
 import { name as packageName, version as packageVersion } from "../../package.json";
 import { HF_HEADER_X_BILL_TO, HF_HUB_URL } from "../config";
 import type { InferenceTask, Options, RequestArgs } from "../types";
+import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping";
+import { getInferenceProviderMapping } from "./getInferenceProviderMapping";
 import type { getProviderHelper } from "./getProviderHelper";
-import { getProviderModelId } from "./getProviderModelId";
 import { isUrl } from "./isUrl";

 /**
@@ -40,7 +41,13 @@ export async function makeRequestOptions(

   if (args.endpointUrl) {
     // No need to have maybeModel, or to load default model for a task
-    return makeRequestOptionsFromResolvedModel(maybeModel ?? args.endpointUrl, providerHelper, args, options);
+    return makeRequestOptionsFromResolvedModel(
+      maybeModel ?? args.endpointUrl,
+      providerHelper,
+      args,
+      undefined,
+      options
+    );
   }

   if (!maybeModel && !task) {
@@ -54,16 +61,38 @@ export async function makeRequestOptions(
     throw new Error(`Provider ${provider} requires a model ID to be passed directly.`);
   }

-  const resolvedModel = providerHelper.clientSideRoutingOnly
-    ? // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-      removeProviderPrefix(maybeModel!, provider)
-    : await getProviderModelId({ model: hfModel, provider }, args, {
-        task,
-        fetch: options?.fetch,
-      });
+  const inferenceProviderMapping = providerHelper.clientSideRoutingOnly
+    ? ({
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        providerId: removeProviderPrefix(maybeModel!, provider),
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        hfModelId: maybeModel!,
+        status: "live",
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        task: task!,
+      } satisfies InferenceProviderModelMapping)
+    : await getInferenceProviderMapping(
+        {
+          modelId: hfModel,
+          // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+          task: task!,
+          provider,
+          accessToken: args.accessToken,
+        },
+        { fetch: options?.fetch }
+      );
+  if (!inferenceProviderMapping) {
+    throw new Error(`We have not been able to find inference provider information for model ${hfModel}.`);
+  }

   // Use the sync version with the resolved model
-  return makeRequestOptionsFromResolvedModel(resolvedModel, providerHelper, args, options);
+  return makeRequestOptionsFromResolvedModel(
+    inferenceProviderMapping.providerId,
+    providerHelper,
+    args,
+    inferenceProviderMapping,
+    options
+  );
 }

 /**
@@ -77,6 +106,7 @@ export function makeRequestOptionsFromResolvedModel(
     data?: Blob | ArrayBuffer;
     stream?: boolean;
   },
+  mapping: InferenceProviderModelMapping | undefined,
   options?: Options & {
     task?: InferenceTask;
   }
@@ -138,6 +168,7 @@ export function makeRequestOptionsFromResolvedModel(
     args: remainingArgs as Record<string, unknown>,
     model: resolvedModel,
     task,
+    mapping,
   });
   /**
    * For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error
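
To make the new `mapping` parameter concrete, this is roughly the object that `makeRequestOptions` now threads into `makeRequestOptionsFromResolvedModel` and on to the provider helper (field values are illustrative):

```ts
import type { InferenceProviderModelMapping } from "./getInferenceProviderMapping";

// Example shape only; in practice this comes from getInferenceProviderMapping() or the
// hardcoded / client-side-routing fallbacks shown in the diff above.
const mapping: InferenceProviderModelMapping = {
  hfModelId: "meta-llama/Llama-3.1-8B-Instruct", // model ID on the Hugging Face Hub
  providerId: "Meta-Llama-3.1-8B-Instruct", // model ID on the provider's side (illustrative)
  status: "live",
  task: "conversational",
};
```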

packages/inference/src/providers/consts.ts

Lines changed: 5 additions & 2 deletions
@@ -1,15 +1,18 @@
+import type { InferenceProviderModelMapping } from "../lib/getInferenceProviderMapping";
 import type { InferenceProvider } from "../types";
 import { type ModelId } from "../types";

-type ProviderId = string;
 /**
  * If you want to try to run inference for a new model locally before it's registered on huggingface.co
  * for a given Inference Provider,
  * you can add it to the following dictionary, for dev purposes.
  *
  * We also inject into this dictionary from tests.
  */
-export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelId, ProviderId>> = {
+export const HARDCODED_MODEL_INFERENCE_MAPPING: Record<
+  InferenceProvider,
+  Record<ModelId, InferenceProviderModelMapping>
+> = {
   /**
    * "HF model ID" => "Model ID on Inference Provider's side"
    *
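
Since the dictionary's values are now full `InferenceProviderModelMapping` objects rather than bare provider model IDs, a dev-time entry would look roughly like the sketch below; the import path, provider, model IDs, and task are illustrative assumptions:

```ts
import { HARDCODED_MODEL_INFERENCE_MAPPING } from "@huggingface/inference/src/providers/consts";

// Register a model that is not yet mapped on huggingface.co, for local testing only.
HARDCODED_MODEL_INFERENCE_MAPPING["fal-ai"]["my-org/my-flux-finetune"] = {
  hfModelId: "my-org/my-flux-finetune",
  providerId: "fal-ai/flux-lora", // provider-side ID, made up for illustration
  status: "staging",
  task: "text-to-image",
};
```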
