Commit 7316934

Merge branch 'main' into cerebras-provider
2 parents: bd56b42 + 822ab9e

File tree: 20 files changed (+7609, -7485 lines)

README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -96,7 +96,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or
 
 ```html
 <script type="module">
-import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.4.0/+esm';
+import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.4.1/+esm';
 import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm";
 </script>
 ```
````

packages/inference/package.json

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 {
 	"name": "@huggingface/inference",
-	"version": "3.4.0",
+	"version": "3.4.1",
 	"packageManager": "[email protected]",
 	"license": "MIT",
 	"author": "Tim Mikeladze <[email protected]>",
```

packages/inference/src/lib/makeRequestOptions.ts

Lines changed: 38 additions & 13 deletions
```diff
@@ -11,6 +11,7 @@ import { NOVITA_CONFIG } from "../providers/novita";
 import { REPLICATE_CONFIG } from "../providers/replicate";
 import { SAMBANOVA_CONFIG } from "../providers/sambanova";
 import { TOGETHER_CONFIG } from "../providers/together";
+import { OPENAI_CONFIG } from "../providers/openai";
 import type { InferenceProvider, InferenceTask, Options, ProviderConfig, RequestArgs } from "../types";
 import { isUrl } from "./isUrl";
 import { version as packageVersion, name as packageName } from "../../package.json";
@@ -35,6 +36,7 @@ const providerConfigs: Record<InferenceProvider, ProviderConfig> = {
 	"fireworks-ai": FIREWORKS_AI_CONFIG,
 	"hf-inference": HF_INFERENCE_CONFIG,
 	hyperbolic: HYPERBOLIC_CONFIG,
+	openai: OPENAI_CONFIG,
 	nebius: NEBIUS_CONFIG,
 	novita: NOVITA_CONFIG,
 	replicate: REPLICATE_CONFIG,
@@ -74,22 +76,38 @@ export async function makeRequestOptions(
 	if (!providerConfig) {
 		throw new Error(`No provider config found for provider ${provider}`);
 	}
+	if (providerConfig.clientSideRoutingOnly && !maybeModel) {
+		throw new Error(`Provider ${provider} requires a model ID to be passed directly.`);
+	}
 	// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
 	const hfModel = maybeModel ?? (await loadDefaultModel(task!));
-	const model = await getProviderModelId({ model: hfModel, provider }, args, {
-		task,
-		chatCompletion,
-		fetch: options?.fetch,
-	});
+	const model = providerConfig.clientSideRoutingOnly
+		? // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+		  removeProviderPrefix(maybeModel!, provider)
+		: // For closed-models API providers, one needs to pass the model ID directly (e.g. "gpt-3.5-turbo")
+		  await getProviderModelId({ model: hfModel, provider }, args, {
+				task,
+				chatCompletion,
+				fetch: options?.fetch,
+		  });
 
-	/// If accessToken is passed, it should take precedence over includeCredentials
-	const authMethod = accessToken
-		? accessToken.startsWith("hf_")
-			? "hf-token"
-			: "provider-key"
-		: includeCredentials === "include"
-			? "credentials-include"
-			: "none";
+	const authMethod = (() => {
+		if (providerConfig.clientSideRoutingOnly) {
+			// Closed-source providers require an accessToken (cannot be routed).
+			if (accessToken && accessToken.startsWith("hf_")) {
+				throw new Error(`Provider ${provider} is closed-source and does not support HF tokens.`);
+			}
+			return "provider-key";
+		}
+		if (accessToken) {
+			return accessToken.startsWith("hf_") ? "hf-token" : "provider-key";
+		}
+		if (includeCredentials === "include") {
+			// If accessToken is passed, it should take precedence over includeCredentials
+			return "credentials-include";
+		}
+		return "none";
+	})();
 
 	// Make URL
 	const url = endpointUrl
@@ -178,3 +196,10 @@ async function loadTaskInfo(): Promise<Record<string, { models: { id: string }[]
 	}
 	return await res.json();
 }
+
+function removeProviderPrefix(model: string, provider: string): string {
+	if (!model.startsWith(`${provider}/`)) {
+		throw new Error(`Models from ${provider} must be prefixed by "${provider}/". Got "${model}".`);
+	}
+	return model.slice(provider.length + 1);
+}
```
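With `clientSideRoutingOnly` set, the model ID must arrive fully prefixed and the token must be a provider key rather than an HF token. A minimal sketch of the resulting call shape (the API key is a placeholder; the call mirrors the new test further down):

```ts
import { HfInference } from "@huggingface/inference";

// Placeholder key: an "hf_..." token would throw here, since closed-source
// providers cannot be routed through Hugging Face.
const client = new HfInference("sk-your-openai-api-key");

const res = await client.chatCompletion({
	provider: "openai",
	model: "openai/gpt-3.5-turbo", // removeProviderPrefix() strips "openai/" before the request is sent
	messages: [{ role: "user", content: "Complete the equation one + one =" }],
});
```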

packages/inference/src/providers/consts.ts

Lines changed: 1 addition & 0 deletions
```diff
@@ -25,6 +25,7 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
 	hyperbolic: {},
 	nebius: {},
 	novita: {},
+	openai: {},
 	replicate: {},
 	sambanova: {},
 	together: {},
```
packages/inference/src/providers/openai.ts

Lines changed: 35 additions & 0 deletions

```diff
@@ -0,0 +1,35 @@
+/**
+ * Special case: provider configuration for a private models provider (OpenAI in this case).
+ */
+import type { ProviderConfig, UrlParams, HeaderParams, BodyParams } from "../types";
+
+const OPENAI_API_BASE_URL = "https://api.openai.com";
+
+const makeBody = (params: BodyParams): Record<string, unknown> => {
+	if (!params.chatCompletion) {
+		throw new Error("OpenAI only supports chat completions.");
+	}
+	return {
+		...params.args,
+		model: params.model,
+	};
+};
+
+const makeHeaders = (params: HeaderParams): Record<string, string> => {
+	return { Authorization: `Bearer ${params.accessToken}` };
+};
+
+const makeUrl = (params: UrlParams): string => {
+	if (!params.chatCompletion) {
+		throw new Error("OpenAI only supports chat completions.");
+	}
+	return `${params.baseUrl}/v1/chat/completions`;
+};
+
+export const OPENAI_CONFIG: ProviderConfig = {
+	baseUrl: OPENAI_API_BASE_URL,
+	makeBody,
+	makeHeaders,
+	makeUrl,
+	clientSideRoutingOnly: true,
+};
```

packages/inference/src/tasks/nlp/featureExtraction.ts

Lines changed: 2 additions & 9 deletions
```diff
@@ -1,16 +1,9 @@
+import type { FeatureExtractionInput } from "@huggingface/tasks";
 import { InferenceOutputError } from "../../lib/InferenceOutputError";
 import type { BaseArgs, Options } from "../../types";
 import { request } from "../custom/request";
 
-export type FeatureExtractionArgs = BaseArgs & {
-	/**
-	 * The inputs is a string or a list of strings to get the features from.
-	 *
-	 * inputs: "That is a happy person",
-	 *
-	 */
-	inputs: string | string[];
-};
+export type FeatureExtractionArgs = BaseArgs & FeatureExtractionInput;
 
 /**
 * Returned values are a multidimensional array of floats (dimension depending on if you sent a string or a list of string, and if the automatic reduction, usually mean_pooling for instance was applied for you or not. This should be explained on the model's README).
```
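Call sites are unchanged by the type swap: `inputs` may still be a single string or a list of strings, now typed via the shared `FeatureExtractionInput`. A minimal sketch (model ID and token are illustrative):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_xxx"); // placeholder token

// FeatureExtractionInput still accepts a string or a list of strings.
const embedding = await hf.featureExtraction({
	model: "sentence-transformers/all-MiniLM-L6-v2", // illustrative model ID
	inputs: "That is a happy person",
});
```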

packages/inference/src/types.ts

Lines changed: 2 additions & 0 deletions
```diff
@@ -38,6 +38,7 @@ export const INFERENCE_PROVIDERS = [
 	"hyperbolic",
 	"nebius",
 	"novita",
+	"openai",
 	"replicate",
 	"sambanova",
 	"together",
@@ -97,6 +98,7 @@ export interface ProviderConfig {
 	makeBody: (params: BodyParams) => Record<string, unknown>;
 	makeHeaders: (params: HeaderParams) => Record<string, string>;
 	makeUrl: (params: UrlParams) => string;
+	clientSideRoutingOnly?: boolean;
 }
 
 export interface HeaderParams {
```

packages/inference/test/HfInference.spec.ts

Lines changed: 12 additions & 3 deletions
```diff
@@ -755,9 +755,9 @@ describe.concurrent("HfInference", () => {
 		it("custom openai - OpenAI Specs", async () => {
 			const OPENAI_KEY = env.OPENAI_KEY;
 			const hf = new HfInference(OPENAI_KEY);
-			const ep = hf.endpoint("https://api.openai.com");
-			const stream = ep.chatCompletionStream({
-				model: "gpt-3.5-turbo",
+			const stream = hf.chatCompletionStream({
+				provider: "openai",
+				model: "openai/gpt-3.5-turbo",
 				messages: [{ role: "user", content: "Complete the equation one + one =" }],
 			}) as AsyncGenerator<ChatCompletionStreamOutput>;
 			let out = "";
@@ -768,6 +768,15 @@ describe.concurrent("HfInference", () => {
 			}
 			expect(out).toContain("two");
 		});
+		it("OpenAI client side routing - model should have provider as prefix", async () => {
+			await expect(
+				new HfInference("dummy_token").chatCompletion({
+					model: "gpt-3.5-turbo", // must be "openai/gpt-3.5-turbo"
+					provider: "openai",
+					messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+				})
+			).rejects.toThrowError(`Models from openai must be prefixed by "openai/". Got "gpt-3.5-turbo".`);
+		});
 	},
 	TIMEOUT
 );
```
