Commit 782078b

Allow to provide accessToken
1 parent c3d664d commit 782078b

8 files changed: +130 −9 lines changed

packages/inference/src/snippets/getInferenceSnippets.ts

Lines changed: 13 additions & 9 deletions
@@ -14,7 +14,10 @@ import { makeRequestOptionsFromResolvedModel } from "../lib/makeRequestOptions.j
 import type { InferenceProviderOrPolicy, InferenceTask, RequestArgs } from "../types.js";
 import { templates } from "./templates.exported.js";
 
-export type InferenceSnippetOptions = { streaming?: boolean; billTo?: string } & Record<string, unknown>;
+export type InferenceSnippetOptions = { streaming?: boolean; billTo?: string; accessToken?: string } & Record<
+	string,
+	unknown
+>;
 
 const PYTHON_CLIENTS = ["huggingface_hub", "fal_client", "requests", "openai"] as const;
 const JS_CLIENTS = ["fetch", "huggingface.js", "openai"] as const;
@@ -150,14 +153,15 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 		console.error(`Failed to get provider helper for ${provider} (${task})`, e);
 		return [];
 	}
+	const accessTokenOrPlaceholder = opts?.accessToken ?? ACCESS_TOKEN_PLACEHOLDER;
 
 	/// Prepare inputs + make request
 	const inputs = inputPreparationFn ? inputPreparationFn(model, opts) : { inputs: getModelInputSnippet(model) };
 	const request = makeRequestOptionsFromResolvedModel(
 		providerModelId,
 		providerHelper,
 		{
-			accessToken: ACCESS_TOKEN_PLACEHOLDER,
+			accessToken: accessTokenOrPlaceholder,
 			provider,
 			...inputs,
 		} as RequestArgs,
@@ -182,7 +186,7 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 
 	/// Prepare template injection data
 	const params: TemplateParams = {
-		accessToken: ACCESS_TOKEN_PLACEHOLDER,
+		accessToken: accessTokenOrPlaceholder,
 		authorizationHeader: (request.info.headers as Record<string, string>)?.Authorization,
 		baseUrl: removeSuffix(request.url, "/chat/completions"),
 		fullUrl: request.url,
@@ -251,7 +255,9 @@ const snippetGenerator = (templateName: string, inputPreparationFn?: InputPrepar
 	}
 
 	/// Replace access token placeholder
-	snippet = replaceAccessTokenPlaceholder(snippet, language, provider);
+	if (snippet.includes(ACCESS_TOKEN_PLACEHOLDER)) {
+		snippet = replaceAccessTokenPlaceholder(snippet, language, provider);
+	}
 
 	/// Snippet is ready!
 	return { language, client: client as string, content: snippet };
@@ -429,8 +435,8 @@ function replaceAccessTokenPlaceholder(
 	language: InferenceSnippetLanguage,
 	provider: InferenceProviderOrPolicy
 ): string {
-	// The snippets are generated with a placeholder in place of the access token.
-	// Once snippets are rendered, we replace the placeholder with correct code to fetch the access token from an environment variable.
+	// If "opts.accessToken" is not set, the snippets are generated with a placeholder.
+	// Once snippets are rendered, we replace the placeholder with code to fetch the access token from an environment variable.
 
 	// Determine if HF_TOKEN or specific provider token should be used
 	const accessTokenEnvVar =
@@ -447,9 +453,7 @@
 			`"Authorization: Bearer $${accessTokenEnvVar}"` // e.g. "Authorization: Bearer $HF_TOKEN"
 		);
 	} else if (language === "python") {
-		if (snippet.includes(ACCESS_TOKEN_PLACEHOLDER)) {
-			snippet = "import os\n" + snippet;
-		}
+		snippet = "import os\n" + snippet;
 		snippet = snippet.replace(
 			`"${ACCESS_TOKEN_PLACEHOLDER}"`,
 			`os.environ["${accessTokenEnvVar}"]` // e.g. os.environ["HF_TOKEN"]
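Net effect: `accessTokenOrPlaceholder` threads an optional caller-supplied token through both the request options and the template parameters, and the placeholder-replacement pass now runs only when the placeholder is actually present. A minimal sketch of the calling side; the exact export path and signature of `getInferenceSnippets` are assumptions here, not shown in this diff:

import { snippets } from "@huggingface/inference";

// Minimal model metadata, mirroring the "with-access-token" test case added below.
const model = {
	id: "meta-llama/Llama-3.1-8B-Instruct",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
};

// Default: snippets are rendered with the placeholder, then rewritten to read the
// token from an environment variable (e.g. process.env.HF_TOKEN for hf-inference).
const fromEnvVar = snippets.getInferenceSnippets(model, "hf-inference");

// With opts.accessToken: the token is rendered verbatim into every snippet,
// and replaceAccessTokenPlaceholder is skipped.
const inlined = snippets.getInferenceSnippets(model, "hf-inference", { accessToken: "hf_xxx" });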

packages/tasks-gen/scripts/generate-snippets-fixtures.ts

Lines changed: 12 additions & 0 deletions
@@ -240,6 +240,18 @@ const TEST_CASES: {
 		providers: ["hf-inference"],
 		opts: { billTo: "huggingface" },
 	},
+	{
+		testName: "with-access-token",
+		task: "conversational",
+		model: {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		},
+		providers: ["hf-inference"],
+		opts: { accessToken: "hf_xxx" },
+	},
 	{
 		testName: "text-to-speech",
 		task: "text-to-speech",
Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+import { InferenceClient } from "@huggingface/inference";
+
+const client = new InferenceClient("hf_xxx");
+
+const chatCompletion = await client.chatCompletion({
+	provider: "hf-inference",
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [
+		{
+			role: "user",
+			content: "What is the capital of France?",
+		},
+	],
+});
+
+console.log(chatCompletion.choices[0].message);
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+	baseURL: "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct/v1",
+	apiKey: "hf_xxx",
+});
+
+const chatCompletion = await client.chat.completions.create({
+	model: "meta-llama/Llama-3.1-8B-Instruct",
+	messages: [
+		{
+			role: "user",
+			content: "What is the capital of France?",
+		},
+	],
+});
+
+console.log(chatCompletion.choices[0].message);
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+from huggingface_hub import InferenceClient
+
+client = InferenceClient(
+    provider="hf-inference",
+    api_key="hf_xxx",
+)
+
+completion = client.chat.completions.create(
+    model="meta-llama/Llama-3.1-8B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+)
+
+print(completion.choices[0].message)
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct/v1",
+    api_key="hf_xxx",
+)
+
+completion = client.chat.completions.create(
+    model="meta-llama/Llama-3.1-8B-Instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+)
+
+print(completion.choices[0].message)
Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
+import requests
+
+API_URL = "https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct/v1/chat/completions"
+headers = {
+    "Authorization": "Bearer hf_xxx",
+}
+
+def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+response = query({
+    "messages": [
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    "model": "meta-llama/Llama-3.1-8B-Instruct"
+})
+
+print(response["choices"][0]["message"])
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+curl https://router.huggingface.co/hf-inference/models/meta-llama/Llama-3.1-8B-Instruct/v1/chat/completions \
+    -H 'Authorization: Bearer hf_xxx' \
+    -H 'Content-Type: application/json' \
+    -d '{
+        "messages": [
+            {
+                "role": "user",
+                "content": "What is the capital of France?"
+            }
+        ],
+        "model": "meta-llama/Llama-3.1-8B-Instruct",
+        "stream": false
+    }'
