-import type { ModelDataMinimal } from "./types.js";
+import type { InferenceSnippet, ModelDataMinimal } from "./types.js";
 import { describe, expect, it } from "vitest";
-import { snippetConversational } from "./python.js";
+import { getPythonInferenceSnippet } from "./python.js";

 describe("inference API snippets", () => {
 	it("conversational llm", async () => {
@@ -10,7 +10,7 @@ describe("inference API snippets", () => {
 			tags: ["conversational"],
 			inference: "",
 		};
-		const snippet = snippetConversational(model, "api_token");
+		const snippet = getPythonInferenceSnippet(model, "api_token") as InferenceSnippet[];

 		expect(snippet[0].content).toEqual(`from huggingface_hub import InferenceClient

@@ -34,14 +34,43 @@ for chunk in stream:
     print(chunk.choices[0].delta.content, end="")`);
 	});

+	it("conversational llm non-streaming", async () => {
+		const model: ModelDataMinimal = {
+			id: "meta-llama/Llama-3.1-8B-Instruct",
+			pipeline_tag: "text-generation",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = getPythonInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];
+
+		expect(snippet[0].content).toEqual(`from huggingface_hub import InferenceClient
+
+client = InferenceClient(api_key="api_token")
+
+messages = [
+	{
+		"role": "user",
+		"content": "What is the capital of France?"
+	}
+]
+
+completion = client.chat.completions.create(
+	model="meta-llama/Llama-3.1-8B-Instruct",
+	messages=messages,
+	max_tokens=500
+)
+
+print(completion.choices[0].message)`);
+	});
+
 	it("conversational vlm", async () => {
 		const model: ModelDataMinimal = {
 			id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
 			pipeline_tag: "image-text-to-text",
 			tags: ["conversational"],
 			inference: "",
 		};
-		const snippet = snippetConversational(model, "api_token");
+		const snippet = getPythonInferenceSnippet(model, "api_token") as InferenceSnippet[];

 		expect(snippet[0].content).toEqual(`from huggingface_hub import InferenceClient

@@ -75,4 +104,41 @@ stream = client.chat.completions.create(
 for chunk in stream:
     print(chunk.choices[0].delta.content, end="")`);
 	});
+
+	it("text-to-image", async () => {
+		const model: ModelDataMinimal = {
+			id: "black-forest-labs/FLUX.1-schnell",
+			pipeline_tag: "text-to-image",
+			tags: [],
+			inference: "",
+		};
+		const snippets = getPythonInferenceSnippet(model, "api_token") as InferenceSnippet[];
+
+		expect(snippets.length).toEqual(2);
+
+		expect(snippets[0].client).toEqual("huggingface_hub");
+		expect(snippets[0].content).toEqual(`from huggingface_hub import InferenceClient
+client = InferenceClient("black-forest-labs/FLUX.1-schnell", token="api_token")
+
+# output is a PIL.Image object
+image = client.text_to_image("Astronaut riding a horse")`);
+
+		expect(snippets[1].client).toEqual("requests");
+		expect(snippets[1].content).toEqual(`import requests
+
+API_URL = "https://api-inference.huggingface.co/models/black-forest-labs/FLUX.1-schnell"
+headers = {"Authorization": "Bearer api_token"}
+
+def query(payload):
+	response = requests.post(API_URL, headers=headers, json=payload)
+	return response.content
+image_bytes = query({
+	"inputs": "Astronaut riding a horse",
+})
+
+# You can access the image with PIL.Image for example
+import io
+from PIL import Image
+image = Image.open(io.BytesIO(image_bytes))`);
+	});
 });
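Read together, these tests imply the surface of the renamed helper: getPythonInferenceSnippet(model, accessToken, opts?) returns a list of InferenceSnippet objects, each carrying a client label (e.g. "huggingface_hub", "requests") and the generated Python content. Below is a minimal TypeScript sketch of that usage, inferred from the tests alone; the { streaming: false } option and the cast to InferenceSnippet[] mirror the calls above, and anything beyond what the tests assert is an assumption, not documented API.

// Hedged sketch: exercises getPythonInferenceSnippet the way the tests above do.
// The option name `streaming` and the return cast are taken from the diff;
// nothing here is guaranteed beyond what those tests assert.
import type { InferenceSnippet, ModelDataMinimal } from "./types.js";
import { getPythonInferenceSnippet } from "./python.js";

const model: ModelDataMinimal = {
	id: "meta-llama/Llama-3.1-8B-Instruct",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
};

// Passing { streaming: false } selects the non-streaming Python snippet,
// as asserted in the "conversational llm non-streaming" test.
const snippets = getPythonInferenceSnippet(model, "api_token", { streaming: false }) as InferenceSnippet[];

for (const { client, content } of snippets) {
	// client names the target library; content is the ready-to-paste Python source.
	console.log(`# ${client}\n${content}`);
}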